1 #ifndef _SUNPINYIN_PYTRIE_GEN_H__ 2 #define _SUNPINYIN_PYTRIE_GEN_H__ 3 4 #include "../portability.h" 5 6 #include <string> 7 #include <vector> 8 #include <map> 9 #include <set> 10 #include <list> 11 12 #include "pytrie.h" 13 14 class CWordEvaluator { 15 public: 16 virtual double 17 getCost(unsigned int wid) = 0; 18 19 virtual bool 20 isSeen(unsigned int wid) = 0; 21 }; 22 23 class CPinyinTrieMaker { 24 public: 25 class TNode; 26 class TWordInfo; 27 28 union TWordId { 29 unsigned int m_all; 30 struct TAnony { //Some compiler do not support anonymous defaultly 31 #ifdef WORDS_BIGENDIAN 32 unsigned m_other : 6; 33 unsigned m_bHide : 1; 34 unsigned m_cost : 5; 35 unsigned m_csLevel : 2; 36 unsigned m_id : WORD_ID_WIDTH; 37 #else 38 unsigned m_id : WORD_ID_WIDTH; 39 unsigned m_csLevel : 2; 40 unsigned m_cost : 5; 41 unsigned m_bHide : 1; 42 unsigned m_other : 6; 43 #endif 44 } anony; 45 46 public: 47 TWordId() : m_all(0) { } 48 TWordId(const TWordId& b) : m_all(b.m_all) { } 49 TWordId(unsigned id, unsigned cost = 0, unsigned hide = 0, unsigned cslvl = 0) 50 { 51 anony.m_id=id; 52 anony.m_cost=cost; 53 anony.m_bHide= (hide)?1:0; 54 anony.m_csLevel=cslvl; 55 anony.m_other=0; 56 } 57 58 bool operator< (const TWordId& b) const 59 { return anony.m_id < b.anony.m_id; } 60 61 bool operator==(const TWordId& b) const 62 { return anony.m_id == b.anony.m_id; } 63 64 operator unsigned int() const 65 { return anony.m_id; } 66 }; 67 68 typedef std::set<TWordId> CWordSet; 69 typedef std::vector<TWordInfo> CWordVec; 70 typedef std::map<unsigned, TNode*> CTrans; 71 typedef std::set<TNode*> CNodeSet; 72 typedef std::list<TNode*> CNodeList; 73 typedef std::vector<std::string> CLexicon; 74 75 class TWordInfo { 76 public: 77 TWordId m_id; 78 double m_cost; 79 bool m_bSeen; 80 81 TWordInfo(TWordId id = 0, double cost = 0.0, bool seen=false) 82 : m_id(id), m_cost(cost), m_bSeen(seen) 83 { 84 if (m_id.anony.m_bHide) { 85 m_bSeen = false; 86 } 87 m_cost = cost + m_id.anony.m_cost; 88 } 89 90 bool 91 operator< (const TWordInfo& b) const 92 { 93 double fa = (m_bSeen)?(m_cost - 5000.0):(m_cost); 94 double fb = (b.m_bSeen)?(b.m_cost - 5000.0):(b.m_cost); 95 return (fa < fb); 96 } 97 }; 98 99 class PNodeSet { 100 public: 101 PNodeSet(const CNodeSet *pns) : m_pns(pns) { } 102 103 PNodeSet(const PNodeSet& another) : m_pns(another.m_pns) { } 104 105 const CNodeSet* 106 operator->(void) { return m_pns; } 107 108 const CNodeSet& 109 operator*(void) { return *m_pns; } 110 111 bool 112 operator< (const PNodeSet& another) const; 113 114 bool 115 operator==(const PNodeSet& another) const; 116 117 protected: 118 const CNodeSet * m_pns; 119 }; 120 121 typedef std::map<PNodeSet, TNode*> CStateMap; 122 123 class TNode { 124 public: 125 static CNodeList m_AllNodes; 126 public: 127 bool m_bExpanded; 128 bool m_bFullSyllableTransfer; 129 CWordSet m_WordIdSet; 130 CTrans m_Trans; 131 CNodeSet m_cmbNodes; 132 public: 133 TNode(); 134 }; 135 136 protected: 137 CStateMap m_StateMap; 138 TNode m_RootNode; 139 CLexicon m_Lexicon; 140 141 public: 142 CPinyinTrieMaker(); 143 144 ~CPinyinTrieMaker() {} //forget this 145 146 bool 147 constructFromLexicon(const char* fileName); 148 149 bool 150 insertFullPinyinPair(const char* pinyin, TWordId wid); 151 152 bool 153 threadNonCompletePinyin(void); 154 155 void 156 print(FILE* fp, TNode* root, std::string& pinyin); 157 158 bool 159 write(const char* fileName, CWordEvaluator* psrt); 160 161 bool 162 write(FILE *fp, CWordEvaluator* psrt); 163 164 protected: 165 TNode* 166 insertTransfer(TNode* pnode, unsigned s); 167 168 TNode* 169 addCombinedTransfers (TNode *pnode, unsigned s, const CNodeSet& nodes); 170 171 void 172 combineInitialTrans(TNode *pnode); 173 174 void 175 expandCombinedNode(TNode *pnode); 176 }; 177 178 #endif 179
