1 #ifndef __SUNPINYIN_PYTRIE_H__ 2 #define __SUNPINYIN_PYTRIE_H__ 3 4 #ifdef HAVE_CONFIG_H 5 #include <config.h> 6 #endif 7 8 #include "../portability.h" 9 #include "syllable.h" 10 #include <map> 11 12 #define WORD_ID_WIDTH 18 13 14 class CPinyinTrie { 15 public: 16 friend class CPinyinTrieMaker; 17 18 struct TTransUnit { 19 TSyllable m_Syllable; 20 unsigned m_Offset; 21 }; 22 23 struct TWordIdInfo { 24 #ifdef WORDS_BIGENDIAN 25 unsigned m_bSeen : 1; 26 unsigned m_cost : 5; 27 unsigned m_len : 6; 28 unsigned m_csLevel : 2; 29 unsigned m_id : WORD_ID_WIDTH; 30 #else 31 unsigned m_id : WORD_ID_WIDTH; 32 unsigned m_csLevel : 2; 33 unsigned m_len : 6; 34 unsigned m_cost : 5; 35 unsigned m_bSeen : 1; 36 #endif 37 38 TWordIdInfo() { memset(this, 0, sizeof(TWordIdInfo)); } 39 40 TWordIdInfo(unsigned id, unsigned len=0, unsigned seen=0, unsigned cost = 0, unsigned cslvl = 0) 41 : m_id(id), m_csLevel(cslvl), m_len(len), m_cost(cost), m_bSeen(seen) { } 42 43 operator unsigned int() const { return m_id; } 44 }; 45 46 struct TNode { 47 #ifdef WORDS_BIGENDIAN 48 unsigned m_other : 5; 49 unsigned m_bFullSyllableTransfer: 1; 50 unsigned m_csLevel : 2; 51 unsigned m_nTransfer : 12; 52 unsigned m_nWordId : 12; 53 #else 54 unsigned m_nWordId : 12; 55 unsigned m_nTransfer : 12; 56 unsigned m_csLevel : 2; 57 unsigned m_bFullSyllableTransfer: 1; 58 unsigned m_other : 5; 59 #endif 60 61 static unsigned int 62 size_for(unsigned int nTransfer, unsigned int nWordId) 63 { return sizeof(TNode) + sizeof(TTransUnit)*nTransfer + 64 sizeof(TWordIdInfo)*nWordId; } 65 66 TNode() 67 { *((unsigned*)this) = 0; } 68 69 bool 70 hasPinyinChild(void) const 71 { return (m_nTransfer > 1);} 72 73 const TTransUnit* 74 getTrans() const 75 { return (TTransUnit*)(this+1); } 76 77 const TWordIdInfo* 78 getWordIdPtr() const 79 { return (TWordIdInfo*)(((char*)(this+1))+sizeof(TTransUnit)*m_nTransfer); } 80 81 unsigned int 82 transfer(unsigned s) const 83 { 84 unsigned int b = 0, e = m_nTransfer; 85 const TTransUnit* ptrans = getTrans(); 86 while (b < e) { 87 int m = b + (e-b)/2; 88 if (ptrans[m].m_Syllable == s) 89 return ptrans[m].m_Offset; 90 if (ptrans[m].m_Syllable < s) 91 b = m + 1; 92 else 93 e = m; 94 } 95 return 0; 96 } 97 }; 98 99 public: 100 CPinyinTrie() : m_Size(0), m_mem(NULL), m_words(NULL) { } 101 102 ~CPinyinTrie() 103 { free(); } 104 105 bool 106 load(const char* fileName); 107 108 void 109 free(void); 110 111 bool 112 isValid(const TNode* pnode, bool allowNonComplete, unsigned csLevel=0); 113 114 unsigned int 115 getRootOffset() const 116 { return 3 * sizeof(unsigned int); } 117 118 const TNode* 119 getRootNode() const 120 { return (TNode*)(m_mem+getRootOffset()); } 121 122 const TNode* 123 nodeFromOffset(unsigned int offset) const 124 { return (offset < getRootOffset())?NULL:((TNode*)(m_mem+offset)); } 125 126 unsigned int 127 getWordCount(void) const 128 { return *(unsigned int*)m_mem; } 129 130 unsigned int 131 getNodeCount(void) const 132 { return *(unsigned int*)(m_mem+sizeof(unsigned int)); } 133 134 unsigned int 135 getStringOffset(void) const 136 { return *(unsigned int*)(m_mem+2*sizeof(unsigned int)); } 137 138 inline const TNode* 139 transfer(const TNode* pnode, unsigned s) const 140 { return nodeFromOffset(pnode->transfer(s)); } 141 142 inline const TNode* 143 transfer(unsigned s) const 144 { return transfer(getRootNode(), s); } 145 146 unsigned int 147 getSymbolId(const TWCHAR* wstr); 148 149 unsigned int 150 getSymbolId(const wstring & wstr); 151 152 const TWCHAR* 153 operator[](unsigned int idx) const 154 { return m_words[idx]; } 155 156 int 157 lengthAt(unsigned int idx) const; 158 159 void 160 print(FILE *fp) const; 161 162 protected: 163 unsigned int m_Size; 164 char *m_mem; 165 TWCHAR **m_words; 166 167 std::map<wstring, unsigned> m_SymbolMap; 168 169 void 170 print(const TNode* pRoot, std::string& prefix, FILE *fp) const; 171 }; 172 173 #endif /* __SUNPINYIN_PYTRIE_H__*/ 174