1 /* 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 3 * 4 * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved. 5 * 6 * The contents of this file are subject to the terms of either the GNU Lesser 7 * General Public License Version 2.1 only ("LGPL") or the Common Development and 8 * Distribution License ("CDDL")(collectively, the "License"). You may not use this 9 * file except in compliance with the License. You can obtain a copy of the CDDL at 10 * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at 11 * http://www.opensource.org/licenses/lgpl-license.php. See the License for the 12 * specific language governing permissions and limitations under the License. When 13 * distributing the software, include this License Header Notice in each file and 14 * include the full text of the License in the License file as well as the 15 * following notice: 16 * 17 * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE 18 * (CDDL) 19 * For Covered Software in this distribution, this License shall be governed by the 20 * laws of the State of California (excluding conflict-of-law provisions). 21 * Any litigation relating to this License shall be subject to the jurisdiction of 22 * the Federal Courts of the Northern District of California and the state courts 23 * of the State of California, with venue lying in Santa Clara County, California. 24 * 25 * Contributor(s): 26 * 27 * If you wish your version of this file to be governed by only the CDDL or only 28 * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to 29 * include this software in this distribution under the [CDDL or LGPL Version 2.1] 30 * license." If you don't indicate a single choice of license, a recipient has the 31 * option to distribute your version of this file under either the CDDL or the LGPL 32 * Version 2.1, or to extend the choice of license to its licensees as provided 33 * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL 34 * Version 2 license, then the option applies only if the new code is made subject 35 * to such option by the copyright holder. 36 */ 37 38 #ifndef SUNPY_LATTICE_STATES_H 39 #define SUNPY_LATTICE_STATES_H 40 41 #include <vector> 42 #include <map> 43 #include "portability.h" 44 #include "imi_data.h" 45 46 typedef TLongExpFloat TSentenceScore; 47 48 /** 49 * CSlmState represent the history. In real implementation, it's a 50 * node pointer to a state in the language model. But to save the 51 * language model size, the state node in language model do not 52 * thread the back-off pointer. Now, we just use the Word Id for 53 * the node in the language model. Later we should abstract the 54 * StateNode from language model implemetation to replace this 55 * definition. 56 */ 57 typedef CThreadSlm::TState CSlmState; 58 59 /** 60 * A WordKey could represent a word. Define this use the unsigned int 61 * directly. Because in the future, we may adopt word class, such as 62 * Digital Word Class. 63 */ 64 typedef unsigned CWordId; 65 66 /** 67 * This class is used to record lexicon state (pinyin trie nodes) 68 * just before a bone. From the bone, it could see when arriving 69 * it, how many valid Pinyin Trie Node still could be used to search 70 * more words further, and what bone is its starting bone. 71 */ 72 struct TLexiconState { 73 typedef std::vector<CPinyinTrie::TWordIdInfo> TWordIdInfoVec; 74 75 unsigned m_start; 76 const CPinyinTrie::TNode *m_pPYNode; 77 TWordIdInfoVec m_words; 78 CSyllables m_syls; 79 bool m_bPinyin; 80 bool m_bFuzzy; 81 82 TLexiconState (unsigned start, const CPinyinTrie::TNode *pnode, CSyllables& syl, bool isFuzzy=false): 83 m_start(start), m_pPYNode(pnode), m_syls(syl), m_bPinyin(true), m_bFuzzy(isFuzzy) {} 84 85 TLexiconState (unsigned start, CSyllables &syl, TWordIdInfoVec &words, bool isFuzzy=false): 86 m_start(start), m_pPYNode(NULL), m_words(words), m_syls(syl), m_bPinyin(true), m_bFuzzy(isFuzzy) {} 87 88 TLexiconState (unsigned start, unsigned wid): 89 m_start(start), m_pPYNode(NULL), m_bPinyin(false) {m_words.push_back(wid);} 90 91 const CPinyinTrie::TWordIdInfo *getWords (unsigned &num); 92 void print (std::string prefix) const; 93 }; 94 95 /** 96 * A list of Lexicon State. Every state may from different 97 * starting position. Later, when Fuzzy PinYin are added, 98 * more than one state may comes from one starting bone. 99 */ 100 typedef std::vector<TLexiconState> CLexiconStates; 101 102 103 /** 104 * The basic static unit used in the lattice searching 105 */ 106 struct TLatticeState { 107 TSentenceScore m_score; 108 unsigned m_frIdx; 109 TLatticeState *m_pBackTraceNode; 110 CSlmState m_slmState; 111 CWordId m_backTraceWordId; 112 113 TLatticeState(double score = -1.0, 114 unsigned frIdx=0, 115 TLatticeState* btNodePtr = NULL, 116 CSlmState sk= CSlmState(), 117 CWordId wk = CWordId()) 118 : m_score(score), m_frIdx(frIdx), m_pBackTraceNode(btNodePtr), 119 m_slmState(sk), m_backTraceWordId(wk) {} 120 121 /** for debug printing... */ 122 void 123 print(std::string prefix) const; 124 }; 125 126 typedef std::vector<TLatticeState> CLatticeStateVec; 127 128 /** 129 * All lattice node on a lattice frame. This class provide beam pruning 130 * while push_back, which means at most the best MAX states are reserved, 131 * ie, weak state will may be discard while new better state are inserted, 132 * and the number MAX is arrived. 133 */ 134 class CLatticeStates { 135 private: 136 static const unsigned beam_width = 32; 137 138 public: 139 /** just use the CLatticeStateVec's iterator */ 140 typedef CLatticeStateVec::iterator iterator; 141 142 /** just use the CLatticeStateVec's iterator */ 143 typedef CLatticeStateVec::const_iterator const_iterator; 144 145 typedef CLatticeStateVec::reference reference; 146 typedef CLatticeStateVec::const_reference const_reference; 147 typedef CLatticeStateVec::size_type size_type; 148 149 public: 150 void 151 clear(); 152 153 void 154 push_back(const TLatticeState& node); 155 156 //@{ 157 /** return the first iterator of m_vec. */ 158 size_t 159 size() 160 { return m_vec.size(); } 161 162 iterator 163 begin() 164 { return m_vec.begin(); } 165 166 /** return the first iterator of m_vec. */ 167 const_iterator 168 begin() const 169 { return m_vec.begin(); } 170 //@} 171 172 173 //@{ 174 /** return the last iterator of m_vec. */ 175 iterator 176 end() 177 { return m_vec.end(); } 178 179 /** return the last iterator of m_vec. */ 180 const_iterator 181 end() const 182 { return m_vec.end(); } 183 //@} 184 185 reference 186 operator[] (size_type index) 187 {return m_vec[index];} 188 189 const_reference 190 operator[] (size_type index) const 191 {return m_vec[index];} 192 193 protected: 194 void 195 bubbleUp(int idxInHeap); 196 197 void 198 ironDown(int idxInHeap); 199 200 protected: 201 std::vector<TLatticeState> m_vec; 202 std::vector<int> m_vecIdxInHeap; 203 std::map<CSlmState, int> m_map; 204 std::vector<int> m_heap; 205 }; 206 207 #endif 208
