OpenGrok

Cross Reference: lattice_states.h
xref: /nv-g11n/inputmethod/sunpinyin2/src/ime-core/lattice_states.h
Home | History | Annotate | Line # | Download | only in ime-core
      1 /*
      2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
      3  *
      4  * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
      5  *
      6  * The contents of this file are subject to the terms of either the GNU Lesser
      7  * General Public License Version 2.1 only ("LGPL") or the Common Development and
      8  * Distribution License ("CDDL")(collectively, the "License"). You may not use this
      9  * file except in compliance with the License. You can obtain a copy of the CDDL at
     10  * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
     11  * http://www.opensource.org/licenses/lgpl-license.php. See the License for the
     12  * specific language governing permissions and limitations under the License. When
     13  * distributing the software, include this License Header Notice in each file and
     14  * include the full text of the License in the License file as well as the
     15  * following notice:
     16  *
     17  * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
     18  * (CDDL)
     19  * For Covered Software in this distribution, this License shall be governed by the
     20  * laws of the State of California (excluding conflict-of-law provisions).
     21  * Any litigation relating to this License shall be subject to the jurisdiction of
     22  * the Federal Courts of the Northern District of California and the state courts
     23  * of the State of California, with venue lying in Santa Clara County, California.
     24  *
     25  * Contributor(s):
     26  *
     27  * If you wish your version of this file to be governed by only the CDDL or only
     28  * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
     29  * include this software in this distribution under the [CDDL or LGPL Version 2.1]
     30  * license." If you don't indicate a single choice of license, a recipient has the
     31  * option to distribute your version of this file under either the CDDL or the LGPL
     32  * Version 2.1, or to extend the choice of license to its licensees as provided
     33  * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
     34  * Version 2 license, then the option applies only if the new code is made subject
     35  * to such option by the copyright holder.
     36  */
     37 
     38 #ifndef SUNPY_LATTICE_STATES_H
     39 #define SUNPY_LATTICE_STATES_H
     40 
     41 #include <vector>
     42 #include <map>
     43 #include "portability.h"
     44 #include "imi_data.h"
     45 
     46 typedef TLongExpFloat TSentenceScore; /* score type stored in TLatticeState::m_score */
     47 
     48 /**
     49  * CSlmState represents the history. In a real implementation, it is a
     50  * node pointer to a state in the language model. But to save
     51  * language model size, the state nodes in the language model do not
     52  * thread the back-off pointer. For now, we just use the Word Id for
     53  * the node in the language model. Later we should abstract the
     54  * StateNode from the language model implementation to replace this
     55  * definition.
     56  */
     57 typedef CThreadSlm::TState          CSlmState;
     58 
     59 /**
     60  * A WordKey could represent a word. This is defined directly as an
     61  * unsigned int for now; in the future, we may adopt word classes, such as
     62  * a Digital Word Class.
     63  */
     64 typedef unsigned                    CWordId;
     65 
     66 /**
     67  * This class is used to record lexicon state (pinyin trie nodes)
     68  * just before a bone. From the bone, it could see when arriving
     69  * it, how many valid Pinyin Trie Node still could be used to search
     70  * more words further, and what bone is its starting bone.
     71  */
     72 struct TLexiconState {
     73     typedef std::vector<CPinyinTrie::TWordIdInfo> TWordIdInfoVec;
     74 
     75     unsigned                    m_start;
     76     const CPinyinTrie::TNode   *m_pPYNode;
     77     TWordIdInfoVec              m_words;
     78     CSyllables                  m_syls;
     79     bool                        m_bPinyin;
     80     bool                        m_bFuzzy;
     81 
     82     TLexiconState (unsigned start, const CPinyinTrie::TNode *pnode, CSyllables& syl, bool isFuzzy=false):
     83         m_start(start), m_pPYNode(pnode), m_syls(syl), m_bPinyin(true), m_bFuzzy(isFuzzy) {}
     84 
     85     TLexiconState (unsigned start, CSyllables &syl, TWordIdInfoVec &words, bool isFuzzy=false):
     86         m_start(start), m_pPYNode(NULL), m_words(words), m_syls(syl), m_bPinyin(true), m_bFuzzy(isFuzzy) {}
     87 
     88     TLexiconState (unsigned start, unsigned wid):
     89         m_start(start), m_pPYNode(NULL), m_bPinyin(false) {m_words.push_back(wid);}
     90 
     91     const CPinyinTrie::TWordIdInfo *getWords (unsigned &num);
     92     void print (std::string prefix) const;
     93 };
     94 
     95 /**
     96  * A list of lexicon states. Each state may come from a different
     97  * starting position. Later, when Fuzzy PinYin is added,
     98  * more than one state may come from one starting bone.
     99  */
    100 typedef std::vector<TLexiconState>    CLexiconStates;
    101 
    102 
    103 /**
    104  * The basic static unit used in the lattice searching
    105  */
    106 struct TLatticeState {
    107     TSentenceScore      m_score;
    108     unsigned            m_frIdx;
    109     TLatticeState      *m_pBackTraceNode;
    110     CSlmState           m_slmState;
    111     CWordId             m_backTraceWordId;
    112 
    113     TLatticeState(double score = -1.0,
    114                   unsigned frIdx=0,
    115                   TLatticeState* btNodePtr = NULL,
    116                   CSlmState sk= CSlmState(),
    117                   CWordId wk = CWordId())
    118         : m_score(score), m_frIdx(frIdx), m_pBackTraceNode(btNodePtr),
    119           m_slmState(sk), m_backTraceWordId(wk) {}
    120 
    121     /** for debug printing... */
    122     void
    123     print(std::string prefix) const;
    124 };
    125 
    126 typedef std::vector<TLatticeState>  CLatticeStateVec; /* CLatticeStates takes its iterator/reference/size types from this */
    127 
    128 /**
    129  * All lattice node on a lattice frame. This class provide beam pruning
    130  * while push_back, which means at most the best MAX states are reserved,
    131  * ie, weak state will may be discard while new better state are inserted,
    132  * and the number MAX is arrived.
    133  */
    134 class CLatticeStates {
    135 private:
    136     static const unsigned beam_width = 32;
    137 
    138 public:
    139     /** just use the CLatticeStateVec's iterator */
    140     typedef CLatticeStateVec::iterator        iterator;
    141 
    142     /** just use the CLatticeStateVec's iterator */
    143     typedef CLatticeStateVec::const_iterator  const_iterator;
    144 
    145     typedef CLatticeStateVec::reference       reference;
    146     typedef CLatticeStateVec::const_reference const_reference;
    147     typedef CLatticeStateVec::size_type       size_type;
    148 
    149 public:
    150     void
    151     clear();
    152 
    153     void
    154     push_back(const TLatticeState& node);
    155 
    156     //@{
    157     /** return the first iterator of m_vec. */
    158     size_t
    159     size()
    160         { return m_vec.size(); }
    161 
    162     iterator
    163     begin()
    164         { return m_vec.begin(); }
    165 
    166     /** return the first iterator of m_vec. */
    167     const_iterator
    168     begin() const
    169         { return m_vec.begin(); }
    170     //@}
    171 
    172 
    173     //@{
    174     /** return the last iterator of m_vec. */
    175     iterator
    176     end()
    177         { return m_vec.end(); }
    178 
    179     /** return the last iterator of m_vec. */
    180     const_iterator
    181     end() const
    182         { return m_vec.end(); }
    183     //@}
    184 
    185     reference
    186     operator[] (size_type index)
    187         {return m_vec[index];}
    188 
    189     const_reference
    190     operator[] (size_type index) const
    191         {return m_vec[index];}
    192 
    193 protected:
    194     void
    195     bubbleUp(int idxInHeap);
    196 
    197     void
    198     ironDown(int idxInHeap);
    199 
    200 protected:
    201     std::vector<TLatticeState>      m_vec;
    202     std::vector<int>                m_vecIdxInHeap;
    203     std::map<CSlmState, int>        m_map;
    204     std::vector<int>                m_heap;
    205 };
    206 
    207 #endif
    208