OpenGrok

Cross Reference: imi_context.h
xref: /nv-g11n/inputmethod/sunpinyin2/src/ime-core/imi_context.h
Home | History | Annotate | Line # | Download | only in ime-core
      1 /*
      2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
      3  *
      4  * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
      5  *
      6  * The contents of this file are subject to the terms of either the GNU Lesser
      7  * General Public License Version 2.1 only ("LGPL") or the Common Development and
      8  * Distribution License ("CDDL")(collectively, the "License"). You may not use this
      9  * file except in compliance with the License. You can obtain a copy of the CDDL at
     10  * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
     11  * http://www.opensource.org/licenses/lgpl-license.php. See the License for the
     12  * specific language governing permissions and limitations under the License. When
     13  * distributing the software, include this License Header Notice in each file and
     14  * include the full text of the License in the License file as well as the
     15  * following notice:
     16  *
     17  * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
     18  * (CDDL)
     19  * For Covered Software in this distribution, this License shall be governed by the
     20  * laws of the State of California (excluding conflict-of-law provisions).
     21  * Any litigation relating to this License shall be subject to the jurisdiction of
     22  * the Federal Courts of the Northern District of California and the state courts
     23  * of the State of California, with venue lying in Santa Clara County, California.
     24  *
     25  * Contributor(s):
     26  *
     27  * If you wish your version of this file to be governed by only the CDDL or only
     28  * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
     29  * include this software in this distribution under the [CDDL or LGPL Version 2.1]
     30  * license." If you don't indicate a single choice of license, a recipient has the
     31  * option to distribute your version of this file under either the CDDL or the LGPL
     32  * Version 2.1, or to extend the choice of license to its licensees as provided
     33  * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
     34  * Version 2 license, then the option applies only if the new code is made subject
     35  * to such option by the copyright holder.
     36  */
     37 
     38 #ifndef SUNPY_IMI_CONTEXT_H
     39 #define SUNPY_IMI_CONTEXT_H
     40 
     41 #include "portability.h"
     42 
     43 #ifdef HAVE_CONFIG_H
     44 #include <config.h>
     45 #endif
     46 
// Pull in assert() for debug builds only.
// Autoconf's AC_CHECK_HEADERS([assert.h]) defines HAVE_ASSERT_H; the guard
// used to test the misspelled HAVE_ASSET_H, which that check never defines.
// The old spelling is still honoured so that any build that defines it by
// hand keeps working.
// NOTE(review): confirm that the project's configure script checks for
// assert.h at all.
#if defined(DEBUG) && (defined(HAVE_ASSERT_H) || defined(HAVE_ASSET_H))
#include <assert.h>
#endif
     50 
     51 #include <climits>
     52 #include <map>
     53 #include <vector>
     54 
     55 #include "pinyin_seg.h"
     56 #include "imi_data.h"
     57 #include "ic_history.h"
     58 #include "userdict.h"
     59 #include "lattice_states.h"
     60 #include "imi_funcobjs.h"
     61 
     62 /**
     63 * TSentenceScore is only used for whole sentence score,
     64 * the score from language model still using double.
     65 */
     66 typedef TLongExpFloat TSentenceScore;
     67 
     68 class CLatticeFrame;
     69 class CCandidate;
     70 class CIMIContext;
     71 
     72 typedef std::vector<CLatticeFrame>  CLattice;
     73 typedef std::vector<CCandidate>     CCandidates;
     74 typedef CCandidates::iterator       CCandidatesIter;
     75 
     76 union TCandiRank {
     77 public:
     78     bool
     79     operator< (const TCandiRank& b) const
     80         { return m_all < b.m_all; };
     81 
     82     TCandiRank() : m_all(0) { }
     83 
     84     TCandiRank(bool user, bool best, unsigned int len,
     85                bool fromLattice, TSentenceScore score);
     86 
     87     TCandiRank(bool user, bool best, unsigned int len,
     88               bool fromLattice, unsigned score);
     89 
     90 protected:
     91     unsigned  int               m_all;
     92     #if !defined(WORDS_BIGENDIAN)
     93     struct TAnony {
     94         unsigned                m_cost   : 24;
     95         unsigned                m_lattice: 1;
     96         unsigned                m_best   : 1;
     97         unsigned                m_len    : 5;
     98         unsigned                m_user   : 1;
     99     } anony;
    100     #else
    101     struct TAnony {
    102         unsigned                m_user   : 1;
    103         unsigned                m_len    : 5;
    104         unsigned                m_best   : 1;
    105         unsigned                m_lattice: 1;
    106         unsigned                m_cost   : 24;
    107     } anony;
    108     #endif
    109 }; // TCandiRank
    110 
    111 /**
    112  * CCandidate represent basic information about a single candidate.
    113  * Its start bone and finishing bone. It's content string. and its
    114  * word id.
    115  */
    116 class CCandidate {
    117     friend class CIMIContext;
    118 public:
    119     unsigned            m_start;
    120     unsigned            m_end;
    121     const TWCHAR       *m_cwstr;
    122 
    123 public:
    124     /** Give out the constructor for convinience */
    125     CCandidate(unsigned start=0, unsigned end=0, const TWCHAR* s = NULL, unsigned int wid=0)
    126         : m_start(start), m_end(end), m_cwstr(s), m_wordId(wid) {}
    127 
    128 protected:
    129     unsigned int        m_wordId;
    130 }; // of CCandidate
    131 
    132 class CLatticeFrame {
    133     friend class CIMIContext;
    134 public:
    135     enum TYPE {
    136         UNUSED                  = 0x0000,      // unused frame
    137         TAIL                    = 0x0001,      // tail frame
    138 
    139         CATE_SYLLABLE           = 0x0100,
    140         SYLLABLE                = 0x0101,      // pinyin
    141         SYLLABLE_SEP            = 0x0102,      // pinyin
    142         INCOMPLETE_SYLLABLE     = 0x0104,      // incomplete syllable string
    143 
    144         CATE_OTHER              = 0x0200,
    145         ASCII                   = 0x0201,      // english string
    146         PUNC                    = 0x0202,      // punctuation
    147         SYMBOL                  = 0x0204,      // other symbol
    148         DIGITAL                 = 0x0208,      // not implemeted here
    149     }; // TYPE
    150 
    151     enum BESTWORD_TYPE {
    152         NO_BESTWORD             = 1 << 0,
    153         BESTWORD                = 1 << 1,
    154         USER_SELECTED           = 1 << 2,
    155         IGNORED                 = 1 << 3,
    156     }; // BESTWORD_TYPE
    157 
    158     unsigned    m_type;
    159     unsigned    m_bwType;
    160     wstring     m_wstr;
    161 
    162     CLatticeFrame () : m_type (UNUSED), m_bwType (NO_BESTWORD) {}
    163 
    164     bool isUnusedFrame () const
    165         {return m_type == 0;}
    166 
    167     bool isSyllableFrame () const
    168         {return (m_type & CATE_SYLLABLE);}
    169 
    170     bool isSyllableSepFrame () const
    171         {return ((m_type & SYLLABLE_SEP) > CATE_SYLLABLE);}
    172 
    173     bool isTailFrame () const
    174         {return (m_type == TAIL);}
    175 
    176     void clear ()
    177     {
    178         m_type = UNUSED;
    179         m_bwType = NO_BESTWORD;
    180         m_lexiconStates.clear();
    181         m_latticeStates.clear();
    182         m_wstr.clear ();
    183     }
    184 
    185     void print (std::string prefix);
    186 
    187 protected:
    188     CCandidate                  m_bestWord;
    189     CLexiconStates              m_lexiconStates;
    190     CLatticeStates              m_latticeStates;
    191 }; // CLatticeFrame
    192 
    193 class CIMIContext {
    194 public:
    195      CIMIContext ();
    196     ~CIMIContext () {clear();}
    197 
    198     void clear ();
    199 
    200     void setCoreData (CIMIData *pCoreData);
    201     void setUserDict (CUserDict *pUserDict) {m_pUserDict = pUserDict;}
    202 
    203     void setHistoryMemory (CICHistory *phm) {m_pHistory = phm;}
    204     CICHistory * getHistoryMemory () {return m_pHistory;}
    205 
    206     void setHistoryPower (unsigned power)
    207         {m_historyPower = power <=10? power: 3;}
    208 
    209     int getHistoryPower ()
    210         {return m_historyPower;}
    211 
    212     void setFullSymbolForwarding (bool value=true) {m_bFullSymbolForwarding = value;}
    213     bool getFullSymbolForwarding () {return m_bFullSymbolForwarding;}
    214     void setGetFullSymbolOp (CGetFullSymbolOp *op) {m_pGetFullSymbolOp = op;}
    215 
    216     void setFullPunctForwarding (bool value=true) {m_bFullPunctForwarding = value;}
    217     bool getFullPunctForwarding () {return m_bFullPunctForwarding;}
    218     void setGetFullPunctOp (CGetFullPunctOp *op) {m_pGetFullPunctOp = op;}
    219 
    220     void setNonCompleteSyllable(bool value=true) {m_bNonCompleteSyllable = value;}
    221     bool getNonCompleteSyllable() {return m_bNonCompleteSyllable;}
    222 
    223     void setCharsetLevel (unsigned l) {m_csLevel = l;}
    224     unsigned getCharsetLevel () {return m_csLevel;}
    225 
    226     void setDynamicCandidateOrder (bool value=true) {m_bDynaCandiOrder = value;}
    227     bool getDynaCandiOrder () {return m_bDynaCandiOrder;}
    228 
    229     CLattice& getLattice () {return m_lattice;}
    230     bool buildLattice (IPySegmentor::TSegmentVec &segments, unsigned rebuildFrom=1, bool doSearch=true);
    231     bool isEmpty () {return m_tailIdx <= 1;}
    232     unsigned getLastFrIdx () {return m_tailIdx-1;}
    233 
    234     bool searchFrom (unsigned from=1);
    235     std::vector<unsigned>& getBestPath () {return m_bestPath;}
    236     unsigned getBestSentence (wstring& result, unsigned start=0, unsigned end=UINT_MAX);
    237 
    238     void getCandidates (unsigned frIdx, CCandidates& result);
    239     unsigned cancelSelection (unsigned frIdx, bool doSearch=true);
    240     void makeSelection (CCandidate &candi, bool doSearch=true);
    241 
    242     void memorize ();
    243     void printLattice ();
    244 
    245 protected:
    246     void _clearFrom (unsigned from);
    247 
    248     inline void _forwardSyllables (unsigned i, unsigned j, std::vector<unsigned>& syllables);
    249     inline void _forwardSingleSyllable (unsigned i, unsigned j, TSyllable syllable, bool isFuzzy=false);
    250     inline void _forwardSyllableSep (unsigned i, unsigned j);
    251     inline void _forwardString (unsigned i, unsigned j, std::vector<unsigned>& strbuf);
    252     inline void _forwardPunctChar (unsigned i, unsigned j, unsigned ch);
    253     inline void _forwardOrdinaryChar (unsigned i, unsigned j, unsigned ch);
    254     inline void _forwardTail (unsigned i, unsigned j);
    255 
    256     inline void _transferBetween (unsigned start, unsigned end, unsigned wid, double ic=1.0);
    257     inline void _backTraceBestPath ();
    258     inline void _clearBestPath ();
    259 
    260     inline const TWCHAR *_getWstr (unsigned wid);
    261 
    262     inline void _saveUserDict ();
    263     inline void _saveHistoryCache ();
    264 
    265 protected:
    266     CLattice                    m_lattice;
    267     unsigned                    m_tailIdx;
    268     std::vector<unsigned>       m_bestPath;
    269 
    270     CThreadSlm                 *m_pModel;
    271     CPinyinTrie                *m_pPinyinTrie;
    272     CUserDict                  *m_pUserDict;
    273     CICHistory                 *m_pHistory;
    274     unsigned                    m_historyPower;
    275 
    276     unsigned                    m_csLevel;
    277 
    278     bool                        m_bFullSymbolForwarding;
    279     CGetFullSymbolOp           *m_pGetFullSymbolOp;
    280 
    281     bool                        m_bFullPunctForwarding;
    282     CGetFullPunctOp            *m_pGetFullPunctOp;
    283 
    284     bool                        m_bNonCompleteSyllable;
    285     bool                        m_bDynaCandiOrder;
    286 
    287     unsigned                    m_candiStarts;
    288     unsigned                    m_candiEnds;
    289 }; // CIMIContext
    290 
    291 #endif
    292