Home | History | Annotate | Download | only in src
      1    0   yongsun /*
      2   82   yongsun  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
      3   82   yongsun  *
      4   82   yongsun  * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
      5   82   yongsun  *
      6   82   yongsun  * The contents of this file are subject to the terms of either the GNU Lesser
      7   82   yongsun  * General Public License Version 2.1 only ("LGPL") or the Common Development and
      8   82   yongsun  * Distribution License ("CDDL")(collectively, the "License"). You may not use this
      9   82   yongsun  * file except in compliance with the License. You can obtain a copy of the CDDL at
     10   82   yongsun  * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
     11   82   yongsun  * http://www.opensource.org/licenses/lgpl-license.php. See the License for the
     12   82   yongsun  * specific language governing permissions and limitations under the License. When
     13   82   yongsun  * distributing the software, include this License Header Notice in each file and
     14   82   yongsun  * include the full text of the License in the License file as well as the
     15   82   yongsun  * following notice:
     16   82   yongsun  *
     17   82   yongsun  * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
     18   82   yongsun  * (CDDL)
     19   82   yongsun  * For Covered Software in this distribution, this License shall be governed by the
     20   82   yongsun  * laws of the State of California (excluding conflict-of-law provisions).
     21   82   yongsun  * Any litigation relating to this License shall be subject to the jurisdiction of
     22   82   yongsun  * the Federal Courts of the Northern District of California and the state courts
     23   82   yongsun  * of the State of California, with venue lying in Santa Clara County, California.
     24   82   yongsun  *
     25   82   yongsun  * Contributor(s):
     26   82   yongsun  *
     27   82   yongsun  * If you wish your version of this file to be governed by only the CDDL or only
     28   82   yongsun  * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
     29   82   yongsun  * include this software in this distribution under the [CDDL or LGPL Version 2.1]
     30   82   yongsun  * license." If you don't indicate a single choice of license, a recipient has the
     31   82   yongsun  * option to distribute your version of this file under either the CDDL or the LGPL
     32   82   yongsun  * Version 2.1, or to extend the choice of license to its licensees as provided
     33   82   yongsun  * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
     34   82   yongsun  * Version 2 license, then the option applies only if the new code is made subject
     35   82   yongsun  * to such option by the copyright holder.
     36    0   yongsun  */
     37   82   yongsun 
     38    0   yongsun #ifdef HAVE_CONFIG_H
     39    0   yongsun #include <config.h>
     40    0   yongsun #endif
     41    0   yongsun 
     42  182  tchaikov #include <algorithm>
     43    0   yongsun #include "imi_context.h"
     44    0   yongsun #include "lattice_states.h"
     45    0   yongsun 
     46    0   yongsun 
     47    0   yongsun class TSkelCursor {
     48    0   yongsun public:
     49    0   yongsun     struct TPos {
     50    0   yongsun         CSkeletonIter m_bone;
     51    0   yongsun         int           m_idx;
     52    0   yongsun         TPos(CSkeletonIter bone=CSkeletonIter(), int idx=0)
     53    0   yongsun             : m_bone(bone), m_idx(idx) { }
     54    0   yongsun     };
     55    0   yongsun 
     56    0   yongsun public:
     57    0   yongsun     TSkelCursor(CSkeletonIter h1, CSkeletonIter t1, CSkeletonIter h2, CSkeletonIter t2, bool asis=false)
     58    0   yongsun             : m_h1(h1), m_t1(t1), m_h2(h2), m_t2(t2), m_bone(h1), m_idx(0), m_iLink(0)
     59    0   yongsun         { if (!asis) ensureCursor(); }
     60    0   yongsun 
     61    0   yongsun     inline bool
     62    0   yongsun     isPinyin() const
     63    0   yongsun         { return (m_bone != m_t2 && m_bone->isPinyinNode()); }
     64    0   yongsun 
     65    0   yongsun     inline bool
     66    0   yongsun     isBreakAfter() const
     67    0   yongsun         { return (m_bone == m_t2 || m_bone == m_t1 || m_idx >= m_bone->m_String.size()-1); }
     68    0   yongsun 
     69    0   yongsun     inline bool
     70    0   yongsun     isBreakAfter(TPos & pos) const
     71    0   yongsun         { return (pos.m_bone == m_t2 || pos.m_bone == m_t1 || pos.m_idx >= pos.m_bone->m_String.size()-1); }
     72    0   yongsun 
     73    0   yongsun     inline bool
     74    0   yongsun     isUserBreakAfter() const
     75    0   yongsun         { return (m_bone == m_t2 || m_bone == m_t1 ||
     76    0   yongsun                     (m_idx == m_bone->m_String.size()-1 && m_bone->isPinyinNode() &&
     77    0   yongsun                      m_bone->m_BoundaryType == CBone::USER_BOUNDARY)); }
     78    0   yongsun 
     79    0   yongsun     inline bool
     80    0   yongsun     isUserBreakAfter(TPos & pos) const
     81    0   yongsun         { return (pos.m_bone == m_t2 || pos.m_bone == m_t1 ||
     82    0   yongsun                       (pos.m_idx == pos.m_bone->m_String.size()-1 && pos.m_bone->isPinyinNode() &&
     83    0   yongsun                        pos.m_bone->m_BoundaryType == CBone::USER_BOUNDARY)); }
     84    0   yongsun 
     85    0   yongsun     inline TWCHAR
     86    0   yongsun     getChar()
     87    0   yongsun         {
     88    0   yongsun             ensureCursor();
     89    0   yongsun             return (m_bone != m_t2)?(m_bone->m_String[m_idx]):0;
     90    0   yongsun         }
     91    0   yongsun 
     92    0   yongsun     inline TWCHAR
     93    0   yongsun     getChar(TPos& pos) const // the pos should be ensured
     94    0   yongsun         { return (pos.m_bone != m_t2)?(pos.m_bone->m_String[pos.m_idx]):0; }
     95    0   yongsun 
     96    0   yongsun     void
     97    0   yongsun     next(bool asis=false)
     98    0   yongsun         {
     99    0   yongsun             ensureCursor();
    100    0   yongsun             if (m_bone != m_t2) {
    101    0   yongsun                 ++m_idx;
    102    0   yongsun                 if (!asis) ensureCursor();
    103    0   yongsun             }
    104    0   yongsun         }
    105    0   yongsun 
    106    0   yongsun     void
    107    0   yongsun     nextBone()
    108    0   yongsun         {
    109    0   yongsun             ensureCursor();
    110    0   yongsun             if (m_bone != m_t2) {
    111    0   yongsun                 ++m_bone;
    112    0   yongsun                 m_idx=0;
    113    0   yongsun             }
    114    0   yongsun             ensureCursor();
    115    0   yongsun         }
    116    0   yongsun 
    117    0   yongsun     inline bool
    118    0   yongsun     hasNext()
    119    0   yongsun         {
    120    0   yongsun             ensureCursor();
    121    0   yongsun             return m_bone != m_t2;
    122    0   yongsun         }
    123    0   yongsun 
    124    0   yongsun     inline bool
    125    0   yongsun     atFirstLink() const
    126    0   yongsun         { return m_iLink == 0; }
    127    0   yongsun 
    128    0   yongsun     inline TPos
    129    0   yongsun     getPosition() const
    130    0   yongsun         {
    131    0   yongsun             return TPos(m_bone, m_idx);
    132    0   yongsun         }
    133    0   yongsun 
    134    0   yongsun     /** The parameters must be retrieved from save object before */
    135    0   yongsun     inline void
    136    0   yongsun     setPosition(const TPos& pos)
    137    0   yongsun         { m_bone = pos.m_bone; m_idx = pos.m_idx; }
    138    0   yongsun 
    139    0   yongsun     bool
    140    0   yongsun     ensureCursor(TPos& curCompare);
    141    0   yongsun 
    142    0   yongsun protected:
    143    0   yongsun     CSkeletonIter   m_h1, m_h2, m_t1, m_t2;
    144    0   yongsun     CSkeletonIter   m_bone;
    145    0   yongsun     int             m_iLink, m_idx;
    146    0   yongsun 
    147    0   yongsun protected:
    148    0   yongsun     void
    149    0   yongsun     ensureCursor();
    150    0   yongsun };
    151    0   yongsun 
    152    0   yongsun void
    153    0   yongsun TSkelCursor::ensureCursor()
    154    0   yongsun {
    155    0   yongsun     while (m_bone != m_t1 && m_bone != m_t2 && m_idx >= m_bone->m_String.size()){
    156    0   yongsun         m_idx = 0;
    157    0   yongsun         ++m_bone;
    158    0   yongsun     }
    159    0   yongsun     if (m_bone == m_t1) {
    160    0   yongsun         ++m_iLink;
    161    0   yongsun         m_bone = m_h2;
    162    0   yongsun         m_idx = 0;
    163    0   yongsun         while (m_bone != m_t2 && m_idx >= m_bone->m_String.size()){
    164    0   yongsun             m_idx = 0;
    165    0   yongsun             ++m_bone;
    166    0   yongsun         }
    167    0   yongsun     }
    168    0   yongsun }
    169    0   yongsun 
    170    0   yongsun bool
    171    0   yongsun TSkelCursor::ensureCursor(TPos& curCompare)
    172    0   yongsun {
    173    0   yongsun     bool same = false;
    174    0   yongsun     same = (m_bone == curCompare.m_bone && m_idx == curCompare.m_idx);
    175    0   yongsun     while (m_bone != m_t1 && m_bone != m_t2 && m_idx >= m_bone->m_String.size()){
    176    0   yongsun         m_idx = 0;
    177    0   yongsun         ++m_bone;
    178    0   yongsun         same = same || (m_bone == curCompare.m_bone && m_idx == curCompare.m_idx);
    179    0   yongsun     }
    180    0   yongsun     if (m_bone == m_t1) {
    181    0   yongsun         ++m_iLink;
    182    0   yongsun         m_bone = m_h2;
    183    0   yongsun         m_idx = 0;
    184    0   yongsun         same = same || (m_bone == curCompare.m_bone && m_idx == curCompare.m_idx);
    185    0   yongsun         while (m_bone != m_t2 && m_idx >= m_bone->m_String.size()){
    186    0   yongsun             m_idx = 0;
    187    0   yongsun             ++m_bone;
    188    0   yongsun             same = same || (m_bone == curCompare.m_bone && m_idx == curCompare.m_idx);
    189    0   yongsun         }
    190    0   yongsun     }
    191    0   yongsun     return same;
    192    0   yongsun }
    193    0   yongsun 
    194    0   yongsun /**
    195    0   yongsun  * Determine whether or not the target iterator's position on list of head
    196    0   yongsun  * is located before the iterator first. More precisely, it return whether
    197    0   yongsun  * or not target in [head, first)
    198    0   yongsun  * @param target is the target iterator whose position to be decided
    199    0   yongsun  * @param head is the head iterator of the container (list or vector)
    200    0   yongsun  * @param first iterator to be compared with target
    201    0   yongsun  * @return whether or not the target iterator's position on list of head
    202    0   yongsun  * is located before the iterator first
    203    0   yongsun  */
    204    0   yongsun template<class forwardIt>
    205    0   yongsun bool
    206    0   yongsun isLocatedBefore(forwardIt target, forwardIt head, forwardIt first)
    207    0   yongsun {
    208    0   yongsun     for (; head != first; ++head) {
    209    0   yongsun         if (target == head) return true;
    210    0   yongsun     }
    211    0   yongsun     return false;
    212    0   yongsun }
    213    0   yongsun 
    214    0   yongsun CIMIContext::CIMIContext()
    215    0   yongsun     : m_bNonCompleteSyllable(false), m_bStrictLeft2Right(false),
    216    0   yongsun       m_bGBK(true), m_bGB18030(false), m_HistoryPower(3), m_ContextRanking(true),
    217    0   yongsun       m_pModel(NULL), m_pPinyinTrie(NULL), m_Skeleton(),
    218    0   yongsun       m_EffectiveCandiBoneStart(), m_EffectiveCandiBoneEnd()
    219    0   yongsun {
    220    0   yongsun }
    221    0   yongsun 
    222    0   yongsun void
    223    0   yongsun CIMIContext::setCoreData(CIMIData *pCoreData)
    224    0   yongsun {
    225    0   yongsun     m_pModel = pCoreData->getSlm();
    226    0   yongsun     m_pPinyinTrie = pCoreData->getPinyinTrie();
    227    0   yongsun }
    228    0   yongsun 
    229    0   yongsun void
    230    0   yongsun CIMIContext::clear()
    231    0   yongsun {
    232    0   yongsun     m_Skeleton.clear();
    233    0   yongsun     m_Skeleton.push_back(CBone());
    234    0   yongsun     m_Skeleton.push_back(CBone());
    235    0   yongsun     m_EffectiveCandiBoneStart = m_EffectiveCandiBoneEnd = getLastBone();
    236    0   yongsun 
    237    0   yongsun     // allocate bone's inner data when it is inserted into the skeleton
    238    0   yongsun     CSkeletonIter itEnd = m_Skeleton.end();
    239    0   yongsun     for (CSkeletonIter bone = m_Skeleton.begin(); bone != itEnd; ++bone) {
    240    0   yongsun         if (bone->m_pInnerData == NULL)
    241    0   yongsun             bone->m_pInnerData = new CBoneInnerData();
    242    0   yongsun     }
    243    0   yongsun 
    244    0   yongsun     searchFrom(m_Skeleton.begin());
    245    0   yongsun }
    246    0   yongsun 
    247    0   yongsun static bool
    248    0   yongsun isYuanYinChar(TWCHAR wc)
    249    0   yongsun {
    250    0   yongsun     return (wc == L'a' || wc == L'o' || wc == L'e' ||
    251    0   yongsun             wc == L'i' || wc == L'u' || wc == L'v');
    252    0   yongsun }
    253    0   yongsun 
    254    0   yongsun CSkeletonIter
    255    0   yongsun CIMIContext::cancelSelection(CSkeletonIter bone, bool update)
    256    0   yongsun {
    257    0   yongsun     bool found = false;
    258    0   yongsun     CSkeletonIter it = bone;
    259    0   yongsun     for (CSkeletonIter first=m_Skeleton.begin(); it->m_BoneType == CBone::NODE_PINYIN; --it) {
    260    0   yongsun         // BestWrod is conjunctive, so no need to check position if user selection
    261    0   yongsun         // like isLocatedBefore(bone, it, it->m_pInnerData->m_BestWord.m_BoneEnd)) {
    262    0   yongsun         if (it->m_pInnerData->m_BWType == CBoneInnerData::UserSelectedBestWord) {
    263    0   yongsun             it->m_pInnerData->m_BWType = CBoneInnerData::NoBestWordStartHere;
    264    0   yongsun             found = true;
    265    0   yongsun             break;
    266    0   yongsun         } else if (it->m_pInnerData->m_BWType != CBoneInnerData::NoBestWordStartHere) {
    267    0   yongsun             break;
    268    0   yongsun         }
    269    0   yongsun         if (it == first)
    270    0   yongsun             break;
    271    0   yongsun     }
    272    0   yongsun     if (found && update)
    273    0   yongsun         searchFrom(it);
    274    0   yongsun     return (found)?(it):(bone);
    275    0   yongsun }
    276    0   yongsun 
    277    0   yongsun CSkeletonIter
    278    0   yongsun CIMIContext::cancelSelectionCover(CSkeletonIter bone, bool update)
    279    0   yongsun {
    280    0   yongsun     bool found = false;
    281    0   yongsun     if (bone->m_pInnerData->m_BWType != CBoneInnerData::NoBestWordStartHere) {
    282    0   yongsun         return bone;
    283    0   yongsun     }
    284    0   yongsun     CSkeletonIter it = bone;
    285    0   yongsun     for (CSkeletonIter first=m_Skeleton.begin(); it != first; ) {
    286    0   yongsun         --it;
    287    0   yongsun         if (it->m_pInnerData->m_BWType != CBoneInnerData::NoBestWordStartHere) {
    288  182  tchaikov             // BestWord is conjunctive, so no need to check position if user selection
    289    0   yongsun             // like isLocatedBefore(bone, it, it->m_pInnerData->m_BestWord.m_BoneEnd)) {
    290    0   yongsun             if (it->m_pInnerData->m_BWType == CBoneInnerData::UserSelectedBestWord) {
    291    0   yongsun                 it->m_pInnerData->m_BWType = CBoneInnerData::NoBestWordStartHere;
    292    0   yongsun                 found = true;
    293    0   yongsun             }
    294    0   yongsun             break;
    295    0   yongsun         }
    296    0   yongsun     }
    297    0   yongsun     if (found && update)
    298    0   yongsun         searchFrom(it);
    299    0   yongsun     return (found)?(it):(bone);
    300    0   yongsun }
    301    0   yongsun 
    302    0   yongsun bool
    303    0   yongsun CIMIContext::makeSelection(const CCandidate& candi)
    304    0   yongsun {
    305    0   yongsun     CSkeletonIter boneLeft = cancelSelection(candi.m_BoneStart, false);
    306    0   yongsun 
    307   39  ys148558     /*
    308    0   yongsun     candi.m_BoneStart->m_pInnerData->m_BWType = CBoneInnerData::NoBestWordStartHere;
    309    0   yongsun 
    310    0   yongsun     for (CSkeletonIter bone = candi.m_BoneStart; bone != candi.m_BoneEnd; ++bone)
    311    0   yongsun         bone->m_pInnerData->m_BWType = CBoneInnerData::NoBestWordStartHere;
    312    0   yongsun     */
    313    0   yongsun 
    314    0   yongsun     candi.m_BoneStart->m_pInnerData->m_BestWord = candi;
    315    0   yongsun     candi.m_BoneStart->m_pInnerData->m_BWType = CBoneInnerData::UserSelectedBestWord;
    316    0   yongsun     searchFrom(boneLeft);
    317    0   yongsun 
    318    0   yongsun     return true;
    319    0   yongsun }
    320    0   yongsun 
    321    0   yongsun /**
    322    0   yongsun * it is illegal if boneStart == boneEnd and skel.size() == 0
    323    0   yongsun */
    324    0   yongsun bool
    325    0   yongsun CIMIContext::modify(CSkeletonIter boneStart,
    326    0   yongsun                     CSkeletonIter boneEnd,
    327    0   yongsun                     CSkeleton& skel,
    328    0   yongsun                     bool doSearch,
    329    0   yongsun                     CSkeletonIter* pItLeftmost)
    330    0   yongsun {
    331    0   yongsun     // No change needed, happen on OneLineView, call out a PINYIN i
    332    0   yongsun     // but return it back withou modification.
    333    0   yongsun     // FIXME, maybe this should be put back to OnreLineView's code, not here
    334    0   yongsun     if ((skel.size() == 1) && (boneEnd == ++CSkeletonIter(boneStart)) &&
    335    0   yongsun             (skel.begin()->m_BoneType == boneStart->m_BoneType) &&
    336    0   yongsun             (skel.begin()->m_String == boneStart->m_String)) {
    337    0   yongsun         if (pItLeftmost) *pItLeftmost = getLastBone();
    338    0   yongsun         boneStart->m_BoundaryType = skel.begin()->m_BoundaryType;
    339    0   yongsun         return false;
    340    0   yongsun     }
    341    0   yongsun 
    342    0   yongsun     // check whether or not the modification would affect the candidates
    343    0   yongsun     // retrieved by the previous getCandidates() call
    344    0   yongsun     CSkeletonIter first = boneStart;
    345    0   yongsun     if (first->m_pInnerData->m_LexiconStates.size() > 0)
    346    0   yongsun         first = first->m_pInnerData->m_LexiconStates[0].m_BoneStart;
    347    0   yongsun     bool affectCandidates =
    348    0   yongsun         !isLocatedBefore(m_EffectiveCandiBoneEnd, m_Skeleton.begin(), first);
    349    0   yongsun 
    350    0   yongsun     // We must check the user selection which may cover this node
    351    0   yongsun     // if there is such a user selection, we should do search from there
    352    0   yongsun     // starting bone of such a selection.
    353    0   yongsun     // The check should only be done when boneStart to be removed
    354    0   yongsun     CSkeletonIter lefter = cancelSelectionCover(boneStart, false);
    355    0   yongsun     bool bSearchLefter = (lefter != boneStart);
    356    0   yongsun 
    357    0   yongsun     // Another case is that previous UserSelection just ending at boneStart,
    358    0   yongsun     // which is the first bone to be deleted. In this case, new search will
    359    0   yongsun     // start with the newly inserted bone, and when searching, the User
    360    0   yongsun     // selection word will be check backward and got a wrong range. So
    361    0   yongsun     // we must make it change the UserSelection's ending Bone to the first
    362    0   yongsun     // bone after insertion.
    363    0   yongsun     bool bLeftUS = false;
    364    0   yongsun     CSkeletonIter leftUserBone = lefter;
    365    0   yongsun     if (skel.size() > 0 && !bSearchLefter && leftUserBone != m_Skeleton.begin()) {
    366    0   yongsun         do {
    367    0   yongsun             --leftUserBone;
    368    0   yongsun             int bwType = leftUserBone->m_pInnerData->m_BWType;
    369    0   yongsun             if (bwType != CBoneInnerData::NoBestWordStartHere) {
    370    0   yongsun                 bLeftUS = (bwType ==CBoneInnerData::UserSelectedBestWord &&
    371    0   yongsun                            leftUserBone->m_pInnerData->m_BestWord.m_BoneEnd == boneStart);
    372    0   yongsun                 break;
    373    0   yongsun             }
    374    0   yongsun         } while (leftUserBone != m_Skeleton.begin());
    375    0   yongsun     }
    376    0   yongsun 
    377    0   yongsun     CBoneInnerData *pid = NULL;
    378    0   yongsun     // remove the old range
    379    0   yongsun     if (boneStart != boneEnd) {
    380    0   yongsun         // before remove the bone, get the first's bone's innerData
    381    0   yongsun         // reserve it for the first bone to be inserted. (ie. just
    382    0   yongsun         // attach it to the first bone after deletion/insertion
    383    0   yongsun         pid = boneStart->m_pInnerData;
    384    0   yongsun         boneStart->m_pInnerData = NULL;
    385    0   yongsun 
    386    0   yongsun         m_Skeleton.erase(boneStart, boneEnd);
    387    0   yongsun     }
    388    0   yongsun 
    389    0   yongsun     // insert new list before boneEnd
    390    0   yongsun     first = boneEnd;
    391    0   yongsun     CSkeleton::iterator it1 = skel.begin(), h = skel.begin();
    392    0   yongsun     CSkeleton::iterator it2 = skel.end();
    393    0   yongsun     for (; it1 != it2; ++it1) {
    394    0   yongsun         CSkeletonIter tmp = m_Skeleton.insert(boneEnd, *it1);
    395    0   yongsun         if (it1 == h)
    396    0   yongsun             first = tmp;
    397    0   yongsun         else
    398    0   yongsun             tmp->m_pInnerData = new CBoneInnerData();
    399    0   yongsun     }
    400    0   yongsun 
    401    0   yongsun     if (first->m_pInnerData != NULL) {
    402    0   yongsun         // nothing inserted, must deleted something, ie pid != NULL
    403    0   yongsun         pid->m_BWType = first->m_pInnerData->m_BWType;
    404    0   yongsun         pid->m_BestWord = first->m_pInnerData->m_BestWord;
    405    0   yongsun         delete first->m_pInnerData;
    406    0   yongsun         first->m_pInnerData = pid;
    407    0   yongsun     } else if (pid == NULL) {
    408    0   yongsun         // nothing deleted, just inserting something
    409    0   yongsun         first->m_pInnerData = boneEnd->m_pInnerData;
    410    0   yongsun         boneEnd->m_pInnerData = new CBoneInnerData();
    411    0   yongsun         boneEnd->m_pInnerData->m_BWType = first->m_pInnerData->m_BWType;
    412    0   yongsun         boneEnd->m_pInnerData->m_BestWord = first->m_pInnerData->m_BestWord;
    413    0   yongsun         first->m_pInnerData->m_BWType = CBoneInnerData::NoBestWordStartHere;
    414    0   yongsun     } else {
    415    0   yongsun         //something deleted, something inserted
    416    0   yongsun         first->m_pInnerData = pid;
    417    0   yongsun         pid->m_BWType = CBoneInnerData::NoBestWordStartHere;
    418    0   yongsun     }
    419    0   yongsun 
    420    0   yongsun     // change the left user selection bone's best word's ending bone to first
    421    0   yongsun     if (bLeftUS)
    422    0   yongsun         leftUserBone->m_pInnerData->m_BestWord.m_BoneEnd = first;
    423    0   yongsun 
    424    0   yongsun     // rebuild the search lattice from the newly inserted list
    425    0   yongsun     // using the just copied lattice states (innerData)
    426    0   yongsun     if (pItLeftmost)
    427    0   yongsun         *pItLeftmost = (bSearchLefter)?(lefter):(first);
    428    0   yongsun     if (doSearch)
    429    0   yongsun         searchFrom((bSearchLefter)?(lefter):(first));
    430    0   yongsun 
    431    0   yongsun     return affectCandidates;
    432    0   yongsun }
    433    0   yongsun 
    434    0   yongsun bool
    435    0   yongsun CIMIContext::isValidSyllable(const TWCHAR* pstr)
    436    0   yongsun {
    437    0   yongsun     const CPinyinTrie::TNode* pyn = m_pPinyinTrie->transfer(pstr);
    438    0   yongsun     return m_pPinyinTrie->isValid(pyn, m_bNonCompleteSyllable, m_bGBK);
    439    0   yongsun }
    440    0   yongsun 
    441    0   yongsun bool
    442    0   yongsun CIMIContext::segPinyinSimplest(const wstring& pinyin, CSkeleton& result)
    443    0   yongsun {
    444    0   yongsun     #ifdef DEBUG
    445    0   yongsun         printf("SegPinyin:");
    446    0   yongsun         print_wide(pinyin.c_str());
    447    0   yongsun         printf("-->");
    448    0   yongsun     #endif
    449    0   yongsun 
    450    0   yongsun     //"zhuang" is longest syllable, 16 is enought
    451    0   yongsun     bool  validSyllable[16];
    452    0   yongsun     const CPinyinTrie::TNode* pathNodes[16];
    453    0   yongsun     const TWCHAR* str = pinyin.c_str();
    454    0   yongsun     const CPinyinTrie::TNode* pyn = m_pPinyinTrie->getRootNode();
    455    0   yongsun 
    456    0   yongsun     result.clear();
    457    0   yongsun 
    458    0   yongsun     //Find out the longest valid PINYIN prefix, save to lastValid
    459    0   yongsun     int idx, lastValid = - 1;
    460    0   yongsun     for (idx = 0; str[idx] != 0; ++idx) {
    461    0   yongsun         pyn = m_pPinyinTrie->transfer(pyn, (unsigned char)(str[idx]));
    462    0   yongsun         pathNodes[idx] = pyn;
    463    0   yongsun         if (validSyllable[idx] = m_pPinyinTrie->isValid(pyn, m_bNonCompleteSyllable, m_bGBK))
    464    0   yongsun             lastValid = idx;
    465    0   yongsun         if (pyn == NULL)
    466    0   yongsun             break;
    467    0   yongsun     }
    468    0   yongsun 
    469    0   yongsun     /*********************************************************************
    470    0   yongsun     Note, when NULL pyn arrived, the char should also be the last one.
    471    0   yongsun     Try to split it into two nodes if possible:
    472    0   yongsun         (1) [0..idx-2], [0..idx-1] is both complete syllable
    473    0   yongsun             [idx-1] is FuYin, [idx] is Yuanyin,
    474    0   yongsun             [idx-1...] is non-complete or complete (not NULL)
    475    0   yongsun             ====> split into [0..idx-2] [idx-1, idx]
    476    0   yongsun         (2) lastValid >= 0
    477    0   yongsun             ====> split into [0..lastValid] [lastValid+1..]
    478    0   yongsun                   if [lastValid+1...] is not valid, return false
    479    0   yongsun         (3) lastValid = -1
    480    0   yongsun             ====> give a invalid PINYIN bone [0..]
    481    0   yongsun     **********************************************************************/
    482    0   yongsun     if (pyn == NULL && idx >= 2 &&
    483    0   yongsun         pathNodes[idx-1]->m_bFullSyllableTransfer == 1 &&
    484    0   yongsun         pathNodes[idx-2]->m_bFullSyllableTransfer == 1 &&
    485    0   yongsun         !isYuanYinChar(str[idx-1]) && isYuanYinChar(str[idx]) &&
    486    0   yongsun         (pathNodes[idx] = m_pPinyinTrie->transfer(str+idx-1)) != NULL) {
    487    0   yongsun 
    488    0   yongsun         result.push_back(CBone(str, idx-1, CBone::AUTO_BOUNDARY, CBone::NODE_PINYIN));
    489    0   yongsun 
    490    0   yongsun         #ifdef DEBUG
    491    0   yongsun             print_wide(wstring(str, idx-1).c_str());
    492    0   yongsun             printf("'");
    493    0   yongsun         #endif
    494    0   yongsun 
    495    0   yongsun         int bt = CBone::NODE_INCOMPLETE_PINYIN;
    496    0   yongsun         if (pathNodes[idx]->m_bFullSyllableTransfer == 1)
    497    0   yongsun             bt = CBone::NODE_PINYIN;
    498    0   yongsun 
    499    0   yongsun         result.push_back( CBone(str+idx-1, CBone::AUTO_BOUNDARY, bt) );
    500    0   yongsun 
    501    0   yongsun         #ifdef DEBUG
    502    0   yongsun             print_wide(str+idx-1);
    503    0   yongsun             fflush(stdout);
    504    0   yongsun         #endif
    505    0   yongsun 
    506    0   yongsun         return true;
    507    0   yongsun     }
    508    0   yongsun 
    509    0   yongsun     if (pyn == NULL && lastValid >= 0) {
    510    0   yongsun         result.push_back(CBone(str, lastValid+1, CBone::AUTO_BOUNDARY, CBone::NODE_PINYIN));
    511    0   yongsun 
    512    0   yongsun         #ifdef DEBUG
    513    0   yongsun             print_wide(wstring(str, lastValid+1).c_str());
    514    0   yongsun             printf("'");
    515    0   yongsun         #endif
    516    0   yongsun 
    517    0   yongsun         int bt = CBone::NODE_INCOMPLETE_PINYIN;
    518    0   yongsun 
    519    0   yongsun         pathNodes[idx] = m_pPinyinTrie->transfer(str+lastValid+1);
    520    0   yongsun         if (pathNodes[idx] == NULL)
    521    0   yongsun             bt = CBone::NODE_INVALID_PINYIN;
    522    0   yongsun         else if (m_pPinyinTrie->isValid(pathNodes[idx], m_bNonCompleteSyllable, m_bGBK))
    523    0   yongsun             bt = CBone::NODE_PINYIN;
    524    0   yongsun         else
    525    0   yongsun             bt = CBone::NODE_INCOMPLETE_PINYIN;
    526    0   yongsun         result.push_back(CBone(str+lastValid+1, CBone::AUTO_BOUNDARY, bt));
    527    0   yongsun 
    528    0   yongsun         #ifdef DEBUG
    529    0   yongsun             print_wide(str+lastValid+1);
    530    0   yongsun             if (bt == CBone::NODE_INVALID_PINYIN)
    531    0   yongsun                 printf("(X)");
    532    0   yongsun             fflush(stdout);
    533    0   yongsun         #endif
    534    0   yongsun 
    535    0   yongsun         return (bt != CBone::NODE_INVALID_PINYIN);
    536    0   yongsun     }
    537    0   yongsun 
    538    0   yongsun     if (pyn == NULL) {
    539    0   yongsun         result.push_back(CBone(str, CBone::AUTO_BOUNDARY, CBone::NODE_INVALID_PINYIN));
    540    0   yongsun 
    541    0   yongsun         #ifdef DEBUG
    542    0   yongsun             print_wide(str);
    543    0   yongsun             printf("(X)");
    544    0   yongsun             fflush(stdout);
    545    0   yongsun         #endif
    546    0   yongsun 
    547    0   yongsun         return false;
    548    0   yongsun     }
    549    0   yongsun 
    550    0   yongsun     /********************************************************************
    551    0   yongsun     Now, pyn is not NULL, str[idx] should be 0,
    552    0   yongsun         [0..idx-1] is valid (non-complete or complete)
    553    0   yongsun     *********************************************************************/
    554    0   yongsun     int bt = (validSyllable[idx-1])?(CBone::NODE_PINYIN):(CBone::NODE_INCOMPLETE_PINYIN);
    555    0   yongsun     result.push_back(CBone(str, CBone::AUTO_BOUNDARY, bt));
    556    0   yongsun 
    557    0   yongsun     #ifdef DEBUG
    558    0   yongsun         print_wide(str);
    559    0   yongsun         fflush(stdout);
    560    0   yongsun     #endif
    561    0   yongsun 
    562    0   yongsun     return true;
    563    0   yongsun }
    564    0   yongsun 
    565    0   yongsun TCandiRank::TCandiRank(bool user, bool best, unsigned int len,
    566    0   yongsun                        bool fromLattice, TSentenceScore score)
    567    0   yongsun {
    568    0   yongsun     anony.m_user = (user)?0:1;
    569    0   yongsun     anony.m_best = (best)?0:1;
    570    0   yongsun     anony.m_len = (len > 31)?(0):(31-len);
    571    0   yongsun     anony.m_lattice = (fromLattice)?0:1;
    572    0   yongsun 
    573    0   yongsun     #ifdef DEBUG
    574    0   yongsun         //assert(fromLattice);
    575    0   yongsun         //assert(TSentenceScore(+0.0) < score);
    576    0   yongsun     #endif
    577    0   yongsun 
    578    0   yongsun     double ds = -score.log2();
    579    0   yongsun 
    580    0   yongsun     //make it 24-bit
    581    0   yongsun     if (ds > 32767.0)
    582    0   yongsun         ds = 32767.0;
    583    0   yongsun     else if (ds < -32768.0)
    584    0   yongsun         ds = -32768.0;
    585    0   yongsun     unsigned cost = unsigned((ds+32768.0)*256.0);
    586    0   yongsun     anony.m_cost = cost;
    587    0   yongsun }
    588    0   yongsun 
    589    0   yongsun TCandiRank::TCandiRank(bool user, bool best, unsigned int len,
    590    0   yongsun                        bool fromLattice, unsigned rank)
    591    0   yongsun {
    592    0   yongsun     anony.m_user = (user)?0:1;
    593    0   yongsun     anony.m_best = (best)?0:1;
    594    0   yongsun     anony.m_len = (len > 31)?(0):(31-len);
    595    0   yongsun     anony.m_lattice = (fromLattice)?0:1;
    596    0   yongsun     anony.m_cost = rank;
    597    0   yongsun }
    598    0   yongsun 
    599    0   yongsun struct TCandiPair {
    600    0   yongsun     CCandidate                      m_Candi;
    601    0   yongsun     TCandiRank                      m_Rank;
    602    0   yongsun 
    603    0   yongsun     TCandiPair() : m_Candi(), m_Rank() { }
    604    0   yongsun };
    605    0   yongsun 
    606    0   yongsun struct TCandiPairPtr {
    607    0   yongsun     TCandiPair*                     m_Ptr;
    608    0   yongsun 
    609    0   yongsun     TCandiPairPtr(TCandiPair* p=NULL) : m_Ptr(p)
    610    0   yongsun     { }
    611    0   yongsun 
    612    0   yongsun     bool
    613    0   yongsun     operator< (const TCandiPairPtr& b) const
    614    0   yongsun     { return m_Ptr->m_Rank < b.m_Ptr->m_Rank; }
    615    0   yongsun };
    616    0   yongsun 
    617    0   yongsun // FIXME, this procedure could be modified largely.
    618    0   yongsun void
    619    0   yongsun CIMIContext::getCandidates(CSkeletonIter bone, CCandidates& result)
    620    0   yongsun {
    621    0   yongsun     TCandiPair cp;
    622    0   yongsun     static std::map<unsigned int, TCandiPair> map;
    623    0   yongsun     std::map<unsigned int, TCandiPair>::iterator it_map;
    624    0   yongsun 
    625    0   yongsun     map.clear();
    626    0   yongsun     result.clear();
    627    0   yongsun     m_EffectiveCandiBoneStart = m_EffectiveCandiBoneEnd = bone;
    628    0   yongsun 
    629    0   yongsun     if (bone->isTailNode())
    630    0   yongsun         return;
    631    0   yongsun     if (!bone->isValidPinyinNode()) {
    632    0   yongsun         result.push_back(CCandidate(bone->m_String.c_str(), bone, ++CSkeletonIter(bone)));
    633    0   yongsun         return;
    634    0   yongsun     }
    635    0   yongsun 
    636    0   yongsun     // if user selection or best word starting at bone
    637    0   yongsun     if (bone->m_pInnerData->m_BWType != CBoneInnerData::NoBestWordStartHere) {
    638    0   yongsun         cp.m_Candi = bone->m_pInnerData->m_BestWord;
    639    0   yongsun         cp.m_Rank =
    640    0   yongsun             TCandiRank(bone->m_pInnerData->m_BWType == CBoneInnerData::UserSelectedBestWord,
    641    0   yongsun                        bone->m_pInnerData->m_BWType == CBoneInnerData::BestWordStartHere,
    642    0   yongsun                        0, false, 0);
    643    0   yongsun         map[cp.m_Candi.m_WordId] = cp;
    644    0   yongsun     }
    645    0   yongsun 
    646    0   yongsun     //collecting all candidates, from both lattice and lexicon
    647    0   yongsun     int len = 1;
    648    0   yongsun     cp.m_Candi.m_BoneStart = bone;
    649    0   yongsun     CSkeletonIter b = ++CSkeletonIter(bone);
    650    0   yongsun     while (b != (--m_Skeleton.end())) {
    651    0   yongsun         cp.m_Candi.m_BoneEnd = b;
    652    0   yongsun 
    653    0   yongsun         bool found = false;
    654    0   yongsun         CLexiconStates::iterator itlex = b->m_pInnerData->m_LexiconStates.begin();
    655    0   yongsun         CLexiconStates::iterator itlexe = b->m_pInnerData->m_LexiconStates.end();
    656    0   yongsun         for (; itlex != itlexe; ++itlex) {
    657    0   yongsun             if (itlex->m_BoneStart == bone) {
    658    0   yongsun                 found = true;
    659    0   yongsun                 if (itlex->m_bPinyin) {
    660    0   yongsun                     if (itlex->m_pPYNode && itlex->m_pPYNode->m_nWordId > 0) {
    661    0   yongsun                         unsigned sz = itlex->m_pPYNode->m_nWordId;
    662    0   yongsun                         const CPinyinTrie::TWordIdInfo* p = itlex->m_pPYNode->getWordIdPtr();
    663    0   yongsun                         for (unsigned int i = 0; i < sz; ++i, ++p) {
    664    0   yongsun                             if (m_bGBK || p->m_bGBK == 0) {
    665    0   yongsun                                 cp.m_Candi.m_WordId = p->m_id;
    666    0   yongsun                                 cp.m_Candi.m_String = (*m_pPinyinTrie)[cp.m_Candi.m_WordId];
    667    0   yongsun 
    668    0   yongsun                                 //sorting according to the order in PinYinTire
    669    0   yongsun                                 cp.m_Rank = TCandiRank(false, false, len, false, i);
    670    0   yongsun                                 it_map = map.find(cp.m_Candi.m_WordId);
    671    0   yongsun                                 if (it_map == map.end() || cp.m_Rank < it_map->second.m_Rank)
    672    0   yongsun                                     map[cp.m_Candi.m_WordId] = cp;
    673    0   yongsun                             }
    674    0   yongsun                         }
    675    0   yongsun                     }
    676    0   yongsun                 } else {
    677    0   yongsun                     cp.m_Candi.m_WordId = itlex->m_WordId;
    678    0   yongsun                     cp.m_Candi.m_String = bone->m_String.c_str();
    679    0   yongsun                     cp.m_Rank = TCandiRank(false, false, len, false, 0);
    680    0   yongsun                     it_map = map.find(cp.m_Candi.m_WordId);
    681    0   yongsun                     if (it_map == map.end() || cp.m_Rank < it_map->second.m_Rank)
    682    0   yongsun                         map[cp.m_Candi.m_WordId] = cp;
    683    0   yongsun                 }
    684    0   yongsun             }
    685    0   yongsun         }
    686    0   yongsun 
    687    0   yongsun         if (!found) break;
    688    0   yongsun 
    689    0   yongsun         CLatticeStates::iterator its = b->m_pInnerData->m_LatticeNodes.begin();
    690    0   yongsun         CLatticeStates::iterator ite = b->m_pInnerData->m_LatticeNodes.end();
    691    0   yongsun         for (;  its != ite; ++its) {
    692    0   yongsun             if (its->m_pBackTraceNode && its->m_pBackTraceNode->m_BoneAfter == bone) {
    693    0   yongsun                 cp.m_Candi.m_WordId = its->m_BackTraceWordId;
    694    0   yongsun                 cp.m_Candi.m_String = (*m_pPinyinTrie)[cp.m_Candi.m_WordId];
    695    0   yongsun                 if (cp.m_Candi.m_String == NULL)
    696    0   yongsun                     cp.m_Candi.m_String = bone->m_String.c_str();
    697    0   yongsun                 #ifdef _USE_RAW_PROBABILITY
    698    0   yongsun 
    699    0   yongsun                     #ifdef DEBUG
    700    0   yongsun                         //assert(its->m_pBackTraceNode->m_Score < 0.0 && its->m_Score < 0.0);
    701    0   yongsun                     #endif
    702    0   yongsun 
    703    0   yongsun                     cp.m_Rank = TCandiRank(false, false, len, true, its->m_Score / its->m_pBackTraceNode->m_Score);
    704    0   yongsun                 #else
    705    0   yongsun                     cp.m_Rank = TCandiRank(false, false, len, true, its->m_Score - its->m_pBackTraceNode->m_Score);
    706    0   yongsun                 #endif
    707    0   yongsun                 it_map = map.find(cp.m_Candi.m_WordId);
    708    0   yongsun                 if (it_map == map.end() || cp.m_Rank < it_map->second.m_Rank)
    709    0   yongsun                     map[cp.m_Candi.m_WordId] = cp;
    710    0   yongsun             }
    711    0   yongsun         }
    712    0   yongsun 
    713    0   yongsun         m_EffectiveCandiBoneEnd = b;
    714    0   yongsun         ++b;
    715    0   yongsun         ++len;
    716    0   yongsun     }
    717    0   yongsun 
    718    0   yongsun     std::vector<TCandiPairPtr> vec;
    719    0   yongsun 
    720    0   yongsun     vec.reserve(map.size());
    721    0   yongsun     std::map<unsigned int, TCandiPair>::iterator it_mapE = map.end();
    722    0   yongsun     for (it_map = map.begin(); it_map != it_mapE; ++it_map)
    723    0   yongsun         vec.push_back(TCandiPairPtr(&(it_map->second)));
    724    0   yongsun     std::make_heap(vec.begin(), vec.end());
    725    0   yongsun     std::sort_heap(vec.begin(), vec.end());
    726    0   yongsun 
    727    0   yongsun     for (int i=0, sz=vec.size(); i < sz; ++i)
    728    0   yongsun         result.push_back(vec[i].m_Ptr->m_Candi);
    729    0   yongsun }
    730    0   yongsun 
    731    0   yongsun int
    732    0   yongsun CIMIContext::getBestSentence(wstring & result, CSkeletonIter boneStart,
    733    0   yongsun                              CSkeletonIter boneEnd, bool original_format)
    734    0   yongsun {
    735    0   yongsun     int nWordConverted = 0;
    736    0   yongsun     result.clear();
    737    0   yongsun 
    738    0   yongsun     // no need to check begin(), because firstBone must at least has some
    739    0   yongsun     // auto best word or user selection best word starting from, this rule
    740    0   yongsun     // must be followed in this call
    741    0   yongsun     int len, prefix = 0;
    742    0   yongsun     CSkeletonIter realStart = boneStart;
    743    0   yongsun     while (realStart->m_pInnerData->m_BWType == CBoneInnerData::NoBestWordStartHere) {
    744    0   yongsun         ++prefix;
    745    0   yongsun         --realStart;
    746    0   yongsun     }
    747    0   yongsun 
    748    0   yongsun     while (true) {
    749    0   yongsun         #ifdef DEBUG
    750    0   yongsun             //assert(realStart->m_pInnerData->m_BWType != CBoneInnerData::NoBestWordStartHere);
    751    0   yongsun         #endif
    752    0   yongsun 
    753    0   yongsun         CSkeletonIter bone = boneStart;
    754    0   yongsun         CSkeletonIter rightBone = realStart->m_pInnerData->m_BestWord.m_BoneEnd;
    755    0   yongsun         if (realStart->m_BoneType != CBone::NODE_PINYIN &&
    756    0   yongsun                   realStart->m_BoneType != CBone::NODE_INCOMPLETE_PINYIN) {
    757    0   yongsun             for (; bone != rightBone && bone != boneEnd; ++bone) {
    758    0   yongsun                  if (!original_format)
    759    0   yongsun                      result.push_back(bone->m_String[0]);
    760    0   yongsun                  else
    761    0   yongsun                      result.push_back((unsigned)bone->m_BoundaryType);
    762    0   yongsun             }
    763    0   yongsun         } else {
    764    0   yongsun             ++nWordConverted;
    765    0   yongsun             // get the length from boneStart to current best word tail or end of range
    766    0   yongsun             for (len=0; bone != rightBone && bone != boneEnd; ++bone)
    767    0   yongsun                  ++len;
    768    0   yongsun             result.append(realStart->m_pInnerData->m_BestWord.m_String+prefix, len);
    769    0   yongsun         }
    770    0   yongsun         if (bone == boneEnd)
    771    0   yongsun             break;
    772    0   yongsun         boneStart = realStart = bone;
    773    0   yongsun         prefix = 0;
    774    0   yongsun     }
    775    0   yongsun 
    776    0   yongsun     return nWordConverted;
    777    0   yongsun }
    778    0   yongsun 
    779    0   yongsun /**
    780    0   yongsun  * Search from the bone to the tail. the bone can not beyond first psuedo tail.
    781    0   yongsun  * Before search, all BoneInnerData should be set. The states of the bones
    782    0   yongsun  * who's ahead of the bone would not be affected by this function. Yet, states
    783    0   yongsun  * of the bones beyond this bone will be updated or refreshed.
    784    0   yongsun  *
    785    0   yongsun  * After lattice search, only one best path are backtraced and each best word
    786    0   yongsun  * will be attached to corresponding bone.
    787    0   yongsun  */
    788    0   yongsun void
    789    0   yongsun CIMIContext::searchFrom(CSkeletonIter boneStart)
    790    0   yongsun {
    791    0   yongsun     // iterate every bone from boneStart to the second psuedo tail
    792    0   yongsun     CSkeletonIter itEnd = ++getLastBone();
    793    0   yongsun     CSkeletonIter bone = boneStart;
    794    0   yongsun     CSkeletonIter boneFirst = m_Skeleton.begin();
    795    0   yongsun     for (; bone != itEnd; ) {
    796    0   yongsun         if (bone == boneFirst) {
    797    0   yongsun             // do not clear USER_SELECTION_BEST_WORD !!
    798    0   yongsun             bone->m_pInnerData->m_LexiconStates.clear();
    799    0   yongsun             bone->m_pInnerData->m_LatticeNodes.clear();
    800    0   yongsun             #ifdef _USE_RAW_PROBABILITY
    801    0   yongsun                 bone->m_pInnerData->m_LatticeNodes.push_back(TLatticeState(-1.0, bone));
    802    0   yongsun             #else
    803    0   yongsun                 bone->m_pInnerData->m_LatticeNodes.push_back(TLatticeState(0.0, bone));
    804    0   yongsun             #endif
    805    0   yongsun         } else {
    806    0   yongsun             buildLatticeStates(bone);
    807    0   yongsun         }
    808    0   yongsun         switch (bone->m_BoneType) {
    809    0   yongsun         case CBone::NODE_TAIL:
    810    0   yongsun             bone = forwardTailBone(bone);
    811    0   yongsun             break;
    812    0   yongsun         case CBone::NODE_PINYIN:
    813    0   yongsun             bone = forwardPinyinBone(bone);
    814    0   yongsun             break;
    815    0   yongsun         case CBone::NODE_INCOMPLETE_PINYIN:
    816    0   yongsun         case CBone::NODE_INVALID_PINYIN:
    817    0   yongsun             bone = forwardInvalidBone(bone);
    818    0   yongsun             break;
    819    0   yongsun         case CBone::NODE_PUNC:
    820    0   yongsun             bone = forwardPuncBone(bone);
    821    0   yongsun             break;
    822    0   yongsun         case CBone::NODE_ASCII:
    823    0   yongsun         case CBone::NODE_SIMBOL:
    824    0   yongsun         case CBone::NODE_DIGITAL:
    825    0   yongsun             bone = forwardNonPinyinBone(bone);
    826    0   yongsun             break;
    827    0   yongsun         };
    828    0   yongsun     }
    829    0   yongsun 
    830    0   yongsun     //Build the last bone's lattice states
    831    0   yongsun     buildLatticeStates(itEnd);
    832    0   yongsun 
    833    0   yongsun     #ifdef DEBUG
    834    0   yongsun         //assert(itEnd->m_pInnerData->m_LatticeNodes.size() == 1);
    835    0   yongsun     #endif
    836    0   yongsun 
    837    0   yongsun     // clear all non-user selection
    838    0   yongsun     for (bone=boneFirst; bone != itEnd; ++bone) {
    839    0   yongsun         if (bone->m_pInnerData->m_BWType != CBoneInnerData::UserSelectedBestWord)
    840    0   yongsun             bone->m_pInnerData->m_BWType = CBoneInnerData::NoBestWordStartHere;
    841    0   yongsun     }
    842    0   yongsun 
    843    0   yongsun     // back tracing, find the best path
    844    0   yongsun     TLatticeState* bs = &(*(itEnd->m_pInnerData->m_LatticeNodes.begin()));
    845    0   yongsun     while (bs->m_BoneAfter != boneFirst) {
    846    0   yongsun         TLatticeState* fs = bs->m_pBackTraceNode;
    847    0   yongsun         CSkeletonIter  fb = fs->m_BoneAfter;
    848    0   yongsun         if (fb->m_pInnerData->m_BWType != CBoneInnerData::UserSelectedBestWord) {
    849    0   yongsun             fb->m_pInnerData->m_BWType = CBoneInnerData::BestWordStartHere;
    850    0   yongsun         }
    851    0   yongsun         fb->m_pInnerData->m_BestWord.m_BoneStart = fb;
    852    0   yongsun         fb->m_pInnerData->m_BestWord.m_BoneEnd = bs->m_BoneAfter;
    853    0   yongsun         fb->m_pInnerData->m_BestWord.m_WordId = bs->m_BackTraceWordId;
    854    0   yongsun         fb->m_pInnerData->m_BestWord.m_String = (*m_pPinyinTrie)[bs->m_BackTraceWordId];
    855    0   yongsun         if (fb->m_pInnerData->m_BestWord.m_String == NULL)
    856    0   yongsun             fb->m_pInnerData->m_BestWord.m_String = fb->m_String.c_str();
    857    0   yongsun         bs = fs;
    858    0   yongsun     }
    859    0   yongsun }
    860    0   yongsun 
    861    0   yongsun #ifdef DEBUG
    862    0   yongsun static double min_ts = 1.0;
    863    0   yongsun #endif
    864    0   yongsun 
    865    0   yongsun static double s_history_distribution[11] = {
    866    0   yongsun     0.0, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50
    867    0   yongsun };
    868    0   yongsun 
    869    0   yongsun void
    870    0   yongsun CIMIContext::transferBetween(CSkeletonIter h, CSkeletonIter t, unsigned int id, double ic)
    871    0   yongsun {
    872    0   yongsun     CLatticeStates& latss1 = h->m_pInnerData->m_LatticeNodes;
    873    0   yongsun     CLatticeStates& latss2 = t->m_pInnerData->m_LatticeNodes;
    874    0   yongsun     CLatticeStates::iterator it1 = latss1.begin();
    875    0   yongsun     CLatticeStates::iterator ite = latss1.end();
    876    0   yongsun 
    877    0   yongsun     #ifdef _USE_RAW_PROBABILITY
    878    0   yongsun         TLatticeState node(-1.0, t);
    879    0   yongsun         TSentenceScore efic(1.0);
    880    0   yongsun     #else
    881    0   yongsun         TLatticeState node(0.0, t);
    882    0   yongsun         TSentenceScore efic(0.0);
    883    0   yongsun     #endif
    884    0   yongsun 
    885    0   yongsun     if (h->m_pInnerData->m_BestWord.m_WordId == id &&
    886    0   yongsun             h->m_pInnerData->m_BWType == CBoneInnerData::UserSelectedBestWord) {
    887    0   yongsun         #ifdef _USE_RAW_PROBABILITY
    888    0   yongsun             efic = efic * TSentenceScore(30000, 1.0);
    889    0   yongsun         #else
    890    0   yongsun             efic = ic - 30000.0;
    891    0   yongsun         #endif
    892    0   yongsun     }
    893    0   yongsun 
    894    0   yongsun     double weight_h = s_history_distribution[m_HistoryPower];
    895    0   yongsun     double weight_s = 1.0 - weight_h;
    896    0   yongsun 
    897    0   yongsun     for (; it1 != ite; ++it1) {
    898    0   yongsun         node.m_pBackTraceNode = &(*it1);
    899    0   yongsun         node.m_BackTraceWordId = id;
    900    0   yongsun         #ifdef _USE_RAW_PROBABILITY
    901    0   yongsun             // the fact is that we could only use bigram cache
    902    0   yongsun             // and all first level node in the language model are non-empty
    903    0   yongsun             double ts = m_pModel->transfer(it1->m_State, id, node.m_State);
    904    0   yongsun             m_pModel->historify(node.m_State);
    905    0   yongsun             // we do not want to shrink the history state if it could be found in cache
    906    0   yongsun             if (node.m_State.getLevel() == 0 && m_pHistory->seenBefore(id)) {
    907    0   yongsun                 node.m_State.setIdx(id);  // an psuedo unigram node state
    908    0   yongsun             }
    909    0   yongsun 
    910    0   yongsun             #ifdef DEBUG
    911    0   yongsun                 assert(it1->m_Score < TSentenceScore(0.0));
    912    0   yongsun             #endif
    913    0   yongsun 
    914    0   yongsun             double cost = ts;
    915    0   yongsun             if (m_pHistory) {
    916    0   yongsun                 unsigned history[2] = {m_pModel->lastWordId(it1->m_State), id};
    917    0   yongsun                 double hpr = m_pHistory->pr(history, history+2);
    918    0   yongsun                 cost = weight_s * ts + weight_h*hpr;
    919    0   yongsun             }
    920    0   yongsun             node.m_Score = it1->m_Score * efic * TSentenceScore(cost);
    921    0   yongsun 
    922    0   yongsun             #ifdef DEBUG
    923    0   yongsun                 if (!(node.m_Score < TSentenceScore(-0.0))) {
    924    0   yongsun                     static char strangeValue[256];
    925    0   yongsun 
    926    0   yongsun                     node.m_Score.toString(strangeValue);
    927    0   yongsun                     printf("\n***node.m_Score invalid %s ***\n", strangeValue);
    928    0   yongsun 
    929    0   yongsun                     it1->m_Score.toString(strangeValue);
    930    0   yongsun                     printf("***it1->m_Score is %s ***\n", strangeValue);
    931    0   yongsun 
    932    0   yongsun                     efic.toString(strangeValue);
    933    0   yongsun                     printf("***efic=%s, ic=(%lf)***\n", strangeValue, ic);
    934    0   yongsun 
    935    0   yongsun                     TSentenceScore(cost).toString(strangeValue);
    936    0   yongsun                     printf("***cost=%s(%lf), ts=(%16lf)***\n", strangeValue, cost, ts);
    937    0   yongsun 
    938    0   yongsun                     fflush(stdout);
    939    0   yongsun                     assert(false);
    940    0   yongsun                 }
    941    0   yongsun             #endif
    942    0   yongsun 
    943    0   yongsun         #else
    944    0   yongsun             double ts = m_pModel->transferNegLog(it1->m_State, id, node.m_State);
    945    0   yongsun             m_pModel->historify(node.m_State);
    946    0   yongsun             node.m_Score = it1->m_Score + ts + ic;
    947    0   yongsun         #endif
    948    0   yongsun 
    949    0   yongsun         latss2.push_back(node);
    950    0   yongsun     }
    951    0   yongsun }
    952    0   yongsun 
    953    0   yongsun void
    954    0   yongsun CIMIContext::buildLatticeStates(CSkeletonIter bone)
    955    0   yongsun {
    956    0   yongsun     bool bSingleSyllable, bSingleShort;
    957    0   yongsun     unsigned i, sz;
    958    0   yongsun     CSkeletonIter bonePrev = bone;
    959    0   yongsun 
    960    0   yongsun 
    961    0   yongsun     --bonePrev;
    962    0   yongsun     CBoneInnerData & innerData = *(bone->m_pInnerData);
    963    0   yongsun     CLexiconStates::iterator itLexState = innerData.m_LexiconStates.begin();
    964    0   yongsun     CLexiconStates::iterator itLexStateE = innerData.m_LexiconStates.end();
    965    0   yongsun     innerData.m_LatticeNodes.clear();
    966    0   yongsun     for (; itLexState != itLexStateE; ++itLexState) {
    967    0   yongsun         CLexiconState& ls = *itLexState;
    968    0   yongsun 
    969    0   yongsun         // the user selected word may be cut in first pruning process below,
    970    0   yongsun         // So, just let it go first, when it ends here
    971    0   yongsun         CBoneInnerData* pbid = ls.m_BoneStart->m_pInnerData;
    972    0   yongsun         if (pbid->m_BWType == CBoneInnerData::UserSelectedBestWord &&
    973    0   yongsun                     pbid->m_BestWord.m_BoneEnd == bone) {
    974    0   yongsun             #ifdef _USE_RAW_PROBABILITY
    975    0   yongsun                 transferBetween(ls.m_BoneStart, bone, pbid->m_BestWord.m_WordId, 1.0);
    976    0   yongsun             #else
    977    0   yongsun                 transferBetween(ls.m_BoneStart, bone, pbid->m_BestWord.m_WordId, 0.0);
    978    0   yongsun             #endif
    979    0   yongsun         }
    980    0   yongsun 
    981    0   yongsun         if (!ls.m_bPinyin) {
    982    0   yongsun             #ifdef _USE_RAW_PROBABILITY
    983    0   yongsun                 transferBetween(ls.m_BoneStart, bone, ls.m_WordId, 1.0);
    984    0   yongsun             #else
    985    0   yongsun                 transferBetween(ls.m_BoneStart, bone, ls.m_WordId, 0.0);
    986    0   yongsun             #endif
    987    0   yongsun         } else {
    988    0   yongsun             // Cutting words with little unigram possibilities
    989    0   yongsun             // at least 2, at most 32 of the words would be tried
    990    0   yongsun             // if unseed word starting from some position in the first 32
    991    0   yongsun             // candidates(ranked according to unigram pr in lexicon), do not
    992    0   yongsun             // let them be checked.
    993    0   yongsun             bSingleShort = bSingleSyllable = (ls.m_BoneStart == bonePrev);
    994    0   yongsun             if (bSingleSyllable) {
    995    0   yongsun                 register unsigned char uc = ls.m_BoneStart->m_String[0];
    996    0   yongsun                 bSingleShort = ((ls.m_BoneStart->m_String.size() == 1 && (uc != 'a' && uc != 'o' && uc !='e')) ||
    997    0   yongsun                                 (ls.m_BoneStart->m_String.size() == 2 && (ls.m_BoneStart->m_String[1] == 'h')));
    998    0   yongsun             }
    999    0   yongsun             //bSingleShort = (bSingleSyllable && !(m_pPinyinTrie->isValid(ls.m_pPYNode, false)));
   1000    0   yongsun 
   1001    0   yongsun             const CPinyinTrie::TNode* pn = ls.m_pPYNode;
   1002    0   yongsun             const CPinyinTrie::TWordIdInfo* pwidinfo = pn->getWordIdPtr();
   1003    0   yongsun             sz=pn->m_nWordId;
   1004    0   yongsun             if (bSingleShort)
   1005    0   yongsun                 sz = 12;
   1006    0   yongsun             else if (sz > 26)
   1007    0   yongsun                 sz = 26;
   1008    0   yongsun 
   1009    0   yongsun             int count = 0;
   1010    0   yongsun             for (i=0; count < sz && i < sz && (pwidinfo[i].m_bSeen == 1 || count < 2); ++i) {
   1011    0   yongsun                 if (m_bGBK || pwidinfo[i].m_bGBK == 0) {
   1012    0   yongsun                     #ifdef _USE_RAW_PROBABILITY
   1013    0   yongsun                         transferBetween(ls.m_BoneStart, bone, pwidinfo[i].m_id, 1.0);
   1014    0   yongsun                     #else
   1015    0   yongsun                         transferBetween(ls.m_BoneStart, bone, pwidinfo[i].m_id, 0.0);
   1016    0   yongsun                     #endif
   1017    0   yongsun                     ++count;
   1018    0   yongsun                 }
   1019    0   yongsun             }
   1020    0   yongsun             #ifdef _USE_RAW_PROBABILITY
   1021  238  tchaikov                 // try cached words
   1022    0   yongsun                 if (m_pHistory) {
   1023    0   yongsun                     for (sz = pn->m_nWordId; i < sz; ++i) {
   1024    0   yongsun                         if (m_bGBK || pwidinfo[i].m_bGBK == 0) {
   1025    0   yongsun                             if (m_pHistory->seenBefore(pwidinfo[i].m_id)) {
   1026    0   yongsun                                 transferBetween(ls.m_BoneStart, bone, pwidinfo[i].m_id, 1.0);
   1027    0   yongsun                             }
   1028    0   yongsun                         }
   1029    0   yongsun                     }
   1030    0   yongsun                 }
   1031    0   yongsun             #endif
   1032    0   yongsun         }
   1033    0   yongsun     }
   1034    0   yongsun }
   1035    0   yongsun 
   1036    0   yongsun 
   1037    0   yongsun /**
   1038    0   yongsun * Fussy Pinyin:
   1039    0   yongsun *
   1040    0   yongsun */
   1041    0   yongsun CSkeletonIter
   1042    0   yongsun CIMIContext::forwardOnePinyinBone(CSkeletonIter bone)
   1043    0   yongsun {
   1044    0   yongsun     const CPinyinTrie::TNode *pn = NULL;
   1045    0   yongsun 
   1046    0   yongsun     //clear next bone's lexicon states
   1047    0   yongsun     CSkeletonIter boneNext = ++CSkeletonIter(bone);
   1048    0   yongsun     CLexiconStates& lexss2 = boneNext->m_pInnerData->m_LexiconStates;
   1049    0   yongsun     lexss2.clear();
   1050    0   yongsun 
   1051    0   yongsun     // insert the root PinYin Lexicon node
   1052    0   yongsun     CLexiconStates& lexss1 = bone->m_pInnerData->m_LexiconStates;
   1053    0   yongsun     CLexiconStates::iterator it1 = lexss1.begin();
   1054    0   yongsun     CLexiconStates::iterator ite = lexss1.end();
   1055    0   yongsun     for (; it1 != ite; ++it1) {
   1056    0   yongsun         if (it1->m_bPinyin) {
   1057    0   yongsun             pn = m_pPinyinTrie->transfer(it1->m_pPYNode, bone->m_String.c_str());
   1058    0   yongsun             if (pn != NULL && (pn = m_pPinyinTrie->transfer(pn, TWCHAR('\''))) != NULL) {
   1059    0   yongsun                 lexss2.push_back(CLexiconState(it1->m_BoneStart, pn));
   1060    0   yongsun             }
   1061    0   yongsun         }
   1062    0   yongsun     }
   1063    0   yongsun 
   1064    0   yongsun     //try transfer from root state of the lexicon
   1065    0   yongsun     pn = m_pPinyinTrie->transfer(bone->m_String.c_str());
   1066    0   yongsun     if (pn != NULL && (pn = m_pPinyinTrie->transfer(pn, TWCHAR('\''))) != NULL) {
   1067    0   yongsun         lexss2.push_back(CLexiconState(bone, pn));
   1068    0   yongsun     }
   1069    0   yongsun 
   1070    0   yongsun     return boneNext;
   1071    0   yongsun }
   1072    0   yongsun 
   1073    0   yongsun CSkeletonIter
   1074    0   yongsun CIMIContext::forwardPinyinBone(CSkeletonIter bone)
   1075    0   yongsun {
   1076    0   yongsun     if (bone->m_pInnerData->m_BWType == CBoneInnerData::UserSelectedBestWord && m_bStrictLeft2Right) {
   1077    0   yongsun         CSkeletonIter boneLeft = bone;
   1078    0   yongsun         CSkeletonIter boneRight = bone->m_pInnerData->m_BestWord.m_BoneEnd;
   1079    0   yongsun         for (; bone != boneRight; ++bone)
   1080    0   yongsun             (++CSkeletonIter(bone))->m_pInnerData->clear();
   1081    0   yongsun         boneRight->m_pInnerData->m_LexiconStates.push_back(
   1082    0   yongsun             CLexiconState(boneLeft, boneLeft->m_pInnerData->m_BestWord.m_WordId)
   1083    0   yongsun         );
   1084    0   yongsun         return boneRight;
   1085    0   yongsun     } else {
   1086    0   yongsun         return forwardOnePinyinBone(bone);
   1087    0   yongsun     }
   1088    0   yongsun }
   1089    0   yongsun 
   1090    0   yongsun CSkeletonIter
   1091    0   yongsun CIMIContext::forwardInvalidBone(CSkeletonIter bone)
   1092    0   yongsun {
   1093    0   yongsun     CSkeletonIter boneNext = ++CSkeletonIter(bone);
   1094    0   yongsun     CLexiconStates & lss = boneNext->m_pInnerData->m_LexiconStates;
   1095    0   yongsun     lss.clear();
   1096    0   yongsun     lss.push_back(CLexiconState(bone, (unsigned int)UNKNOWN_WORD_ID));
   1097    0   yongsun 
   1098    0   yongsun     return boneNext;
   1099    0   yongsun }
   1100    0   yongsun 
   1101    0   yongsun CSkeletonIter
   1102    0   yongsun CIMIContext::forwardPuncBone(CSkeletonIter bone)
   1103    0   yongsun {
   1104    0   yongsun     unsigned int wid = m_pPinyinTrie->getSimbolId(bone->m_String);
   1105    0   yongsun 
   1106    0   yongsun     CSkeletonIter boneNext = ++CSkeletonIter(bone);
   1107    0   yongsun     CLexiconStates & lss = boneNext->m_pInnerData->m_LexiconStates;
   1108    0   yongsun     lss.clear();
   1109    0   yongsun     lss.push_back(CLexiconState(bone, wid));
   1110    0   yongsun 
   1111    0   yongsun     return boneNext;
   1112    0   yongsun }
   1113    0   yongsun 
   1114    0   yongsun CSkeletonIter
   1115    0   yongsun CIMIContext::forwardNonPinyinBone(CSkeletonIter bone)
   1116    0   yongsun {
   1117    0   yongsun     CSkeletonIter boneNext = ++CSkeletonIter(bone);
   1118    0   yongsun     CLexiconStates & lss = boneNext->m_pInnerData->m_LexiconStates;
   1119    0   yongsun     lss.clear();
   1120    0   yongsun     lss.push_back(CLexiconState(bone, (unsigned int)UNKNOWN_WORD_ID));
   1121    0   yongsun 
   1122    0   yongsun     return boneNext;
   1123    0   yongsun }
   1124    0   yongsun 
   1125    0   yongsun 
   1126    0   yongsun CSkeletonIter
   1127    0   yongsun CIMIContext::forwardTailBone(CSkeletonIter bone)
   1128    0   yongsun {
   1129    0   yongsun     CSkeletonIter boneNext = ++CSkeletonIter(bone);
   1130    0   yongsun     CLexiconStates & lss = boneNext->m_pInnerData->m_LexiconStates;
   1131    0   yongsun     lss.clear();
   1132    0   yongsun     lss.push_back(CLexiconState(bone, OOV_WORD_ID));
   1133    0   yongsun 
   1134    0   yongsun     return boneNext;
   1135    0   yongsun }
   1136    0   yongsun 
   1137    0   yongsun CBone::CBone(const CBone& b)
   1138    0   yongsun     : m_BoneType(b.m_BoneType), m_BoundaryType(b.m_BoundaryType),
   1139    0   yongsun       m_String(b.m_String), m_pInnerData(NULL)
   1140    0   yongsun {
   1141    0   yongsun }
   1142    0   yongsun 
   1143    0   yongsun CBone::CBone(int boundType, int boneType)
   1144    0   yongsun     : m_BoneType(boneType), m_BoundaryType(boundType),
   1145    0   yongsun       m_String(), m_pInnerData(NULL)
   1146    0   yongsun {
   1147    0   yongsun }
   1148    0   yongsun 
   1149    0   yongsun CBone::CBone(const TWCHAR* pwc, int boundType, int boneType)
   1150    0   yongsun     : m_BoneType(boneType), m_BoundaryType(boundType),
   1151    0   yongsun       m_String(pwc), m_pInnerData(NULL)
   1152    0   yongsun {
   1153    0   yongsun }
   1154    0   yongsun 
   1155    0   yongsun CBone::CBone(const TWCHAR* pwc,  size_t len, int boundType, int boneType)
   1156    0   yongsun     : m_BoneType(boneType), m_BoundaryType(boundType),
   1157    0   yongsun       m_String(pwc, len), m_pInnerData(NULL)
   1158    0   yongsun {
   1159    0   yongsun }
   1160    0   yongsun 
   1161    0   yongsun CBone::~CBone()
   1162    0   yongsun {
   1163  182  tchaikov     delete m_pInnerData;
   1164    0   yongsun     m_pInnerData = NULL;
   1165    0   yongsun }
   1166    0   yongsun 
   1167    0   yongsun bool
   1168    0   yongsun CBone::isUserSelectionStart(void)
   1169    0   yongsun {
   1170    0   yongsun     return (m_pInnerData != NULL &&
   1171    0   yongsun               m_pInnerData->m_BWType == CBoneInnerData::UserSelectedBestWord);
   1172    0   yongsun }
   1173    0   yongsun 
   1174    0   yongsun int
   1175    0   yongsun cursorMapping(CSkeletonIter head1, CSkeletonIter tail1,
   1176    0   yongsun               CSkeletonIter head2, CSkeletonIter tail2,
   1177    0   yongsun               CSkeleton& result,
   1178    0   yongsun               CSkeletonIter& cursor, int& cursorIdx, bool stickLeft = false)
   1179    0   yongsun {
   1180    0   yongsun     TSkelCursor sc(head1, tail1, head2, tail2, true);
   1181    0   yongsun     TSkelCursor::TPos cp(cursor, cursorIdx);
   1182    0   yongsun 
   1183    0   yongsun     int  len = 0;
   1184    0   yongsun     bool found =false;
   1185    0   yongsun 
   1186    0   yongsun     while (true) {
   1187    0   yongsun         found = sc.ensureCursor(cp);
   1188    0   yongsun         if (found) break;
   1189    0   yongsun         if (!sc.hasNext()) break;
   1190    0   yongsun         sc.next(true);
   1191    0   yongsun         ++len;
   1192    0   yongsun     }
   1193    0   yongsun 
   1194    0   yongsun     if (found) {
   1195    0   yongsun         TSkelCursor::TPos nc = sc.getPosition();
   1196    0   yongsun 
   1197    0   yongsun         int cmplen = cursorIdx = 0,  nNode = 0;
   1198    0   yongsun         for (cursor = result.begin(); cursor != result.end(); ++cmplen) {
   1199    0   yongsun             if (cmplen == len) break;
   1200    0   yongsun             ++cursorIdx;
   1201    0   yongsun             if (cursorIdx >= cursor->m_String.size()) {
   1202    0   yongsun                 ++cursor;
   1203    0   yongsun                 ++nNode;
   1204    0   yongsun                 cursorIdx = 0;
   1205    0   yongsun             }
   1206    0   yongsun         }
   1207    0   yongsun         if (cmplen == len) { // now we found that
   1208    0   yongsun             if (stickLeft && cursor != result.begin() && cursorIdx == 0) {
   1209    0   yongsun                 --cursor;
   1210    0   yongsun                 --nNode;
   1211    0   yongsun                 cursorIdx = cursor->m_String.size();
   1212    0   yongsun             }
   1213    0   yongsun         }
   1214    0   yongsun         return nNode;
   1215    0   yongsun     }
   1216    0   yongsun     return -1;
   1217    0   yongsun }
   1218    0   yongsun 
   1219    0   yongsun /**
   1220    0   yongsun * it is illegal if boneStart == boneEnd and skel.size() == 0
   1221    0   yongsun *
   1222    0   yongsun * 1. from current position, seeking left for 3 bones without HumanBoundary, or bones
   1223    0   yongsun *    of non-pinyin type between it. --> its, also can not beyond m_CandiBone.
   1224    0   yongsun * 2. in [its, bonStart) from left to right, find the first bone would cause different
   1225    0   yongsun *    segmentation result.  --> itd, other wise itds <-- boneStart
   1226    0   yongsun * 3. from [itd, boneEnd), do automatic segment
   1227    0   yongsun * 4. after boneEnd, util user boundary or non-pinyin bone
   1228    0   yongsun *    or segment result equals to original. --> itd2.
   1229    0   yongsun * 5. [itd, itd2) re-segment, all resulting bone goes into a new skeleton --> newskel.
   1230    0   yongsun *    while convert old cursor position into new position.
   1231    0   yongsun * 6. erase old or seg-affected nodes and splice in the newskel.
   1232    0   yongsun *    do search if needed
   1233    0   yongsun *
   1234    0   yongsun * The key part of this would seeking a solution for finding the automatic sentence
   1235    0   yongsun * segmentation result.
   1236    0   yongsun */
   1237    0   yongsun bool
   1238    0   yongsun CIMIContext::modifyAndReseg(CSkeletonIter boneStart, CSkeletonIter boneEnd, CSkeleton& skel,
   1239    0   yongsun                             CSkeletonIter& cursor, int& cursorIdx, CSkeletonIter& candiStart,
   1240    0   yongsun                             bool stickLeft, bool doSearch)
   1241    0   yongsun {
   1242    0   yongsun     CSkeleton newskel;
   1243    0   yongsun 
   1244    0   yongsun     // Try to look_left to prevent potential segmentation insufficiency
   1245    0   yongsun     CSkeletonIter nit, oit, its = boneStart;
   1246    0   yongsun     int look_left = 0;
   1247    0   yongsun     for (; look_left < 3 && its != getFirstBone() && its != candiStart; ++look_left) {
   1248    0   yongsun         --its;
   1249    0   yongsun         if (!its->isPinyinNode() || its->m_BoundaryType == CBone::USER_BOUNDARY) {
   1250    0   yongsun             ++its;
   1251    0   yongsun             break;
   1252    0   yongsun         }
   1253    0   yongsun         skel.push_front(*its);
   1254    0   yongsun     }
   1255    0   yongsun 
   1256    0   yongsun     // do Syllable segment on the virtual new list
   1257    0   yongsun     segPinyin(skel.begin(), skel.end(), boneEnd, getLastBone(), newskel);
   1258    0   yongsun 
   1259    0   yongsun     // Remapping the new cursor
   1260    0   yongsun     int  ncIdx = cursorMapping(skel.begin(), skel.end(), boneEnd, getLastBone(), newskel, cursor, cursorIdx, stickLeft);
   1261    0   yongsun 
   1262    0   yongsun     // Skip previous look-left nodes that are same with original
   1263    0   yongsun     int first_diff = 0;
   1264    0   yongsun     CSkeletonIter dif_oits=skel.begin();
   1265    0   yongsun     for (oit=skel.begin(), nit=newskel.begin(); first_diff < look_left; ++first_diff) {
   1266    0   yongsun         if (nit->m_String.size() != oit->m_String.size()) {
   1267    0   yongsun             dif_oits = oit;
   1268    0   yongsun             break;
   1269    0   yongsun         }
   1270    0   yongsun         if (ncIdx == 0) cursor = its;
   1271    0   yongsun         --ncIdx;
   1272    0   yongsun         ++its;
   1273    0   yongsun         ++nit;
   1274    0   yongsun         ++oit;
   1275    0   yongsun         newskel.pop_front();
   1276    0   yongsun     }
   1277    0   yongsun 
   1278    0   yongsun     // prepare to restore the CandiStart
   1279    0   yongsun     bool candiStartPositionReset = (its == candiStart);
   1280    0   yongsun 
   1281    0   yongsun     // Prepare for cursor reposition to restore after modify
   1282    0   yongsun     CSkeletonIter leftIt;
   1283    0   yongsun     bool leftItIsHead = (its == getFirstBone());
   1284    0   yongsun     if (!leftItIsHead) {
   1285    0   yongsun         leftIt = its;
   1286    0   yongsun         --leftIt;
   1287    0   yongsun     }
   1288    0   yongsun 
   1289    0   yongsun     // modify original node list
   1290    0   yongsun     bool affectCandidates = modify(its, getLastBone(), newskel, doSearch);
   1291    0   yongsun 
   1292    0   yongsun     // Reposition cursor
   1293    0   yongsun     if (ncIdx >= 0) {
   1294    0   yongsun         cursor = (leftItIsHead)?(getFirstBone()):(++CSkeletonIter(leftIt));
   1295    0   yongsun         for (int i=0; i < ncIdx; ++i)
   1296    0   yongsun             ++cursor;
   1297    0   yongsun     }
   1298    0   yongsun 
   1299    0   yongsun     // Reposition candiStart
   1300    0   yongsun     if (candiStartPositionReset) {
   1301    0   yongsun         candiStart = (leftItIsHead)?(getFirstBone()):(++CSkeletonIter(leftIt));
   1302    0   yongsun         affectCandidates = true;
   1303    0   yongsun     }
   1304    0   yongsun 
   1305    0   yongsun     return affectCandidates;
   1306    0   yongsun }
   1307    0   yongsun 
   1308    0   yongsun void
   1309    0   yongsun CIMIContext::segPinyin(CSkeletonIter head1, CSkeletonIter tail1,
   1310    0   yongsun                        CSkeletonIter head2, CSkeletonIter tail2,
   1311    0   yongsun                        CSkeleton& result)
   1312    0   yongsun {
   1313    0   yongsun     #ifdef DEBUG
   1314    0   yongsun         printf("SegPinyin:");
   1315    0   yongsun     #endif
   1316    0   yongsun 
   1317    0   yongsun     const CPinyinTrie::TNode* pathNodes[16];
   1318    0   yongsun     TSkelCursor::TPos         positions[16];
   1319    0   yongsun 
   1320    0   yongsun     #ifdef  DEBUG
   1321    0   yongsun         TWCHAR dbg_msg[2] = {0, 0};
   1322    0   yongsun         {
   1323    0   yongsun             TSkelCursor dsc(head1, tail1, head2, tail2);
   1324    0   yongsun             while (dsc.hasNext()) {
   1325    0   yongsun                 if (dsc.isPinyin()) {
   1326    0   yongsun                     dbg_msg[0] = dsc.getChar();
   1327    0   yongsun                     print_wide(dbg_msg);
   1328    0   yongsun                     if (dsc.isUserBreakAfter()) {
   1329    0   yongsun                         printf("'");
   1330    0   yongsun                     }
   1331    0   yongsun                 } else {
   1332    0   yongsun                     printf("_");
   1333    0   yongsun                     dbg_msg[0] = dsc.getChar();
   1334    0   yongsun                     print_wide(dbg_msg);
   1335    0   yongsun                 }
   1336    0   yongsun                 dsc.next();
   1337    0   yongsun             }
   1338    0   yongsun         }
   1339    0   yongsun     #endif
   1340    0   yongsun 
   1341    0   yongsun     result.clear();
   1342    0   yongsun     TSkelCursor sc(head1, tail1, head2, tail2);
   1343    0   yongsun     while (sc.hasNext()) {
   1344    0   yongsun         if (sc.isPinyin()) {
   1345  303   yongsun             int lastValid = 0;
   1346    0   yongsun             pathNodes[0] = m_pPinyinTrie->getRootNode();
   1347    0   yongsun             positions[0] = sc.getPosition();
   1348    0   yongsun             for (int idx=1; sc.isPinyin() && pathNodes[idx-1] != NULL; ++idx) {
   1349    0   yongsun                 pathNodes[idx] = m_pPinyinTrie->transfer(pathNodes[idx-1], sc.getChar());
   1350    0   yongsun                 sc.next();
   1351    0   yongsun                 positions[idx] = sc.getPosition();
   1352    0   yongsun                 if (m_pPinyinTrie->isValid(pathNodes[idx], m_bNonCompleteSyllable, m_bGBK))
   1353    0   yongsun                     lastValid = idx;
   1354    0   yongsun                 if (sc.isUserBreakAfter(positions[idx-1]))
   1355    0   yongsun                     break;
   1356    0   yongsun             }
   1357    0   yongsun             bool invalid = false;
   1358    0   yongsun             if (lastValid == 0) {
   1359    0   yongsun                 invalid = true;
   1360    0   yongsun                 lastValid = 1;
   1361    0   yongsun             }
   1362    0   yongsun             if (lastValid >= 2 && pathNodes[lastValid]->m_bFullSyllableTransfer && pathNodes[lastValid-1]->m_bFullSyllableTransfer) {
   1363    0   yongsun                 TWCHAR w1 = sc.getChar(positions[lastValid-1]);
   1364    0   yongsun                 TWCHAR w2 = sc.getChar(positions[lastValid]);
   1365    0   yongsun                 if (!isYuanYinChar(w1) && isYuanYinChar(w2)){
   1366    0   yongsun                     const CPinyinTrie::TNode* pytmp = NULL;
   1367    0   yongsun                     pytmp = m_pPinyinTrie->transfer(m_pPinyinTrie->getRootNode(), w1);
   1368    0   yongsun                     if (pytmp) pytmp = m_pPinyinTrie->transfer(pytmp, w2);
   1369    0   yongsun                     if (pytmp != NULL) --lastValid;
   1370    0   yongsun                 }
   1371    0   yongsun             }
   1372    0   yongsun             CBone bnint(CBone::AUTO_BOUNDARY, (invalid)?(CBone::NODE_INVALID_PINYIN):(CBone::NODE_PINYIN));
   1373    0   yongsun             if (sc.isUserBreakAfter(positions[lastValid-1]))
   1374    0   yongsun                 bnint.m_BoundaryType = CBone::USER_BOUNDARY;
   1375    0   yongsun             for (int idx=0; idx < lastValid; ++idx)
   1376    0   yongsun                 bnint.m_String += sc.getChar(positions[idx]);
   1377    0   yongsun             result.push_back(bnint);
   1378    0   yongsun             sc.setPosition(positions[lastValid]);
   1379    0   yongsun         } else {
   1380    0   yongsun             result.push_back(*(sc.getPosition().m_bone));
   1381    0   yongsun             sc.nextBone();
   1382    0   yongsun         }
   1383    0   yongsun     }
   1384    0   yongsun 
   1385    0   yongsun     #ifdef  DEBUG
   1386    0   yongsun         {
   1387    0   yongsun             printf(" ==> ");
   1388    0   yongsun             TSkelCursor dsc(result.begin(), result.end(), result.end(), result.end());
   1389    0   yongsun             while (dsc.hasNext()) {
   1390    0   yongsun                 if (dsc.isPinyin()) {
   1391    0   yongsun                     dbg_msg[0] = dsc.getChar();
   1392    0   yongsun                     print_wide(dbg_msg);
   1393    0   yongsun                     if (dsc.isBreakAfter()) {
   1394    0   yongsun                         printf("'");
   1395    0   yongsun                     }
   1396    0   yongsun                 } else {
   1397    0   yongsun                     printf("_");
   1398    0   yongsun                     dbg_msg[0] = dsc.getChar();
   1399    0   yongsun                     print_wide(dbg_msg);
   1400    0   yongsun                 }
   1401    0   yongsun                 dsc.next();
   1402    0   yongsun             }
   1403    0   yongsun         }
   1404    0   yongsun         fflush(stdout);
   1405    0   yongsun     #endif
   1406    0   yongsun     return;
   1407    0   yongsun }
   1408    0   yongsun 
   1409    0   yongsun void
   1410    0   yongsun CIMIContext::setHistoryMemory(CICHistory *phm)
   1411    0   yongsun {
   1412    0   yongsun     m_pHistory = phm;
   1413    0   yongsun }
   1414    0   yongsun 
   1415    0   yongsun CICHistory *
   1416    0   yongsun CIMIContext::getHistoryMemory()
   1417    0   yongsun {
   1418    0   yongsun     return m_pHistory;
   1419    0   yongsun }
   1420    0   yongsun 
   1421    0   yongsun void CIMIContext::memorize(void)
   1422    0   yongsun {
   1423    0   yongsun     if (m_pHistory != NULL) {
   1424    0   yongsun         std::vector<unsigned int> result;
   1425    0   yongsun         CSkeletonIter boneStart = getFirstBone();
   1426    0   yongsun         CSkeletonIter boneEnd = getLastBone();
   1427    0   yongsun 
   1428    0   yongsun         while (boneStart != boneEnd) {
   1429    0   yongsun             #ifdef DEBUG
   1430    0   yongsun                 //assert(boneStart->m_pInnerData->m_BWType != CBoneInnerData::NoBestWordStartHere);
   1431    0   yongsun             #endif
   1432    0   yongsun 
   1433    0   yongsun             CSkeletonIter bone = boneStart;
   1434    0   yongsun             CSkeletonIter rightBone = boneStart->m_pInnerData->m_BestWord.m_BoneEnd;
   1435    0   yongsun             if (boneStart->m_BoneType != CBone::NODE_PINYIN && boneStart->m_BoneType != CBone::NODE_INCOMPLETE_PINYIN) {
   1436    0   yongsun                 while (bone != rightBone && bone != boneEnd)
   1437    0   yongsun                     ++bone;
   1438    0   yongsun                 result.push_back(0);
   1439    0   yongsun             } else {
   1440    0   yongsun                 while (bone != rightBone && bone != boneEnd)
   1441    0   yongsun                      ++bone;
   1442    0   yongsun                 result.push_back(boneStart->m_pInnerData->m_BestWord.m_WordId);
   1443    0   yongsun             }
   1444    0   yongsun 
   1445    0   yongsun             boneStart = bone;
   1446    0   yongsun         }
   1447    0   yongsun         if (result.size() > 0)
   1448    0   yongsun             m_pHistory->memorize(&(result[0]), (&(result[0])) + result.size());
   1449    0   yongsun     }
   1450    0   yongsun }
   1451    0   yongsun 
   1452    0   yongsun void
   1453    0   yongsun CIMIContext::print_lattice()
   1454    0   yongsun {
   1455    0   yongsun     printf("\n");
   1456    0   yongsun     std::string prefix;
   1457    0   yongsun     CSkeletonIter bone = getFirstBone();
   1458    0   yongsun     CSkeletonIter boneEnd = getLastBone();
   1459    0   yongsun     for (;bone != boneEnd; ++bone)
   1460    0   yongsun         bone->print(prefix);
   1461    0   yongsun     boneEnd->print(prefix);
   1462    0   yongsun     (++boneEnd)->print(prefix);
   1463    0   yongsun     fflush(stdout);
   1464    0   yongsun }
   1465    0   yongsun 
   1466    0   yongsun void
   1467    0   yongsun CBone::print(std::string& prefix)
   1468    0   yongsun {
   1469    0   yongsun     printf(prefix.c_str());
   1470    0   yongsun     printf("{Bone@%X:", this);
   1471    0   yongsun     print_wide(m_String.c_str());
   1472    0   yongsun     printf("}");
   1473    0   yongsun     prefix += "    ";
   1474    0   yongsun     if (m_pInnerData)
   1475    0   yongsun         m_pInnerData->print(prefix);
   1476    0   yongsun     prefix.resize(prefix.size() - 4);
   1477    0   yongsun     fflush(stdout);
   1478    0   yongsun }
   1479    0   yongsun 
   1480    0   yongsun void
   1481    0   yongsun CCandidate::print(std::string& prefix)
   1482    0   yongsun {
   1483    0   yongsun     printf(prefix.c_str());
   1484    0   yongsun     printf("<Candidate @%X:", this);
   1485    0   yongsun     print_wide(m_String);
   1486    0   yongsun     printf("-- %d}", m_WordId);
   1487    0   yongsun     fflush(stdout);
   1488    0   yongsun }
   1489    0   yongsun 
   1490    0   yongsun 
   1491    0   yongsun TLongExpFloat::TLongExpFloat(double d)
   1492    0   yongsun {
   1493    0   yongsun     if (d != 0.0 && d != -0.0) {
   1494    0   yongsun         TDoubleAnatomy da(d);
   1495    0   yongsun         m_exp = da.getExp();
   1496    0   yongsun         da.clearExp();
   1497    0   yongsun         m_base = da.getValue();
   1498    0   yongsun     } else {
   1499    0   yongsun         m_base = d;
   1500    0   yongsun         m_exp = 0;
   1501    0   yongsun     }
   1502    0   yongsun }
   1503    0   yongsun 
   1504    0   yongsun TLongExpFloat
   1505    0   yongsun TLongExpFloat::operator* (const TLongExpFloat& b) const
   1506    0   yongsun {
   1507    0   yongsun     double d = this->m_base * b.m_base;
   1508    0   yongsun     TLongExpFloat reda(d);
   1509    0   yongsun     reda.m_exp += this->m_exp + b.m_exp;
   1510    0   yongsun     return reda;
   1511    0   yongsun }
   1512    0   yongsun 
   1513    0   yongsun TLongExpFloat
   1514    0   yongsun TLongExpFloat::operator/ (const TLongExpFloat& b) const
   1515    0   yongsun {
   1516    0   yongsun     double d = this->m_base / b.m_base;
   1517    0   yongsun     TLongExpFloat reda(d);
   1518    0   yongsun     reda.m_exp += (this->m_exp - b.m_exp);
   1519    0   yongsun     return reda;
   1520    0   yongsun }
   1521    0   yongsun 
   1522    0   yongsun bool
   1523    0   yongsun TLongExpFloat::operator< (const TLongExpFloat& b) const
   1524    0   yongsun {
   1525    0   yongsun     if (m_base >= 0.0 && b.m_base >= 0.0) {
   1526    0   yongsun         return (m_exp < b.m_exp || (m_exp == b.m_exp && m_base < b.m_base));
   1527    0   yongsun     } else if (m_base < 0.0 && b.m_base < 0.0) {
   1528    0   yongsun         return (m_exp > b.m_exp || (m_exp == b.m_exp && m_base < b.m_base));
   1529    0   yongsun     } else if (m_base < 0.0 && b.m_base >= 0.0)
   1530    0   yongsun         return true;
   1531    0   yongsun     else
   1532    0   yongsun         return false;
   1533    0   yongsun }
   1534    0   yongsun 
   1535    0   yongsun bool
   1536    0   yongsun TLongExpFloat::operator<=(const TLongExpFloat& b) const
   1537    0   yongsun {
   1538    0   yongsun     if (m_base >= 0.0 && b.m_base >= 0.0) {
   1539    0   yongsun         return (m_exp < b.m_exp || (m_exp == b.m_exp && m_base <= b.m_base));
   1540    0   yongsun     } else if (m_base < 0.0 && b.m_base < 0.0) {
   1541    0   yongsun         return (m_exp > b.m_exp || (m_exp == b.m_exp && m_base <= b.m_base));
   1542    0   yongsun     } else if (m_base < 0.0 && b.m_base >= 0.0)
   1543    0   yongsun         return true;
   1544    0   yongsun     else
   1545    0   yongsun         return false;
   1546    0   yongsun }
   1547    0   yongsun 
   1548    0   yongsun bool
   1549    0   yongsun TLongExpFloat::operator==(const TLongExpFloat& b) const
   1550    0   yongsun {
   1551    0   yongsun     return (m_base == b.m_base && m_exp == b.m_exp);
   1552    0   yongsun }
   1553    0   yongsun 
   1554    0   yongsun void
   1555    0   yongsun TLongExpFloat::toString(std::string& str) const
   1556    0   yongsun {
   1557    0   yongsun     char buf[256];
   1558    0   yongsun     toString(buf);
   1559    0   yongsun     str = buf;
   1560    0   yongsun }
   1561