1 0 yongsun /* 2 82 yongsun * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 3 82 yongsun * 4 82 yongsun * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved. 5 82 yongsun * 6 82 yongsun * The contents of this file are subject to the terms of either the GNU Lesser 7 82 yongsun * General Public License Version 2.1 only ("LGPL") or the Common Development and 8 82 yongsun * Distribution License ("CDDL")(collectively, the "License"). You may not use this 9 82 yongsun * file except in compliance with the License. You can obtain a copy of the CDDL at 10 82 yongsun * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at 11 82 yongsun * http://www.opensource.org/licenses/lgpl-license.php. See the License for the 12 82 yongsun * specific language governing permissions and limitations under the License. When 13 82 yongsun * distributing the software, include this License Header Notice in each file and 14 82 yongsun * include the full text of the License in the License file as well as the 15 82 yongsun * following notice: 16 82 yongsun * 17 82 yongsun * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE 18 82 yongsun * (CDDL) 19 82 yongsun * For Covered Software in this distribution, this License shall be governed by the 20 82 yongsun * laws of the State of California (excluding conflict-of-law provisions). 21 82 yongsun * Any litigation relating to this License shall be subject to the jurisdiction of 22 82 yongsun * the Federal Courts of the Northern District of California and the state courts 23 82 yongsun * of the State of California, with venue lying in Santa Clara County, California. 24 82 yongsun * 25 82 yongsun * Contributor(s): 26 82 yongsun * 27 82 yongsun * If you wish your version of this file to be governed by only the CDDL or only 28 82 yongsun * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to 29 82 yongsun * include this software in this distribution under the [CDDL or LGPL Version 2.1] 30 82 yongsun * license." If you don't indicate a single choice of license, a recipient has the 31 82 yongsun * option to distribute your version of this file under either the CDDL or the LGPL 32 82 yongsun * Version 2.1, or to extend the choice of license to its licensees as provided 33 82 yongsun * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL 34 82 yongsun * Version 2 license, then the option applies only if the new code is made subject 35 82 yongsun * to such option by the copyright holder. 36 0 yongsun */ 37 82 yongsun 38 0 yongsun #ifdef HAVE_CONFIG_H 39 0 yongsun #include <config.h> 40 0 yongsun #endif 41 0 yongsun 42 182 tchaikov #include <algorithm> 43 0 yongsun #include "imi_context.h" 44 0 yongsun #include "lattice_states.h" 45 0 yongsun 46 0 yongsun 47 0 yongsun class TSkelCursor { 48 0 yongsun public: 49 0 yongsun struct TPos { 50 0 yongsun CSkeletonIter m_bone; 51 0 yongsun int m_idx; 52 0 yongsun TPos(CSkeletonIter bone=CSkeletonIter(), int idx=0) 53 0 yongsun : m_bone(bone), m_idx(idx) { } 54 0 yongsun }; 55 0 yongsun 56 0 yongsun public: 57 0 yongsun TSkelCursor(CSkeletonIter h1, CSkeletonIter t1, CSkeletonIter h2, CSkeletonIter t2, bool asis=false) 58 0 yongsun : m_h1(h1), m_t1(t1), m_h2(h2), m_t2(t2), m_bone(h1), m_idx(0), m_iLink(0) 59 0 yongsun { if (!asis) ensureCursor(); } 60 0 yongsun 61 0 yongsun inline bool 62 0 yongsun isPinyin() const 63 0 yongsun { return (m_bone != m_t2 && m_bone->isPinyinNode()); } 64 0 yongsun 65 0 yongsun inline bool 66 0 yongsun isBreakAfter() const 67 0 yongsun { return (m_bone == m_t2 || m_bone == m_t1 || m_idx >= m_bone->m_String.size()-1); } 68 0 yongsun 69 0 yongsun inline bool 70 0 yongsun isBreakAfter(TPos & pos) const 71 0 yongsun { return (pos.m_bone == m_t2 || pos.m_bone == m_t1 || pos.m_idx >= pos.m_bone->m_String.size()-1); } 72 0 yongsun 73 0 yongsun inline bool 74 0 yongsun isUserBreakAfter() const 75 0 yongsun { return (m_bone == m_t2 || m_bone == m_t1 || 76 0 yongsun (m_idx == m_bone->m_String.size()-1 && m_bone->isPinyinNode() && 77 0 yongsun m_bone->m_BoundaryType == CBone::USER_BOUNDARY)); } 78 0 yongsun 79 0 yongsun inline bool 80 0 yongsun isUserBreakAfter(TPos & pos) const 81 0 yongsun { return (pos.m_bone == m_t2 || pos.m_bone == m_t1 || 82 0 yongsun (pos.m_idx == pos.m_bone->m_String.size()-1 && pos.m_bone->isPinyinNode() && 83 0 yongsun pos.m_bone->m_BoundaryType == CBone::USER_BOUNDARY)); } 84 0 yongsun 85 0 yongsun inline TWCHAR 86 0 yongsun getChar() 87 0 yongsun { 88 0 yongsun ensureCursor(); 89 0 yongsun return (m_bone != m_t2)?(m_bone->m_String[m_idx]):0; 90 0 yongsun } 91 0 yongsun 92 0 yongsun inline TWCHAR 93 0 yongsun getChar(TPos& pos) const // the pos should be ensured 94 0 yongsun { return (pos.m_bone != m_t2)?(pos.m_bone->m_String[pos.m_idx]):0; } 95 0 yongsun 96 0 yongsun void 97 0 yongsun next(bool asis=false) 98 0 yongsun { 99 0 yongsun ensureCursor(); 100 0 yongsun if (m_bone != m_t2) { 101 0 yongsun ++m_idx; 102 0 yongsun if (!asis) ensureCursor(); 103 0 yongsun } 104 0 yongsun } 105 0 yongsun 106 0 yongsun void 107 0 yongsun nextBone() 108 0 yongsun { 109 0 yongsun ensureCursor(); 110 0 yongsun if (m_bone != m_t2) { 111 0 yongsun ++m_bone; 112 0 yongsun m_idx=0; 113 0 yongsun } 114 0 yongsun ensureCursor(); 115 0 yongsun } 116 0 yongsun 117 0 yongsun inline bool 118 0 yongsun hasNext() 119 0 yongsun { 120 0 yongsun ensureCursor(); 121 0 yongsun return m_bone != m_t2; 122 0 yongsun } 123 0 yongsun 124 0 yongsun inline bool 125 0 yongsun atFirstLink() const 126 0 yongsun { return m_iLink == 0; } 127 0 yongsun 128 0 yongsun inline TPos 129 0 yongsun getPosition() const 130 0 yongsun { 131 0 yongsun return TPos(m_bone, m_idx); 132 0 yongsun } 133 0 yongsun 134 0 yongsun /** The parameters must be retrieved from save object before */ 135 0 yongsun inline void 136 0 yongsun setPosition(const TPos& pos) 137 0 yongsun { m_bone = pos.m_bone; m_idx = pos.m_idx; } 138 0 yongsun 139 0 yongsun bool 140 0 yongsun ensureCursor(TPos& curCompare); 141 0 yongsun 142 0 yongsun protected: 143 0 yongsun CSkeletonIter m_h1, m_h2, m_t1, m_t2; 144 0 yongsun CSkeletonIter m_bone; 145 0 yongsun int m_iLink, m_idx; 146 0 yongsun 147 0 yongsun protected: 148 0 yongsun void 149 0 yongsun ensureCursor(); 150 0 yongsun }; 151 0 yongsun 152 0 yongsun void 153 0 yongsun TSkelCursor::ensureCursor() 154 0 yongsun { 155 0 yongsun while (m_bone != m_t1 && m_bone != m_t2 && m_idx >= m_bone->m_String.size()){ 156 0 yongsun m_idx = 0; 157 0 yongsun ++m_bone; 158 0 yongsun } 159 0 yongsun if (m_bone == m_t1) { 160 0 yongsun ++m_iLink; 161 0 yongsun m_bone = m_h2; 162 0 yongsun m_idx = 0; 163 0 yongsun while (m_bone != m_t2 && m_idx >= m_bone->m_String.size()){ 164 0 yongsun m_idx = 0; 165 0 yongsun ++m_bone; 166 0 yongsun } 167 0 yongsun } 168 0 yongsun } 169 0 yongsun 170 0 yongsun bool 171 0 yongsun TSkelCursor::ensureCursor(TPos& curCompare) 172 0 yongsun { 173 0 yongsun bool same = false; 174 0 yongsun same = (m_bone == curCompare.m_bone && m_idx == curCompare.m_idx); 175 0 yongsun while (m_bone != m_t1 && m_bone != m_t2 && m_idx >= m_bone->m_String.size()){ 176 0 yongsun m_idx = 0; 177 0 yongsun ++m_bone; 178 0 yongsun same = same || (m_bone == curCompare.m_bone && m_idx == curCompare.m_idx); 179 0 yongsun } 180 0 yongsun if (m_bone == m_t1) { 181 0 yongsun ++m_iLink; 182 0 yongsun m_bone = m_h2; 183 0 yongsun m_idx = 0; 184 0 yongsun same = same || (m_bone == curCompare.m_bone && m_idx == curCompare.m_idx); 185 0 yongsun while (m_bone != m_t2 && m_idx >= m_bone->m_String.size()){ 186 0 yongsun m_idx = 0; 187 0 yongsun ++m_bone; 188 0 yongsun same = same || (m_bone == curCompare.m_bone && m_idx == curCompare.m_idx); 189 0 yongsun } 190 0 yongsun } 191 0 yongsun return same; 192 0 yongsun } 193 0 yongsun 194 0 yongsun /** 195 0 yongsun * Determine whether or not the target iterator's position on list of head 196 0 yongsun * is located before the iterator first. More precisely, it return whether 197 0 yongsun * or not target in [head, first) 198 0 yongsun * @param target is the target iterator whose position to be decided 199 0 yongsun * @param head is the head iterator of the container (list or vector) 200 0 yongsun * @param first iterator to be compared with target 201 0 yongsun * @return whether or not the target iterator's position on list of head 202 0 yongsun * is located before the iterator first 203 0 yongsun */ 204 0 yongsun template<class forwardIt> 205 0 yongsun bool 206 0 yongsun isLocatedBefore(forwardIt target, forwardIt head, forwardIt first) 207 0 yongsun { 208 0 yongsun for (; head != first; ++head) { 209 0 yongsun if (target == head) return true; 210 0 yongsun } 211 0 yongsun return false; 212 0 yongsun } 213 0 yongsun 214 0 yongsun CIMIContext::CIMIContext() 215 0 yongsun : m_bNonCompleteSyllable(false), m_bStrictLeft2Right(false), 216 0 yongsun m_bGBK(true), m_bGB18030(false), m_HistoryPower(3), m_ContextRanking(true), 217 0 yongsun m_pModel(NULL), m_pPinyinTrie(NULL), m_Skeleton(), 218 0 yongsun m_EffectiveCandiBoneStart(), m_EffectiveCandiBoneEnd() 219 0 yongsun { 220 0 yongsun } 221 0 yongsun 222 0 yongsun void 223 0 yongsun CIMIContext::setCoreData(CIMIData *pCoreData) 224 0 yongsun { 225 0 yongsun m_pModel = pCoreData->getSlm(); 226 0 yongsun m_pPinyinTrie = pCoreData->getPinyinTrie(); 227 0 yongsun } 228 0 yongsun 229 0 yongsun void 230 0 yongsun CIMIContext::clear() 231 0 yongsun { 232 0 yongsun m_Skeleton.clear(); 233 0 yongsun m_Skeleton.push_back(CBone()); 234 0 yongsun m_Skeleton.push_back(CBone()); 235 0 yongsun m_EffectiveCandiBoneStart = m_EffectiveCandiBoneEnd = getLastBone(); 236 0 yongsun 237 0 yongsun // allocate bone's inner data when it is inserted into the skeleton 238 0 yongsun CSkeletonIter itEnd = m_Skeleton.end(); 239 0 yongsun for (CSkeletonIter bone = m_Skeleton.begin(); bone != itEnd; ++bone) { 240 0 yongsun if (bone->m_pInnerData == NULL) 241 0 yongsun bone->m_pInnerData = new CBoneInnerData(); 242 0 yongsun } 243 0 yongsun 244 0 yongsun searchFrom(m_Skeleton.begin()); 245 0 yongsun } 246 0 yongsun 247 0 yongsun static bool 248 0 yongsun isYuanYinChar(TWCHAR wc) 249 0 yongsun { 250 0 yongsun return (wc == L'a' || wc == L'o' || wc == L'e' || 251 0 yongsun wc == L'i' || wc == L'u' || wc == L'v'); 252 0 yongsun } 253 0 yongsun 254 0 yongsun CSkeletonIter 255 0 yongsun CIMIContext::cancelSelection(CSkeletonIter bone, bool update) 256 0 yongsun { 257 0 yongsun bool found = false; 258 0 yongsun CSkeletonIter it = bone; 259 0 yongsun for (CSkeletonIter first=m_Skeleton.begin(); it->m_BoneType == CBone::NODE_PINYIN; --it) { 260 0 yongsun // BestWrod is conjunctive, so no need to check position if user selection 261 0 yongsun // like isLocatedBefore(bone, it, it->m_pInnerData->m_BestWord.m_BoneEnd)) { 262 0 yongsun if (it->m_pInnerData->m_BWType == CBoneInnerData::UserSelectedBestWord) { 263 0 yongsun it->m_pInnerData->m_BWType = CBoneInnerData::NoBestWordStartHere; 264 0 yongsun found = true; 265 0 yongsun break; 266 0 yongsun } else if (it->m_pInnerData->m_BWType != CBoneInnerData::NoBestWordStartHere) { 267 0 yongsun break; 268 0 yongsun } 269 0 yongsun if (it == first) 270 0 yongsun break; 271 0 yongsun } 272 0 yongsun if (found && update) 273 0 yongsun searchFrom(it); 274 0 yongsun return (found)?(it):(bone); 275 0 yongsun } 276 0 yongsun 277 0 yongsun CSkeletonIter 278 0 yongsun CIMIContext::cancelSelectionCover(CSkeletonIter bone, bool update) 279 0 yongsun { 280 0 yongsun bool found = false; 281 0 yongsun if (bone->m_pInnerData->m_BWType != CBoneInnerData::NoBestWordStartHere) { 282 0 yongsun return bone; 283 0 yongsun } 284 0 yongsun CSkeletonIter it = bone; 285 0 yongsun for (CSkeletonIter first=m_Skeleton.begin(); it != first; ) { 286 0 yongsun --it; 287 0 yongsun if (it->m_pInnerData->m_BWType != CBoneInnerData::NoBestWordStartHere) { 288 182 tchaikov // BestWord is conjunctive, so no need to check position if user selection 289 0 yongsun // like isLocatedBefore(bone, it, it->m_pInnerData->m_BestWord.m_BoneEnd)) { 290 0 yongsun if (it->m_pInnerData->m_BWType == CBoneInnerData::UserSelectedBestWord) { 291 0 yongsun it->m_pInnerData->m_BWType = CBoneInnerData::NoBestWordStartHere; 292 0 yongsun found = true; 293 0 yongsun } 294 0 yongsun break; 295 0 yongsun } 296 0 yongsun } 297 0 yongsun if (found && update) 298 0 yongsun searchFrom(it); 299 0 yongsun return (found)?(it):(bone); 300 0 yongsun } 301 0 yongsun 302 0 yongsun bool 303 0 yongsun CIMIContext::makeSelection(const CCandidate& candi) 304 0 yongsun { 305 0 yongsun CSkeletonIter boneLeft = cancelSelection(candi.m_BoneStart, false); 306 0 yongsun 307 39 ys148558 /* 308 0 yongsun candi.m_BoneStart->m_pInnerData->m_BWType = CBoneInnerData::NoBestWordStartHere; 309 0 yongsun 310 0 yongsun for (CSkeletonIter bone = candi.m_BoneStart; bone != candi.m_BoneEnd; ++bone) 311 0 yongsun bone->m_pInnerData->m_BWType = CBoneInnerData::NoBestWordStartHere; 312 0 yongsun */ 313 0 yongsun 314 0 yongsun candi.m_BoneStart->m_pInnerData->m_BestWord = candi; 315 0 yongsun candi.m_BoneStart->m_pInnerData->m_BWType = CBoneInnerData::UserSelectedBestWord; 316 0 yongsun searchFrom(boneLeft); 317 0 yongsun 318 0 yongsun return true; 319 0 yongsun } 320 0 yongsun 321 0 yongsun /** 322 0 yongsun * it is illegal if boneStart == boneEnd and skel.size() == 0 323 0 yongsun */ 324 0 yongsun bool 325 0 yongsun CIMIContext::modify(CSkeletonIter boneStart, 326 0 yongsun CSkeletonIter boneEnd, 327 0 yongsun CSkeleton& skel, 328 0 yongsun bool doSearch, 329 0 yongsun CSkeletonIter* pItLeftmost) 330 0 yongsun { 331 0 yongsun // No change needed, happen on OneLineView, call out a PINYIN i 332 0 yongsun // but return it back withou modification. 333 0 yongsun // FIXME, maybe this should be put back to OnreLineView's code, not here 334 0 yongsun if ((skel.size() == 1) && (boneEnd == ++CSkeletonIter(boneStart)) && 335 0 yongsun (skel.begin()->m_BoneType == boneStart->m_BoneType) && 336 0 yongsun (skel.begin()->m_String == boneStart->m_String)) { 337 0 yongsun if (pItLeftmost) *pItLeftmost = getLastBone(); 338 0 yongsun boneStart->m_BoundaryType = skel.begin()->m_BoundaryType; 339 0 yongsun return false; 340 0 yongsun } 341 0 yongsun 342 0 yongsun // check whether or not the modification would affect the candidates 343 0 yongsun // retrieved by the previous getCandidates() call 344 0 yongsun CSkeletonIter first = boneStart; 345 0 yongsun if (first->m_pInnerData->m_LexiconStates.size() > 0) 346 0 yongsun first = first->m_pInnerData->m_LexiconStates[0].m_BoneStart; 347 0 yongsun bool affectCandidates = 348 0 yongsun !isLocatedBefore(m_EffectiveCandiBoneEnd, m_Skeleton.begin(), first); 349 0 yongsun 350 0 yongsun // We must check the user selection which may cover this node 351 0 yongsun // if there is such a user selection, we should do search from there 352 0 yongsun // starting bone of such a selection. 353 0 yongsun // The check should only be done when boneStart to be removed 354 0 yongsun CSkeletonIter lefter = cancelSelectionCover(boneStart, false); 355 0 yongsun bool bSearchLefter = (lefter != boneStart); 356 0 yongsun 357 0 yongsun // Another case is that previous UserSelection just ending at boneStart, 358 0 yongsun // which is the first bone to be deleted. In this case, new search will 359 0 yongsun // start with the newly inserted bone, and when searching, the User 360 0 yongsun // selection word will be check backward and got a wrong range. So 361 0 yongsun // we must make it change the UserSelection's ending Bone to the first 362 0 yongsun // bone after insertion. 363 0 yongsun bool bLeftUS = false; 364 0 yongsun CSkeletonIter leftUserBone = lefter; 365 0 yongsun if (skel.size() > 0 && !bSearchLefter && leftUserBone != m_Skeleton.begin()) { 366 0 yongsun do { 367 0 yongsun --leftUserBone; 368 0 yongsun int bwType = leftUserBone->m_pInnerData->m_BWType; 369 0 yongsun if (bwType != CBoneInnerData::NoBestWordStartHere) { 370 0 yongsun bLeftUS = (bwType ==CBoneInnerData::UserSelectedBestWord && 371 0 yongsun leftUserBone->m_pInnerData->m_BestWord.m_BoneEnd == boneStart); 372 0 yongsun break; 373 0 yongsun } 374 0 yongsun } while (leftUserBone != m_Skeleton.begin()); 375 0 yongsun } 376 0 yongsun 377 0 yongsun CBoneInnerData *pid = NULL; 378 0 yongsun // remove the old range 379 0 yongsun if (boneStart != boneEnd) { 380 0 yongsun // before remove the bone, get the first's bone's innerData 381 0 yongsun // reserve it for the first bone to be inserted. (ie. just 382 0 yongsun // attach it to the first bone after deletion/insertion 383 0 yongsun pid = boneStart->m_pInnerData; 384 0 yongsun boneStart->m_pInnerData = NULL; 385 0 yongsun 386 0 yongsun m_Skeleton.erase(boneStart, boneEnd); 387 0 yongsun } 388 0 yongsun 389 0 yongsun // insert new list before boneEnd 390 0 yongsun first = boneEnd; 391 0 yongsun CSkeleton::iterator it1 = skel.begin(), h = skel.begin(); 392 0 yongsun CSkeleton::iterator it2 = skel.end(); 393 0 yongsun for (; it1 != it2; ++it1) { 394 0 yongsun CSkeletonIter tmp = m_Skeleton.insert(boneEnd, *it1); 395 0 yongsun if (it1 == h) 396 0 yongsun first = tmp; 397 0 yongsun else 398 0 yongsun tmp->m_pInnerData = new CBoneInnerData(); 399 0 yongsun } 400 0 yongsun 401 0 yongsun if (first->m_pInnerData != NULL) { 402 0 yongsun // nothing inserted, must deleted something, ie pid != NULL 403 0 yongsun pid->m_BWType = first->m_pInnerData->m_BWType; 404 0 yongsun pid->m_BestWord = first->m_pInnerData->m_BestWord; 405 0 yongsun delete first->m_pInnerData; 406 0 yongsun first->m_pInnerData = pid; 407 0 yongsun } else if (pid == NULL) { 408 0 yongsun // nothing deleted, just inserting something 409 0 yongsun first->m_pInnerData = boneEnd->m_pInnerData; 410 0 yongsun boneEnd->m_pInnerData = new CBoneInnerData(); 411 0 yongsun boneEnd->m_pInnerData->m_BWType = first->m_pInnerData->m_BWType; 412 0 yongsun boneEnd->m_pInnerData->m_BestWord = first->m_pInnerData->m_BestWord; 413 0 yongsun first->m_pInnerData->m_BWType = CBoneInnerData::NoBestWordStartHere; 414 0 yongsun } else { 415 0 yongsun //something deleted, something inserted 416 0 yongsun first->m_pInnerData = pid; 417 0 yongsun pid->m_BWType = CBoneInnerData::NoBestWordStartHere; 418 0 yongsun } 419 0 yongsun 420 0 yongsun // change the left user selection bone's best word's ending bone to first 421 0 yongsun if (bLeftUS) 422 0 yongsun leftUserBone->m_pInnerData->m_BestWord.m_BoneEnd = first; 423 0 yongsun 424 0 yongsun // rebuild the search lattice from the newly inserted list 425 0 yongsun // using the just copied lattice states (innerData) 426 0 yongsun if (pItLeftmost) 427 0 yongsun *pItLeftmost = (bSearchLefter)?(lefter):(first); 428 0 yongsun if (doSearch) 429 0 yongsun searchFrom((bSearchLefter)?(lefter):(first)); 430 0 yongsun 431 0 yongsun return affectCandidates; 432 0 yongsun } 433 0 yongsun 434 0 yongsun bool 435 0 yongsun CIMIContext::isValidSyllable(const TWCHAR* pstr) 436 0 yongsun { 437 0 yongsun const CPinyinTrie::TNode* pyn = m_pPinyinTrie->transfer(pstr); 438 0 yongsun return m_pPinyinTrie->isValid(pyn, m_bNonCompleteSyllable, m_bGBK); 439 0 yongsun } 440 0 yongsun 441 0 yongsun bool 442 0 yongsun CIMIContext::segPinyinSimplest(const wstring& pinyin, CSkeleton& result) 443 0 yongsun { 444 0 yongsun #ifdef DEBUG 445 0 yongsun printf("SegPinyin:"); 446 0 yongsun print_wide(pinyin.c_str()); 447 0 yongsun printf("-->"); 448 0 yongsun #endif 449 0 yongsun 450 0 yongsun //"zhuang" is longest syllable, 16 is enought 451 0 yongsun bool validSyllable[16]; 452 0 yongsun const CPinyinTrie::TNode* pathNodes[16]; 453 0 yongsun const TWCHAR* str = pinyin.c_str(); 454 0 yongsun const CPinyinTrie::TNode* pyn = m_pPinyinTrie->getRootNode(); 455 0 yongsun 456 0 yongsun result.clear(); 457 0 yongsun 458 0 yongsun //Find out the longest valid PINYIN prefix, save to lastValid 459 0 yongsun int idx, lastValid = - 1; 460 0 yongsun for (idx = 0; str[idx] != 0; ++idx) { 461 0 yongsun pyn = m_pPinyinTrie->transfer(pyn, (unsigned char)(str[idx])); 462 0 yongsun pathNodes[idx] = pyn; 463 0 yongsun if (validSyllable[idx] = m_pPinyinTrie->isValid(pyn, m_bNonCompleteSyllable, m_bGBK)) 464 0 yongsun lastValid = idx; 465 0 yongsun if (pyn == NULL) 466 0 yongsun break; 467 0 yongsun } 468 0 yongsun 469 0 yongsun /********************************************************************* 470 0 yongsun Note, when NULL pyn arrived, the char should also be the last one. 471 0 yongsun Try to split it into two nodes if possible: 472 0 yongsun (1) [0..idx-2], [0..idx-1] is both complete syllable 473 0 yongsun [idx-1] is FuYin, [idx] is Yuanyin, 474 0 yongsun [idx-1...] is non-complete or complete (not NULL) 475 0 yongsun ====> split into [0..idx-2] [idx-1, idx] 476 0 yongsun (2) lastValid >= 0 477 0 yongsun ====> split into [0..lastValid] [lastValid+1..] 478 0 yongsun if [lastValid+1...] is not valid, return false 479 0 yongsun (3) lastValid = -1 480 0 yongsun ====> give a invalid PINYIN bone [0..] 481 0 yongsun **********************************************************************/ 482 0 yongsun if (pyn == NULL && idx >= 2 && 483 0 yongsun pathNodes[idx-1]->m_bFullSyllableTransfer == 1 && 484 0 yongsun pathNodes[idx-2]->m_bFullSyllableTransfer == 1 && 485 0 yongsun !isYuanYinChar(str[idx-1]) && isYuanYinChar(str[idx]) && 486 0 yongsun (pathNodes[idx] = m_pPinyinTrie->transfer(str+idx-1)) != NULL) { 487 0 yongsun 488 0 yongsun result.push_back(CBone(str, idx-1, CBone::AUTO_BOUNDARY, CBone::NODE_PINYIN)); 489 0 yongsun 490 0 yongsun #ifdef DEBUG 491 0 yongsun print_wide(wstring(str, idx-1).c_str()); 492 0 yongsun printf("'"); 493 0 yongsun #endif 494 0 yongsun 495 0 yongsun int bt = CBone::NODE_INCOMPLETE_PINYIN; 496 0 yongsun if (pathNodes[idx]->m_bFullSyllableTransfer == 1) 497 0 yongsun bt = CBone::NODE_PINYIN; 498 0 yongsun 499 0 yongsun result.push_back( CBone(str+idx-1, CBone::AUTO_BOUNDARY, bt) ); 500 0 yongsun 501 0 yongsun #ifdef DEBUG 502 0 yongsun print_wide(str+idx-1); 503 0 yongsun fflush(stdout); 504 0 yongsun #endif 505 0 yongsun 506 0 yongsun return true; 507 0 yongsun } 508 0 yongsun 509 0 yongsun if (pyn == NULL && lastValid >= 0) { 510 0 yongsun result.push_back(CBone(str, lastValid+1, CBone::AUTO_BOUNDARY, CBone::NODE_PINYIN)); 511 0 yongsun 512 0 yongsun #ifdef DEBUG 513 0 yongsun print_wide(wstring(str, lastValid+1).c_str()); 514 0 yongsun printf("'"); 515 0 yongsun #endif 516 0 yongsun 517 0 yongsun int bt = CBone::NODE_INCOMPLETE_PINYIN; 518 0 yongsun 519 0 yongsun pathNodes[idx] = m_pPinyinTrie->transfer(str+lastValid+1); 520 0 yongsun if (pathNodes[idx] == NULL) 521 0 yongsun bt = CBone::NODE_INVALID_PINYIN; 522 0 yongsun else if (m_pPinyinTrie->isValid(pathNodes[idx], m_bNonCompleteSyllable, m_bGBK)) 523 0 yongsun bt = CBone::NODE_PINYIN; 524 0 yongsun else 525 0 yongsun bt = CBone::NODE_INCOMPLETE_PINYIN; 526 0 yongsun result.push_back(CBone(str+lastValid+1, CBone::AUTO_BOUNDARY, bt)); 527 0 yongsun 528 0 yongsun #ifdef DEBUG 529 0 yongsun print_wide(str+lastValid+1); 530 0 yongsun if (bt == CBone::NODE_INVALID_PINYIN) 531 0 yongsun printf("(X)"); 532 0 yongsun fflush(stdout); 533 0 yongsun #endif 534 0 yongsun 535 0 yongsun return (bt != CBone::NODE_INVALID_PINYIN); 536 0 yongsun } 537 0 yongsun 538 0 yongsun if (pyn == NULL) { 539 0 yongsun result.push_back(CBone(str, CBone::AUTO_BOUNDARY, CBone::NODE_INVALID_PINYIN)); 540 0 yongsun 541 0 yongsun #ifdef DEBUG 542 0 yongsun print_wide(str); 543 0 yongsun printf("(X)"); 544 0 yongsun fflush(stdout); 545 0 yongsun #endif 546 0 yongsun 547 0 yongsun return false; 548 0 yongsun } 549 0 yongsun 550 0 yongsun /******************************************************************** 551 0 yongsun Now, pyn is not NULL, str[idx] should be 0, 552 0 yongsun [0..idx-1] is valid (non-complete or complete) 553 0 yongsun *********************************************************************/ 554 0 yongsun int bt = (validSyllable[idx-1])?(CBone::NODE_PINYIN):(CBone::NODE_INCOMPLETE_PINYIN); 555 0 yongsun result.push_back(CBone(str, CBone::AUTO_BOUNDARY, bt)); 556 0 yongsun 557 0 yongsun #ifdef DEBUG 558 0 yongsun print_wide(str); 559 0 yongsun fflush(stdout); 560 0 yongsun #endif 561 0 yongsun 562 0 yongsun return true; 563 0 yongsun } 564 0 yongsun 565 0 yongsun TCandiRank::TCandiRank(bool user, bool best, unsigned int len, 566 0 yongsun bool fromLattice, TSentenceScore score) 567 0 yongsun { 568 0 yongsun anony.m_user = (user)?0:1; 569 0 yongsun anony.m_best = (best)?0:1; 570 0 yongsun anony.m_len = (len > 31)?(0):(31-len); 571 0 yongsun anony.m_lattice = (fromLattice)?0:1; 572 0 yongsun 573 0 yongsun #ifdef DEBUG 574 0 yongsun //assert(fromLattice); 575 0 yongsun //assert(TSentenceScore(+0.0) < score); 576 0 yongsun #endif 577 0 yongsun 578 0 yongsun double ds = -score.log2(); 579 0 yongsun 580 0 yongsun //make it 24-bit 581 0 yongsun if (ds > 32767.0) 582 0 yongsun ds = 32767.0; 583 0 yongsun else if (ds < -32768.0) 584 0 yongsun ds = -32768.0; 585 0 yongsun unsigned cost = unsigned((ds+32768.0)*256.0); 586 0 yongsun anony.m_cost = cost; 587 0 yongsun } 588 0 yongsun 589 0 yongsun TCandiRank::TCandiRank(bool user, bool best, unsigned int len, 590 0 yongsun bool fromLattice, unsigned rank) 591 0 yongsun { 592 0 yongsun anony.m_user = (user)?0:1; 593 0 yongsun anony.m_best = (best)?0:1; 594 0 yongsun anony.m_len = (len > 31)?(0):(31-len); 595 0 yongsun anony.m_lattice = (fromLattice)?0:1; 596 0 yongsun anony.m_cost = rank; 597 0 yongsun } 598 0 yongsun 599 0 yongsun struct TCandiPair { 600 0 yongsun CCandidate m_Candi; 601 0 yongsun TCandiRank m_Rank; 602 0 yongsun 603 0 yongsun TCandiPair() : m_Candi(), m_Rank() { } 604 0 yongsun }; 605 0 yongsun 606 0 yongsun struct TCandiPairPtr { 607 0 yongsun TCandiPair* m_Ptr; 608 0 yongsun 609 0 yongsun TCandiPairPtr(TCandiPair* p=NULL) : m_Ptr(p) 610 0 yongsun { } 611 0 yongsun 612 0 yongsun bool 613 0 yongsun operator< (const TCandiPairPtr& b) const 614 0 yongsun { return m_Ptr->m_Rank < b.m_Ptr->m_Rank; } 615 0 yongsun }; 616 0 yongsun 617 0 yongsun // FIXME, this procedure could be modified largely. 618 0 yongsun void 619 0 yongsun CIMIContext::getCandidates(CSkeletonIter bone, CCandidates& result) 620 0 yongsun { 621 0 yongsun TCandiPair cp; 622 0 yongsun static std::map<unsigned int, TCandiPair> map; 623 0 yongsun std::map<unsigned int, TCandiPair>::iterator it_map; 624 0 yongsun 625 0 yongsun map.clear(); 626 0 yongsun result.clear(); 627 0 yongsun m_EffectiveCandiBoneStart = m_EffectiveCandiBoneEnd = bone; 628 0 yongsun 629 0 yongsun if (bone->isTailNode()) 630 0 yongsun return; 631 0 yongsun if (!bone->isValidPinyinNode()) { 632 0 yongsun result.push_back(CCandidate(bone->m_String.c_str(), bone, ++CSkeletonIter(bone))); 633 0 yongsun return; 634 0 yongsun } 635 0 yongsun 636 0 yongsun // if user selection or best word starting at bone 637 0 yongsun if (bone->m_pInnerData->m_BWType != CBoneInnerData::NoBestWordStartHere) { 638 0 yongsun cp.m_Candi = bone->m_pInnerData->m_BestWord; 639 0 yongsun cp.m_Rank = 640 0 yongsun TCandiRank(bone->m_pInnerData->m_BWType == CBoneInnerData::UserSelectedBestWord, 641 0 yongsun bone->m_pInnerData->m_BWType == CBoneInnerData::BestWordStartHere, 642 0 yongsun 0, false, 0); 643 0 yongsun map[cp.m_Candi.m_WordId] = cp; 644 0 yongsun } 645 0 yongsun 646 0 yongsun //collecting all candidates, from both lattice and lexicon 647 0 yongsun int len = 1; 648 0 yongsun cp.m_Candi.m_BoneStart = bone; 649 0 yongsun CSkeletonIter b = ++CSkeletonIter(bone); 650 0 yongsun while (b != (--m_Skeleton.end())) { 651 0 yongsun cp.m_Candi.m_BoneEnd = b; 652 0 yongsun 653 0 yongsun bool found = false; 654 0 yongsun CLexiconStates::iterator itlex = b->m_pInnerData->m_LexiconStates.begin(); 655 0 yongsun CLexiconStates::iterator itlexe = b->m_pInnerData->m_LexiconStates.end(); 656 0 yongsun for (; itlex != itlexe; ++itlex) { 657 0 yongsun if (itlex->m_BoneStart == bone) { 658 0 yongsun found = true; 659 0 yongsun if (itlex->m_bPinyin) { 660 0 yongsun if (itlex->m_pPYNode && itlex->m_pPYNode->m_nWordId > 0) { 661 0 yongsun unsigned sz = itlex->m_pPYNode->m_nWordId; 662 0 yongsun const CPinyinTrie::TWordIdInfo* p = itlex->m_pPYNode->getWordIdPtr(); 663 0 yongsun for (unsigned int i = 0; i < sz; ++i, ++p) { 664 0 yongsun if (m_bGBK || p->m_bGBK == 0) { 665 0 yongsun cp.m_Candi.m_WordId = p->m_id; 666 0 yongsun cp.m_Candi.m_String = (*m_pPinyinTrie)[cp.m_Candi.m_WordId]; 667 0 yongsun 668 0 yongsun //sorting according to the order in PinYinTire 669 0 yongsun cp.m_Rank = TCandiRank(false, false, len, false, i); 670 0 yongsun it_map = map.find(cp.m_Candi.m_WordId); 671 0 yongsun if (it_map == map.end() || cp.m_Rank < it_map->second.m_Rank) 672 0 yongsun map[cp.m_Candi.m_WordId] = cp; 673 0 yongsun } 674 0 yongsun } 675 0 yongsun } 676 0 yongsun } else { 677 0 yongsun cp.m_Candi.m_WordId = itlex->m_WordId; 678 0 yongsun cp.m_Candi.m_String = bone->m_String.c_str(); 679 0 yongsun cp.m_Rank = TCandiRank(false, false, len, false, 0); 680 0 yongsun it_map = map.find(cp.m_Candi.m_WordId); 681 0 yongsun if (it_map == map.end() || cp.m_Rank < it_map->second.m_Rank) 682 0 yongsun map[cp.m_Candi.m_WordId] = cp; 683 0 yongsun } 684 0 yongsun } 685 0 yongsun } 686 0 yongsun 687 0 yongsun if (!found) break; 688 0 yongsun 689 0 yongsun CLatticeStates::iterator its = b->m_pInnerData->m_LatticeNodes.begin(); 690 0 yongsun CLatticeStates::iterator ite = b->m_pInnerData->m_LatticeNodes.end(); 691 0 yongsun for (; its != ite; ++its) { 692 0 yongsun if (its->m_pBackTraceNode && its->m_pBackTraceNode->m_BoneAfter == bone) { 693 0 yongsun cp.m_Candi.m_WordId = its->m_BackTraceWordId; 694 0 yongsun cp.m_Candi.m_String = (*m_pPinyinTrie)[cp.m_Candi.m_WordId]; 695 0 yongsun if (cp.m_Candi.m_String == NULL) 696 0 yongsun cp.m_Candi.m_String = bone->m_String.c_str(); 697 0 yongsun #ifdef _USE_RAW_PROBABILITY 698 0 yongsun 699 0 yongsun #ifdef DEBUG 700 0 yongsun //assert(its->m_pBackTraceNode->m_Score < 0.0 && its->m_Score < 0.0); 701 0 yongsun #endif 702 0 yongsun 703 0 yongsun cp.m_Rank = TCandiRank(false, false, len, true, its->m_Score / its->m_pBackTraceNode->m_Score); 704 0 yongsun #else 705 0 yongsun cp.m_Rank = TCandiRank(false, false, len, true, its->m_Score - its->m_pBackTraceNode->m_Score); 706 0 yongsun #endif 707 0 yongsun it_map = map.find(cp.m_Candi.m_WordId); 708 0 yongsun if (it_map == map.end() || cp.m_Rank < it_map->second.m_Rank) 709 0 yongsun map[cp.m_Candi.m_WordId] = cp; 710 0 yongsun } 711 0 yongsun } 712 0 yongsun 713 0 yongsun m_EffectiveCandiBoneEnd = b; 714 0 yongsun ++b; 715 0 yongsun ++len; 716 0 yongsun } 717 0 yongsun 718 0 yongsun std::vector<TCandiPairPtr> vec; 719 0 yongsun 720 0 yongsun vec.reserve(map.size()); 721 0 yongsun std::map<unsigned int, TCandiPair>::iterator it_mapE = map.end(); 722 0 yongsun for (it_map = map.begin(); it_map != it_mapE; ++it_map) 723 0 yongsun vec.push_back(TCandiPairPtr(&(it_map->second))); 724 0 yongsun std::make_heap(vec.begin(), vec.end()); 725 0 yongsun std::sort_heap(vec.begin(), vec.end()); 726 0 yongsun 727 0 yongsun for (int i=0, sz=vec.size(); i < sz; ++i) 728 0 yongsun result.push_back(vec[i].m_Ptr->m_Candi); 729 0 yongsun } 730 0 yongsun 731 0 yongsun int 732 0 yongsun CIMIContext::getBestSentence(wstring & result, CSkeletonIter boneStart, 733 0 yongsun CSkeletonIter boneEnd, bool original_format) 734 0 yongsun { 735 0 yongsun int nWordConverted = 0; 736 0 yongsun result.clear(); 737 0 yongsun 738 0 yongsun // no need to check begin(), because firstBone must at least has some 739 0 yongsun // auto best word or user selection best word starting from, this rule 740 0 yongsun // must be followed in this call 741 0 yongsun int len, prefix = 0; 742 0 yongsun CSkeletonIter realStart = boneStart; 743 0 yongsun while (realStart->m_pInnerData->m_BWType == CBoneInnerData::NoBestWordStartHere) { 744 0 yongsun ++prefix; 745 0 yongsun --realStart; 746 0 yongsun } 747 0 yongsun 748 0 yongsun while (true) { 749 0 yongsun #ifdef DEBUG 750 0 yongsun //assert(realStart->m_pInnerData->m_BWType != CBoneInnerData::NoBestWordStartHere); 751 0 yongsun #endif 752 0 yongsun 753 0 yongsun CSkeletonIter bone = boneStart; 754 0 yongsun CSkeletonIter rightBone = realStart->m_pInnerData->m_BestWord.m_BoneEnd; 755 0 yongsun if (realStart->m_BoneType != CBone::NODE_PINYIN && 756 0 yongsun realStart->m_BoneType != CBone::NODE_INCOMPLETE_PINYIN) { 757 0 yongsun for (; bone != rightBone && bone != boneEnd; ++bone) { 758 0 yongsun if (!original_format) 759 0 yongsun result.push_back(bone->m_String[0]); 760 0 yongsun else 761 0 yongsun result.push_back((unsigned)bone->m_BoundaryType); 762 0 yongsun } 763 0 yongsun } else { 764 0 yongsun ++nWordConverted; 765 0 yongsun // get the length from boneStart to current best word tail or end of range 766 0 yongsun for (len=0; bone != rightBone && bone != boneEnd; ++bone) 767 0 yongsun ++len; 768 0 yongsun result.append(realStart->m_pInnerData->m_BestWord.m_String+prefix, len); 769 0 yongsun } 770 0 yongsun if (bone == boneEnd) 771 0 yongsun break; 772 0 yongsun boneStart = realStart = bone; 773 0 yongsun prefix = 0; 774 0 yongsun } 775 0 yongsun 776 0 yongsun return nWordConverted; 777 0 yongsun } 778 0 yongsun 779 0 yongsun /** 780 0 yongsun * Search from the bone to the tail. the bone can not beyond first psuedo tail. 781 0 yongsun * Before search, all BoneInnerData should be set. The states of the bones 782 0 yongsun * who's ahead of the bone would not be affected by this function. Yet, states 783 0 yongsun * of the bones beyond this bone will be updated or refreshed. 784 0 yongsun * 785 0 yongsun * After lattice search, only one best path are backtraced and each best word 786 0 yongsun * will be attached to corresponding bone. 787 0 yongsun */ 788 0 yongsun void 789 0 yongsun CIMIContext::searchFrom(CSkeletonIter boneStart) 790 0 yongsun { 791 0 yongsun // iterate every bone from boneStart to the second psuedo tail 792 0 yongsun CSkeletonIter itEnd = ++getLastBone(); 793 0 yongsun CSkeletonIter bone = boneStart; 794 0 yongsun CSkeletonIter boneFirst = m_Skeleton.begin(); 795 0 yongsun for (; bone != itEnd; ) { 796 0 yongsun if (bone == boneFirst) { 797 0 yongsun // do not clear USER_SELECTION_BEST_WORD !! 798 0 yongsun bone->m_pInnerData->m_LexiconStates.clear(); 799 0 yongsun bone->m_pInnerData->m_LatticeNodes.clear(); 800 0 yongsun #ifdef _USE_RAW_PROBABILITY 801 0 yongsun bone->m_pInnerData->m_LatticeNodes.push_back(TLatticeState(-1.0, bone)); 802 0 yongsun #else 803 0 yongsun bone->m_pInnerData->m_LatticeNodes.push_back(TLatticeState(0.0, bone)); 804 0 yongsun #endif 805 0 yongsun } else { 806 0 yongsun buildLatticeStates(bone); 807 0 yongsun } 808 0 yongsun switch (bone->m_BoneType) { 809 0 yongsun case CBone::NODE_TAIL: 810 0 yongsun bone = forwardTailBone(bone); 811 0 yongsun break; 812 0 yongsun case CBone::NODE_PINYIN: 813 0 yongsun bone = forwardPinyinBone(bone); 814 0 yongsun break; 815 0 yongsun case CBone::NODE_INCOMPLETE_PINYIN: 816 0 yongsun case CBone::NODE_INVALID_PINYIN: 817 0 yongsun bone = forwardInvalidBone(bone); 818 0 yongsun break; 819 0 yongsun case CBone::NODE_PUNC: 820 0 yongsun bone = forwardPuncBone(bone); 821 0 yongsun break; 822 0 yongsun case CBone::NODE_ASCII: 823 0 yongsun case CBone::NODE_SIMBOL: 824 0 yongsun case CBone::NODE_DIGITAL: 825 0 yongsun bone = forwardNonPinyinBone(bone); 826 0 yongsun break; 827 0 yongsun }; 828 0 yongsun } 829 0 yongsun 830 0 yongsun //Build the last bone's lattice states 831 0 yongsun buildLatticeStates(itEnd); 832 0 yongsun 833 0 yongsun #ifdef DEBUG 834 0 yongsun //assert(itEnd->m_pInnerData->m_LatticeNodes.size() == 1); 835 0 yongsun #endif 836 0 yongsun 837 0 yongsun // clear all non-user selection 838 0 yongsun for (bone=boneFirst; bone != itEnd; ++bone) { 839 0 yongsun if (bone->m_pInnerData->m_BWType != CBoneInnerData::UserSelectedBestWord) 840 0 yongsun bone->m_pInnerData->m_BWType = CBoneInnerData::NoBestWordStartHere; 841 0 yongsun } 842 0 yongsun 843 0 yongsun // back tracing, find the best path 844 0 yongsun TLatticeState* bs = &(*(itEnd->m_pInnerData->m_LatticeNodes.begin())); 845 0 yongsun while (bs->m_BoneAfter != boneFirst) { 846 0 yongsun TLatticeState* fs = bs->m_pBackTraceNode; 847 0 yongsun CSkeletonIter fb = fs->m_BoneAfter; 848 0 yongsun if (fb->m_pInnerData->m_BWType != CBoneInnerData::UserSelectedBestWord) { 849 0 yongsun fb->m_pInnerData->m_BWType = CBoneInnerData::BestWordStartHere; 850 0 yongsun } 851 0 yongsun fb->m_pInnerData->m_BestWord.m_BoneStart = fb; 852 0 yongsun fb->m_pInnerData->m_BestWord.m_BoneEnd = bs->m_BoneAfter; 853 0 yongsun fb->m_pInnerData->m_BestWord.m_WordId = bs->m_BackTraceWordId; 854 0 yongsun fb->m_pInnerData->m_BestWord.m_String = (*m_pPinyinTrie)[bs->m_BackTraceWordId]; 855 0 yongsun if (fb->m_pInnerData->m_BestWord.m_String == NULL) 856 0 yongsun fb->m_pInnerData->m_BestWord.m_String = fb->m_String.c_str(); 857 0 yongsun bs = fs; 858 0 yongsun } 859 0 yongsun } 860 0 yongsun 861 0 yongsun #ifdef DEBUG 862 0 yongsun static double min_ts = 1.0; 863 0 yongsun #endif 864 0 yongsun 865 0 yongsun static double s_history_distribution[11] = { 866 0 yongsun 0.0, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50 867 0 yongsun }; 868 0 yongsun 869 0 yongsun void 870 0 yongsun CIMIContext::transferBetween(CSkeletonIter h, CSkeletonIter t, unsigned int id, double ic) 871 0 yongsun { 872 0 yongsun CLatticeStates& latss1 = h->m_pInnerData->m_LatticeNodes; 873 0 yongsun CLatticeStates& latss2 = t->m_pInnerData->m_LatticeNodes; 874 0 yongsun CLatticeStates::iterator it1 = latss1.begin(); 875 0 yongsun CLatticeStates::iterator ite = latss1.end(); 876 0 yongsun 877 0 yongsun #ifdef _USE_RAW_PROBABILITY 878 0 yongsun TLatticeState node(-1.0, t); 879 0 yongsun TSentenceScore efic(1.0); 880 0 yongsun #else 881 0 yongsun TLatticeState node(0.0, t); 882 0 yongsun TSentenceScore efic(0.0); 883 0 yongsun #endif 884 0 yongsun 885 0 yongsun if (h->m_pInnerData->m_BestWord.m_WordId == id && 886 0 yongsun h->m_pInnerData->m_BWType == CBoneInnerData::UserSelectedBestWord) { 887 0 yongsun #ifdef _USE_RAW_PROBABILITY 888 0 yongsun efic = efic * TSentenceScore(30000, 1.0); 889 0 yongsun #else 890 0 yongsun efic = ic - 30000.0; 891 0 yongsun #endif 892 0 yongsun } 893 0 yongsun 894 0 yongsun double weight_h = s_history_distribution[m_HistoryPower]; 895 0 yongsun double weight_s = 1.0 - weight_h; 896 0 yongsun 897 0 yongsun for (; it1 != ite; ++it1) { 898 0 yongsun node.m_pBackTraceNode = &(*it1); 899 0 yongsun node.m_BackTraceWordId = id; 900 0 yongsun #ifdef _USE_RAW_PROBABILITY 901 0 yongsun // the fact is that we could only use bigram cache 902 0 yongsun // and all first level node in the language model are non-empty 903 0 yongsun double ts = m_pModel->transfer(it1->m_State, id, node.m_State); 904 0 yongsun m_pModel->historify(node.m_State); 905 0 yongsun // we do not want to shrink the history state if it could be found in cache 906 0 yongsun if (node.m_State.getLevel() == 0 && m_pHistory->seenBefore(id)) { 907 0 yongsun node.m_State.setIdx(id); // an psuedo unigram node state 908 0 yongsun } 909 0 yongsun 910 0 yongsun #ifdef DEBUG 911 0 yongsun assert(it1->m_Score < TSentenceScore(0.0)); 912 0 yongsun #endif 913 0 yongsun 914 0 yongsun double cost = ts; 915 0 yongsun if (m_pHistory) { 916 0 yongsun unsigned history[2] = {m_pModel->lastWordId(it1->m_State), id}; 917 0 yongsun double hpr = m_pHistory->pr(history, history+2); 918 0 yongsun cost = weight_s * ts + weight_h*hpr; 919 0 yongsun } 920 0 yongsun node.m_Score = it1->m_Score * efic * TSentenceScore(cost); 921 0 yongsun 922 0 yongsun #ifdef DEBUG 923 0 yongsun if (!(node.m_Score < TSentenceScore(-0.0))) { 924 0 yongsun static char strangeValue[256]; 925 0 yongsun 926 0 yongsun node.m_Score.toString(strangeValue); 927 0 yongsun printf("\n***node.m_Score invalid %s ***\n", strangeValue); 928 0 yongsun 929 0 yongsun it1->m_Score.toString(strangeValue); 930 0 yongsun printf("***it1->m_Score is %s ***\n", strangeValue); 931 0 yongsun 932 0 yongsun efic.toString(strangeValue); 933 0 yongsun printf("***efic=%s, ic=(%lf)***\n", strangeValue, ic); 934 0 yongsun 935 0 yongsun TSentenceScore(cost).toString(strangeValue); 936 0 yongsun printf("***cost=%s(%lf), ts=(%16lf)***\n", strangeValue, cost, ts); 937 0 yongsun 938 0 yongsun fflush(stdout); 939 0 yongsun assert(false); 940 0 yongsun } 941 0 yongsun #endif 942 0 yongsun 943 0 yongsun #else 944 0 yongsun double ts = m_pModel->transferNegLog(it1->m_State, id, node.m_State); 945 0 yongsun m_pModel->historify(node.m_State); 946 0 yongsun node.m_Score = it1->m_Score + ts + ic; 947 0 yongsun #endif 948 0 yongsun 949 0 yongsun latss2.push_back(node); 950 0 yongsun } 951 0 yongsun } 952 0 yongsun 953 0 yongsun void 954 0 yongsun CIMIContext::buildLatticeStates(CSkeletonIter bone) 955 0 yongsun { 956 0 yongsun bool bSingleSyllable, bSingleShort; 957 0 yongsun unsigned i, sz; 958 0 yongsun CSkeletonIter bonePrev = bone; 959 0 yongsun 960 0 yongsun 961 0 yongsun --bonePrev; 962 0 yongsun CBoneInnerData & innerData = *(bone->m_pInnerData); 963 0 yongsun CLexiconStates::iterator itLexState = innerData.m_LexiconStates.begin(); 964 0 yongsun CLexiconStates::iterator itLexStateE = innerData.m_LexiconStates.end(); 965 0 yongsun innerData.m_LatticeNodes.clear(); 966 0 yongsun for (; itLexState != itLexStateE; ++itLexState) { 967 0 yongsun CLexiconState& ls = *itLexState; 968 0 yongsun 969 0 yongsun // the user selected word may be cut in first pruning process below, 970 0 yongsun // So, just let it go first, when it ends here 971 0 yongsun CBoneInnerData* pbid = ls.m_BoneStart->m_pInnerData; 972 0 yongsun if (pbid->m_BWType == CBoneInnerData::UserSelectedBestWord && 973 0 yongsun pbid->m_BestWord.m_BoneEnd == bone) { 974 0 yongsun #ifdef _USE_RAW_PROBABILITY 975 0 yongsun transferBetween(ls.m_BoneStart, bone, pbid->m_BestWord.m_WordId, 1.0); 976 0 yongsun #else 977 0 yongsun transferBetween(ls.m_BoneStart, bone, pbid->m_BestWord.m_WordId, 0.0); 978 0 yongsun #endif 979 0 yongsun } 980 0 yongsun 981 0 yongsun if (!ls.m_bPinyin) { 982 0 yongsun #ifdef _USE_RAW_PROBABILITY 983 0 yongsun transferBetween(ls.m_BoneStart, bone, ls.m_WordId, 1.0); 984 0 yongsun #else 985 0 yongsun transferBetween(ls.m_BoneStart, bone, ls.m_WordId, 0.0); 986 0 yongsun #endif 987 0 yongsun } else { 988 0 yongsun // Cutting words with little unigram possibilities 989 0 yongsun // at least 2, at most 32 of the words would be tried 990 0 yongsun // if unseed word starting from some position in the first 32 991 0 yongsun // candidates(ranked according to unigram pr in lexicon), do not 992 0 yongsun // let them be checked. 993 0 yongsun bSingleShort = bSingleSyllable = (ls.m_BoneStart == bonePrev); 994 0 yongsun if (bSingleSyllable) { 995 0 yongsun register unsigned char uc = ls.m_BoneStart->m_String[0]; 996 0 yongsun bSingleShort = ((ls.m_BoneStart->m_String.size() == 1 && (uc != 'a' && uc != 'o' && uc !='e')) || 997 0 yongsun (ls.m_BoneStart->m_String.size() == 2 && (ls.m_BoneStart->m_String[1] == 'h'))); 998 0 yongsun } 999 0 yongsun //bSingleShort = (bSingleSyllable && !(m_pPinyinTrie->isValid(ls.m_pPYNode, false))); 1000 0 yongsun 1001 0 yongsun const CPinyinTrie::TNode* pn = ls.m_pPYNode; 1002 0 yongsun const CPinyinTrie::TWordIdInfo* pwidinfo = pn->getWordIdPtr(); 1003 0 yongsun sz=pn->m_nWordId; 1004 0 yongsun if (bSingleShort) 1005 0 yongsun sz = 12; 1006 0 yongsun else if (sz > 26) 1007 0 yongsun sz = 26; 1008 0 yongsun 1009 0 yongsun int count = 0; 1010 0 yongsun for (i=0; count < sz && i < sz && (pwidinfo[i].m_bSeen == 1 || count < 2); ++i) { 1011 0 yongsun if (m_bGBK || pwidinfo[i].m_bGBK == 0) { 1012 0 yongsun #ifdef _USE_RAW_PROBABILITY 1013 0 yongsun transferBetween(ls.m_BoneStart, bone, pwidinfo[i].m_id, 1.0); 1014 0 yongsun #else 1015 0 yongsun transferBetween(ls.m_BoneStart, bone, pwidinfo[i].m_id, 0.0); 1016 0 yongsun #endif 1017 0 yongsun ++count; 1018 0 yongsun } 1019 0 yongsun } 1020 0 yongsun #ifdef _USE_RAW_PROBABILITY 1021 238 tchaikov // try cached words 1022 0 yongsun if (m_pHistory) { 1023 0 yongsun for (sz = pn->m_nWordId; i < sz; ++i) { 1024 0 yongsun if (m_bGBK || pwidinfo[i].m_bGBK == 0) { 1025 0 yongsun if (m_pHistory->seenBefore(pwidinfo[i].m_id)) { 1026 0 yongsun transferBetween(ls.m_BoneStart, bone, pwidinfo[i].m_id, 1.0); 1027 0 yongsun } 1028 0 yongsun } 1029 0 yongsun } 1030 0 yongsun } 1031 0 yongsun #endif 1032 0 yongsun } 1033 0 yongsun } 1034 0 yongsun } 1035 0 yongsun 1036 0 yongsun 1037 0 yongsun /** 1038 0 yongsun * Fussy Pinyin: 1039 0 yongsun * 1040 0 yongsun */ 1041 0 yongsun CSkeletonIter 1042 0 yongsun CIMIContext::forwardOnePinyinBone(CSkeletonIter bone) 1043 0 yongsun { 1044 0 yongsun const CPinyinTrie::TNode *pn = NULL; 1045 0 yongsun 1046 0 yongsun //clear next bone's lexicon states 1047 0 yongsun CSkeletonIter boneNext = ++CSkeletonIter(bone); 1048 0 yongsun CLexiconStates& lexss2 = boneNext->m_pInnerData->m_LexiconStates; 1049 0 yongsun lexss2.clear(); 1050 0 yongsun 1051 0 yongsun // insert the root PinYin Lexicon node 1052 0 yongsun CLexiconStates& lexss1 = bone->m_pInnerData->m_LexiconStates; 1053 0 yongsun CLexiconStates::iterator it1 = lexss1.begin(); 1054 0 yongsun CLexiconStates::iterator ite = lexss1.end(); 1055 0 yongsun for (; it1 != ite; ++it1) { 1056 0 yongsun if (it1->m_bPinyin) { 1057 0 yongsun pn = m_pPinyinTrie->transfer(it1->m_pPYNode, bone->m_String.c_str()); 1058 0 yongsun if (pn != NULL && (pn = m_pPinyinTrie->transfer(pn, TWCHAR('\''))) != NULL) { 1059 0 yongsun lexss2.push_back(CLexiconState(it1->m_BoneStart, pn)); 1060 0 yongsun } 1061 0 yongsun } 1062 0 yongsun } 1063 0 yongsun 1064 0 yongsun //try transfer from root state of the lexicon 1065 0 yongsun pn = m_pPinyinTrie->transfer(bone->m_String.c_str()); 1066 0 yongsun if (pn != NULL && (pn = m_pPinyinTrie->transfer(pn, TWCHAR('\''))) != NULL) { 1067 0 yongsun lexss2.push_back(CLexiconState(bone, pn)); 1068 0 yongsun } 1069 0 yongsun 1070 0 yongsun return boneNext; 1071 0 yongsun } 1072 0 yongsun 1073 0 yongsun CSkeletonIter 1074 0 yongsun CIMIContext::forwardPinyinBone(CSkeletonIter bone) 1075 0 yongsun { 1076 0 yongsun if (bone->m_pInnerData->m_BWType == CBoneInnerData::UserSelectedBestWord && m_bStrictLeft2Right) { 1077 0 yongsun CSkeletonIter boneLeft = bone; 1078 0 yongsun CSkeletonIter boneRight = bone->m_pInnerData->m_BestWord.m_BoneEnd; 1079 0 yongsun for (; bone != boneRight; ++bone) 1080 0 yongsun (++CSkeletonIter(bone))->m_pInnerData->clear(); 1081 0 yongsun boneRight->m_pInnerData->m_LexiconStates.push_back( 1082 0 yongsun CLexiconState(boneLeft, boneLeft->m_pInnerData->m_BestWord.m_WordId) 1083 0 yongsun ); 1084 0 yongsun return boneRight; 1085 0 yongsun } else { 1086 0 yongsun return forwardOnePinyinBone(bone); 1087 0 yongsun } 1088 0 yongsun } 1089 0 yongsun 1090 0 yongsun CSkeletonIter 1091 0 yongsun CIMIContext::forwardInvalidBone(CSkeletonIter bone) 1092 0 yongsun { 1093 0 yongsun CSkeletonIter boneNext = ++CSkeletonIter(bone); 1094 0 yongsun CLexiconStates & lss = boneNext->m_pInnerData->m_LexiconStates; 1095 0 yongsun lss.clear(); 1096 0 yongsun lss.push_back(CLexiconState(bone, (unsigned int)UNKNOWN_WORD_ID)); 1097 0 yongsun 1098 0 yongsun return boneNext; 1099 0 yongsun } 1100 0 yongsun 1101 0 yongsun CSkeletonIter 1102 0 yongsun CIMIContext::forwardPuncBone(CSkeletonIter bone) 1103 0 yongsun { 1104 0 yongsun unsigned int wid = m_pPinyinTrie->getSimbolId(bone->m_String); 1105 0 yongsun 1106 0 yongsun CSkeletonIter boneNext = ++CSkeletonIter(bone); 1107 0 yongsun CLexiconStates & lss = boneNext->m_pInnerData->m_LexiconStates; 1108 0 yongsun lss.clear(); 1109 0 yongsun lss.push_back(CLexiconState(bone, wid)); 1110 0 yongsun 1111 0 yongsun return boneNext; 1112 0 yongsun } 1113 0 yongsun 1114 0 yongsun CSkeletonIter 1115 0 yongsun CIMIContext::forwardNonPinyinBone(CSkeletonIter bone) 1116 0 yongsun { 1117 0 yongsun CSkeletonIter boneNext = ++CSkeletonIter(bone); 1118 0 yongsun CLexiconStates & lss = boneNext->m_pInnerData->m_LexiconStates; 1119 0 yongsun lss.clear(); 1120 0 yongsun lss.push_back(CLexiconState(bone, (unsigned int)UNKNOWN_WORD_ID)); 1121 0 yongsun 1122 0 yongsun return boneNext; 1123 0 yongsun } 1124 0 yongsun 1125 0 yongsun 1126 0 yongsun CSkeletonIter 1127 0 yongsun CIMIContext::forwardTailBone(CSkeletonIter bone) 1128 0 yongsun { 1129 0 yongsun CSkeletonIter boneNext = ++CSkeletonIter(bone); 1130 0 yongsun CLexiconStates & lss = boneNext->m_pInnerData->m_LexiconStates; 1131 0 yongsun lss.clear(); 1132 0 yongsun lss.push_back(CLexiconState(bone, OOV_WORD_ID)); 1133 0 yongsun 1134 0 yongsun return boneNext; 1135 0 yongsun } 1136 0 yongsun 1137 0 yongsun CBone::CBone(const CBone& b) 1138 0 yongsun : m_BoneType(b.m_BoneType), m_BoundaryType(b.m_BoundaryType), 1139 0 yongsun m_String(b.m_String), m_pInnerData(NULL) 1140 0 yongsun { 1141 0 yongsun } 1142 0 yongsun 1143 0 yongsun CBone::CBone(int boundType, int boneType) 1144 0 yongsun : m_BoneType(boneType), m_BoundaryType(boundType), 1145 0 yongsun m_String(), m_pInnerData(NULL) 1146 0 yongsun { 1147 0 yongsun } 1148 0 yongsun 1149 0 yongsun CBone::CBone(const TWCHAR* pwc, int boundType, int boneType) 1150 0 yongsun : m_BoneType(boneType), m_BoundaryType(boundType), 1151 0 yongsun m_String(pwc), m_pInnerData(NULL) 1152 0 yongsun { 1153 0 yongsun } 1154 0 yongsun 1155 0 yongsun CBone::CBone(const TWCHAR* pwc, size_t len, int boundType, int boneType) 1156 0 yongsun : m_BoneType(boneType), m_BoundaryType(boundType), 1157 0 yongsun m_String(pwc, len), m_pInnerData(NULL) 1158 0 yongsun { 1159 0 yongsun } 1160 0 yongsun 1161 0 yongsun CBone::~CBone() 1162 0 yongsun { 1163 182 tchaikov delete m_pInnerData; 1164 0 yongsun m_pInnerData = NULL; 1165 0 yongsun } 1166 0 yongsun 1167 0 yongsun bool 1168 0 yongsun CBone::isUserSelectionStart(void) 1169 0 yongsun { 1170 0 yongsun return (m_pInnerData != NULL && 1171 0 yongsun m_pInnerData->m_BWType == CBoneInnerData::UserSelectedBestWord); 1172 0 yongsun } 1173 0 yongsun 1174 0 yongsun int 1175 0 yongsun cursorMapping(CSkeletonIter head1, CSkeletonIter tail1, 1176 0 yongsun CSkeletonIter head2, CSkeletonIter tail2, 1177 0 yongsun CSkeleton& result, 1178 0 yongsun CSkeletonIter& cursor, int& cursorIdx, bool stickLeft = false) 1179 0 yongsun { 1180 0 yongsun TSkelCursor sc(head1, tail1, head2, tail2, true); 1181 0 yongsun TSkelCursor::TPos cp(cursor, cursorIdx); 1182 0 yongsun 1183 0 yongsun int len = 0; 1184 0 yongsun bool found =false; 1185 0 yongsun 1186 0 yongsun while (true) { 1187 0 yongsun found = sc.ensureCursor(cp); 1188 0 yongsun if (found) break; 1189 0 yongsun if (!sc.hasNext()) break; 1190 0 yongsun sc.next(true); 1191 0 yongsun ++len; 1192 0 yongsun } 1193 0 yongsun 1194 0 yongsun if (found) { 1195 0 yongsun TSkelCursor::TPos nc = sc.getPosition(); 1196 0 yongsun 1197 0 yongsun int cmplen = cursorIdx = 0, nNode = 0; 1198 0 yongsun for (cursor = result.begin(); cursor != result.end(); ++cmplen) { 1199 0 yongsun if (cmplen == len) break; 1200 0 yongsun ++cursorIdx; 1201 0 yongsun if (cursorIdx >= cursor->m_String.size()) { 1202 0 yongsun ++cursor; 1203 0 yongsun ++nNode; 1204 0 yongsun cursorIdx = 0; 1205 0 yongsun } 1206 0 yongsun } 1207 0 yongsun if (cmplen == len) { // now we found that 1208 0 yongsun if (stickLeft && cursor != result.begin() && cursorIdx == 0) { 1209 0 yongsun --cursor; 1210 0 yongsun --nNode; 1211 0 yongsun cursorIdx = cursor->m_String.size(); 1212 0 yongsun } 1213 0 yongsun } 1214 0 yongsun return nNode; 1215 0 yongsun } 1216 0 yongsun return -1; 1217 0 yongsun } 1218 0 yongsun 1219 0 yongsun /** 1220 0 yongsun * it is illegal if boneStart == boneEnd and skel.size() == 0 1221 0 yongsun * 1222 0 yongsun * 1. from current position, seeking left for 3 bones without HumanBoundary, or bones 1223 0 yongsun * of non-pinyin type between it. --> its, also can not beyond m_CandiBone. 1224 0 yongsun * 2. in [its, bonStart) from left to right, find the first bone would cause different 1225 0 yongsun * segmentation result. --> itd, other wise itds <-- boneStart 1226 0 yongsun * 3. from [itd, boneEnd), do automatic segment 1227 0 yongsun * 4. after boneEnd, util user boundary or non-pinyin bone 1228 0 yongsun * or segment result equals to original. --> itd2. 1229 0 yongsun * 5. [itd, itd2) re-segment, all resulting bone goes into a new skeleton --> newskel. 1230 0 yongsun * while convert old cursor position into new position. 1231 0 yongsun * 6. erase old or seg-affected nodes and splice in the newskel. 1232 0 yongsun * do search if needed 1233 0 yongsun * 1234 0 yongsun * The key part of this would seeking a solution for finding the automatic sentence 1235 0 yongsun * segmentation result. 1236 0 yongsun */ 1237 0 yongsun bool 1238 0 yongsun CIMIContext::modifyAndReseg(CSkeletonIter boneStart, CSkeletonIter boneEnd, CSkeleton& skel, 1239 0 yongsun CSkeletonIter& cursor, int& cursorIdx, CSkeletonIter& candiStart, 1240 0 yongsun bool stickLeft, bool doSearch) 1241 0 yongsun { 1242 0 yongsun CSkeleton newskel; 1243 0 yongsun 1244 0 yongsun // Try to look_left to prevent potential segmentation insufficiency 1245 0 yongsun CSkeletonIter nit, oit, its = boneStart; 1246 0 yongsun int look_left = 0; 1247 0 yongsun for (; look_left < 3 && its != getFirstBone() && its != candiStart; ++look_left) { 1248 0 yongsun --its; 1249 0 yongsun if (!its->isPinyinNode() || its->m_BoundaryType == CBone::USER_BOUNDARY) { 1250 0 yongsun ++its; 1251 0 yongsun break; 1252 0 yongsun } 1253 0 yongsun skel.push_front(*its); 1254 0 yongsun } 1255 0 yongsun 1256 0 yongsun // do Syllable segment on the virtual new list 1257 0 yongsun segPinyin(skel.begin(), skel.end(), boneEnd, getLastBone(), newskel); 1258 0 yongsun 1259 0 yongsun // Remapping the new cursor 1260 0 yongsun int ncIdx = cursorMapping(skel.begin(), skel.end(), boneEnd, getLastBone(), newskel, cursor, cursorIdx, stickLeft); 1261 0 yongsun 1262 0 yongsun // Skip previous look-left nodes that are same with original 1263 0 yongsun int first_diff = 0; 1264 0 yongsun CSkeletonIter dif_oits=skel.begin(); 1265 0 yongsun for (oit=skel.begin(), nit=newskel.begin(); first_diff < look_left; ++first_diff) { 1266 0 yongsun if (nit->m_String.size() != oit->m_String.size()) { 1267 0 yongsun dif_oits = oit; 1268 0 yongsun break; 1269 0 yongsun } 1270 0 yongsun if (ncIdx == 0) cursor = its; 1271 0 yongsun --ncIdx; 1272 0 yongsun ++its; 1273 0 yongsun ++nit; 1274 0 yongsun ++oit; 1275 0 yongsun newskel.pop_front(); 1276 0 yongsun } 1277 0 yongsun 1278 0 yongsun // prepare to restore the CandiStart 1279 0 yongsun bool candiStartPositionReset = (its == candiStart); 1280 0 yongsun 1281 0 yongsun // Prepare for cursor reposition to restore after modify 1282 0 yongsun CSkeletonIter leftIt; 1283 0 yongsun bool leftItIsHead = (its == getFirstBone()); 1284 0 yongsun if (!leftItIsHead) { 1285 0 yongsun leftIt = its; 1286 0 yongsun --leftIt; 1287 0 yongsun } 1288 0 yongsun 1289 0 yongsun // modify original node list 1290 0 yongsun bool affectCandidates = modify(its, getLastBone(), newskel, doSearch); 1291 0 yongsun 1292 0 yongsun // Reposition cursor 1293 0 yongsun if (ncIdx >= 0) { 1294 0 yongsun cursor = (leftItIsHead)?(getFirstBone()):(++CSkeletonIter(leftIt)); 1295 0 yongsun for (int i=0; i < ncIdx; ++i) 1296 0 yongsun ++cursor; 1297 0 yongsun } 1298 0 yongsun 1299 0 yongsun // Reposition candiStart 1300 0 yongsun if (candiStartPositionReset) { 1301 0 yongsun candiStart = (leftItIsHead)?(getFirstBone()):(++CSkeletonIter(leftIt)); 1302 0 yongsun affectCandidates = true; 1303 0 yongsun } 1304 0 yongsun 1305 0 yongsun return affectCandidates; 1306 0 yongsun } 1307 0 yongsun 1308 0 yongsun void 1309 0 yongsun CIMIContext::segPinyin(CSkeletonIter head1, CSkeletonIter tail1, 1310 0 yongsun CSkeletonIter head2, CSkeletonIter tail2, 1311 0 yongsun CSkeleton& result) 1312 0 yongsun { 1313 0 yongsun #ifdef DEBUG 1314 0 yongsun printf("SegPinyin:"); 1315 0 yongsun #endif 1316 0 yongsun 1317 0 yongsun const CPinyinTrie::TNode* pathNodes[16]; 1318 0 yongsun TSkelCursor::TPos positions[16]; 1319 0 yongsun 1320 0 yongsun #ifdef DEBUG 1321 0 yongsun TWCHAR dbg_msg[2] = {0, 0}; 1322 0 yongsun { 1323 0 yongsun TSkelCursor dsc(head1, tail1, head2, tail2); 1324 0 yongsun while (dsc.hasNext()) { 1325 0 yongsun if (dsc.isPinyin()) { 1326 0 yongsun dbg_msg[0] = dsc.getChar(); 1327 0 yongsun print_wide(dbg_msg); 1328 0 yongsun if (dsc.isUserBreakAfter()) { 1329 0 yongsun printf("'"); 1330 0 yongsun } 1331 0 yongsun } else { 1332 0 yongsun printf("_"); 1333 0 yongsun dbg_msg[0] = dsc.getChar(); 1334 0 yongsun print_wide(dbg_msg); 1335 0 yongsun } 1336 0 yongsun dsc.next(); 1337 0 yongsun } 1338 0 yongsun } 1339 0 yongsun #endif 1340 0 yongsun 1341 0 yongsun result.clear(); 1342 0 yongsun TSkelCursor sc(head1, tail1, head2, tail2); 1343 0 yongsun while (sc.hasNext()) { 1344 0 yongsun if (sc.isPinyin()) { 1345 303 yongsun int lastValid = 0; 1346 0 yongsun pathNodes[0] = m_pPinyinTrie->getRootNode(); 1347 0 yongsun positions[0] = sc.getPosition(); 1348 0 yongsun for (int idx=1; sc.isPinyin() && pathNodes[idx-1] != NULL; ++idx) { 1349 0 yongsun pathNodes[idx] = m_pPinyinTrie->transfer(pathNodes[idx-1], sc.getChar()); 1350 0 yongsun sc.next(); 1351 0 yongsun positions[idx] = sc.getPosition(); 1352 0 yongsun if (m_pPinyinTrie->isValid(pathNodes[idx], m_bNonCompleteSyllable, m_bGBK)) 1353 0 yongsun lastValid = idx; 1354 0 yongsun if (sc.isUserBreakAfter(positions[idx-1])) 1355 0 yongsun break; 1356 0 yongsun } 1357 0 yongsun bool invalid = false; 1358 0 yongsun if (lastValid == 0) { 1359 0 yongsun invalid = true; 1360 0 yongsun lastValid = 1; 1361 0 yongsun } 1362 0 yongsun if (lastValid >= 2 && pathNodes[lastValid]->m_bFullSyllableTransfer && pathNodes[lastValid-1]->m_bFullSyllableTransfer) { 1363 0 yongsun TWCHAR w1 = sc.getChar(positions[lastValid-1]); 1364 0 yongsun TWCHAR w2 = sc.getChar(positions[lastValid]); 1365 0 yongsun if (!isYuanYinChar(w1) && isYuanYinChar(w2)){ 1366 0 yongsun const CPinyinTrie::TNode* pytmp = NULL; 1367 0 yongsun pytmp = m_pPinyinTrie->transfer(m_pPinyinTrie->getRootNode(), w1); 1368 0 yongsun if (pytmp) pytmp = m_pPinyinTrie->transfer(pytmp, w2); 1369 0 yongsun if (pytmp != NULL) --lastValid; 1370 0 yongsun } 1371 0 yongsun } 1372 0 yongsun CBone bnint(CBone::AUTO_BOUNDARY, (invalid)?(CBone::NODE_INVALID_PINYIN):(CBone::NODE_PINYIN)); 1373 0 yongsun if (sc.isUserBreakAfter(positions[lastValid-1])) 1374 0 yongsun bnint.m_BoundaryType = CBone::USER_BOUNDARY; 1375 0 yongsun for (int idx=0; idx < lastValid; ++idx) 1376 0 yongsun bnint.m_String += sc.getChar(positions[idx]); 1377 0 yongsun result.push_back(bnint); 1378 0 yongsun sc.setPosition(positions[lastValid]); 1379 0 yongsun } else { 1380 0 yongsun result.push_back(*(sc.getPosition().m_bone)); 1381 0 yongsun sc.nextBone(); 1382 0 yongsun } 1383 0 yongsun } 1384 0 yongsun 1385 0 yongsun #ifdef DEBUG 1386 0 yongsun { 1387 0 yongsun printf(" ==> "); 1388 0 yongsun TSkelCursor dsc(result.begin(), result.end(), result.end(), result.end()); 1389 0 yongsun while (dsc.hasNext()) { 1390 0 yongsun if (dsc.isPinyin()) { 1391 0 yongsun dbg_msg[0] = dsc.getChar(); 1392 0 yongsun print_wide(dbg_msg); 1393 0 yongsun if (dsc.isBreakAfter()) { 1394 0 yongsun printf("'"); 1395 0 yongsun } 1396 0 yongsun } else { 1397 0 yongsun printf("_"); 1398 0 yongsun dbg_msg[0] = dsc.getChar(); 1399 0 yongsun print_wide(dbg_msg); 1400 0 yongsun } 1401 0 yongsun dsc.next(); 1402 0 yongsun } 1403 0 yongsun } 1404 0 yongsun fflush(stdout); 1405 0 yongsun #endif 1406 0 yongsun return; 1407 0 yongsun } 1408 0 yongsun 1409 0 yongsun void 1410 0 yongsun CIMIContext::setHistoryMemory(CICHistory *phm) 1411 0 yongsun { 1412 0 yongsun m_pHistory = phm; 1413 0 yongsun } 1414 0 yongsun 1415 0 yongsun CICHistory * 1416 0 yongsun CIMIContext::getHistoryMemory() 1417 0 yongsun { 1418 0 yongsun return m_pHistory; 1419 0 yongsun } 1420 0 yongsun 1421 0 yongsun void CIMIContext::memorize(void) 1422 0 yongsun { 1423 0 yongsun if (m_pHistory != NULL) { 1424 0 yongsun std::vector<unsigned int> result; 1425 0 yongsun CSkeletonIter boneStart = getFirstBone(); 1426 0 yongsun CSkeletonIter boneEnd = getLastBone(); 1427 0 yongsun 1428 0 yongsun while (boneStart != boneEnd) { 1429 0 yongsun #ifdef DEBUG 1430 0 yongsun //assert(boneStart->m_pInnerData->m_BWType != CBoneInnerData::NoBestWordStartHere); 1431 0 yongsun #endif 1432 0 yongsun 1433 0 yongsun CSkeletonIter bone = boneStart; 1434 0 yongsun CSkeletonIter rightBone = boneStart->m_pInnerData->m_BestWord.m_BoneEnd; 1435 0 yongsun if (boneStart->m_BoneType != CBone::NODE_PINYIN && boneStart->m_BoneType != CBone::NODE_INCOMPLETE_PINYIN) { 1436 0 yongsun while (bone != rightBone && bone != boneEnd) 1437 0 yongsun ++bone; 1438 0 yongsun result.push_back(0); 1439 0 yongsun } else { 1440 0 yongsun while (bone != rightBone && bone != boneEnd) 1441 0 yongsun ++bone; 1442 0 yongsun result.push_back(boneStart->m_pInnerData->m_BestWord.m_WordId); 1443 0 yongsun } 1444 0 yongsun 1445 0 yongsun boneStart = bone; 1446 0 yongsun } 1447 0 yongsun if (result.size() > 0) 1448 0 yongsun m_pHistory->memorize(&(result[0]), (&(result[0])) + result.size()); 1449 0 yongsun } 1450 0 yongsun } 1451 0 yongsun 1452 0 yongsun void 1453 0 yongsun CIMIContext::print_lattice() 1454 0 yongsun { 1455 0 yongsun printf("\n"); 1456 0 yongsun std::string prefix; 1457 0 yongsun CSkeletonIter bone = getFirstBone(); 1458 0 yongsun CSkeletonIter boneEnd = getLastBone(); 1459 0 yongsun for (;bone != boneEnd; ++bone) 1460 0 yongsun bone->print(prefix); 1461 0 yongsun boneEnd->print(prefix); 1462 0 yongsun (++boneEnd)->print(prefix); 1463 0 yongsun fflush(stdout); 1464 0 yongsun } 1465 0 yongsun 1466 0 yongsun void 1467 0 yongsun CBone::print(std::string& prefix) 1468 0 yongsun { 1469 0 yongsun printf(prefix.c_str()); 1470 0 yongsun printf("{Bone@%X:", this); 1471 0 yongsun print_wide(m_String.c_str()); 1472 0 yongsun printf("}"); 1473 0 yongsun prefix += " "; 1474 0 yongsun if (m_pInnerData) 1475 0 yongsun m_pInnerData->print(prefix); 1476 0 yongsun prefix.resize(prefix.size() - 4); 1477 0 yongsun fflush(stdout); 1478 0 yongsun } 1479 0 yongsun 1480 0 yongsun void 1481 0 yongsun CCandidate::print(std::string& prefix) 1482 0 yongsun { 1483 0 yongsun printf(prefix.c_str()); 1484 0 yongsun printf("<Candidate @%X:", this); 1485 0 yongsun print_wide(m_String); 1486 0 yongsun printf("-- %d}", m_WordId); 1487 0 yongsun fflush(stdout); 1488 0 yongsun } 1489 0 yongsun 1490 0 yongsun 1491 0 yongsun TLongExpFloat::TLongExpFloat(double d) 1492 0 yongsun { 1493 0 yongsun if (d != 0.0 && d != -0.0) { 1494 0 yongsun TDoubleAnatomy da(d); 1495 0 yongsun m_exp = da.getExp(); 1496 0 yongsun da.clearExp(); 1497 0 yongsun m_base = da.getValue(); 1498 0 yongsun } else { 1499 0 yongsun m_base = d; 1500 0 yongsun m_exp = 0; 1501 0 yongsun } 1502 0 yongsun } 1503 0 yongsun 1504 0 yongsun TLongExpFloat 1505 0 yongsun TLongExpFloat::operator* (const TLongExpFloat& b) const 1506 0 yongsun { 1507 0 yongsun double d = this->m_base * b.m_base; 1508 0 yongsun TLongExpFloat reda(d); 1509 0 yongsun reda.m_exp += this->m_exp + b.m_exp; 1510 0 yongsun return reda; 1511 0 yongsun } 1512 0 yongsun 1513 0 yongsun TLongExpFloat 1514 0 yongsun TLongExpFloat::operator/ (const TLongExpFloat& b) const 1515 0 yongsun { 1516 0 yongsun double d = this->m_base / b.m_base; 1517 0 yongsun TLongExpFloat reda(d); 1518 0 yongsun reda.m_exp += (this->m_exp - b.m_exp); 1519 0 yongsun return reda; 1520 0 yongsun } 1521 0 yongsun 1522 0 yongsun bool 1523 0 yongsun TLongExpFloat::operator< (const TLongExpFloat& b) const 1524 0 yongsun { 1525 0 yongsun if (m_base >= 0.0 && b.m_base >= 0.0) { 1526 0 yongsun return (m_exp < b.m_exp || (m_exp == b.m_exp && m_base < b.m_base)); 1527 0 yongsun } else if (m_base < 0.0 && b.m_base < 0.0) { 1528 0 yongsun return (m_exp > b.m_exp || (m_exp == b.m_exp && m_base < b.m_base)); 1529 0 yongsun } else if (m_base < 0.0 && b.m_base >= 0.0) 1530 0 yongsun return true; 1531 0 yongsun else 1532 0 yongsun return false; 1533 0 yongsun } 1534 0 yongsun 1535 0 yongsun bool 1536 0 yongsun TLongExpFloat::operator<=(const TLongExpFloat& b) const 1537 0 yongsun { 1538 0 yongsun if (m_base >= 0.0 && b.m_base >= 0.0) { 1539 0 yongsun return (m_exp < b.m_exp || (m_exp == b.m_exp && m_base <= b.m_base)); 1540 0 yongsun } else if (m_base < 0.0 && b.m_base < 0.0) { 1541 0 yongsun return (m_exp > b.m_exp || (m_exp == b.m_exp && m_base <= b.m_base)); 1542 0 yongsun } else if (m_base < 0.0 && b.m_base >= 0.0) 1543 0 yongsun return true; 1544 0 yongsun else 1545 0 yongsun return false; 1546 0 yongsun } 1547 0 yongsun 1548 0 yongsun bool 1549 0 yongsun TLongExpFloat::operator==(const TLongExpFloat& b) const 1550 0 yongsun { 1551 0 yongsun return (m_base == b.m_base && m_exp == b.m_exp); 1552 0 yongsun } 1553 0 yongsun 1554 0 yongsun void 1555 0 yongsun TLongExpFloat::toString(std::string& str) const 1556 0 yongsun { 1557 0 yongsun char buf[256]; 1558 0 yongsun toString(buf); 1559 0 yongsun str = buf; 1560 0 yongsun } 1561