#!/usr/bin/python

# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
#
# Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
#
# The contents of this file are subject to the terms of either the GNU Lesser
# General Public License Version 2.1 only ("LGPL") or the Common Development and
# Distribution License ("CDDL")(collectively, the "License"). You may not use this
# file except in compliance with the License. You can obtain a copy of the CDDL at
# http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
# http://www.opensource.org/licenses/lgpl-license.php. See the License for the
# specific language governing permissions and limitations under the License. When
# distributing the software, include this License Header Notice in each file and
# include the full text of the License in the License file as well as the
# following notice:
#
# NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
# (CDDL)
# For Covered Software in this distribution, this License shall be governed by the
# laws of the State of California (excluding conflict-of-law provisions).
# Any litigation relating to this License shall be subject to the jurisdiction of
# the Federal Courts of the Northern District of California and the state courts
# of the State of California, with venue lying in Santa Clara County, California.
#
# Contributor(s):
#
# If you wish your version of this file to be governed by only the CDDL or only
# the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
# include this software in this distribution under the [CDDL or LGPL Version 2.1]
# license." If you don't indicate a single choice of license, a recipient has the
# option to distribute your version of this file under either the CDDL or the LGPL
# Version 2.1, or to extend the choice of license to its licensees as provided
# above.
# However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
# Version 2 license, then the option applies only if the new code is made subject
# to such option by the copyright holder.

# Syllable tables (the entries are Hanyu Pinyin spellings).
#
# `valid_syllables` maps a syllable string to an integer code.  For every
# entry below the code equals
#
#     (initials.index(initial) << 12) | (finals.index(final) << 4)
#
# where the syllable is split as initial + final and "zh" / "ch" / "sh" count
# as a single two-letter initial.  The low four bits are zero in every entry.
# NOTE(review): the low nibble is presumably reserved for extra per-syllable
# data (e.g. a tone number) -- confirm with the code that consumes these values.
#
# Because the codes embed list positions, the ORDER of `initials` and `finals`
# is significant: reordering either list invalidates the literals below.

# Initial (onset) spellings; the list position is the initial's numeric index.
# Index 0 is the empty string, used by syllables without an initial
# (e.g. "a" -> 0x00010).
initials = [
    "", "b", "p", "m", "f", "d", "t", "n", "l", "g", "k", "h",
    "j", "q", "x", "zh", "ch", "sh", "r", "z", "c", "s", "y", "w",
]

# Final (rime) spellings; the list position is the final's numeric index.
# Index 0 is the empty string, used by the bare-initial entries
# (e.g. "b" -> 0x01000).
# NOTE(review): "v" presumably stands for the u-umlaut final (see "lv", "nv").
finals = [
    "", "a", "o", "e", "ai", "ei", "ao", "ou", "an", "en", "ang", "eng",
    "er", "i", "ia", "ie", "iao", "iu", "ian", "in", "iang", "ing", "u",
    "ua", "uo", "uai", "ui", "uan", "un", "uang", "ong", "v", "ue", "iong",
]

# A subset of `finals`.
# NOTE(review): how this list is used is not visible in this part of the
# file -- confirm against the code that reads it.
inner_fuzzy_finals = ["iao", "ian", "iang", "uai", "uan", "uang"]

# Syllable -> packed code, listed in alphabetical order of the syllable.
valid_syllables = {
    "a": 0x00010,
    "ai": 0x00040,
    "an": 0x00080,
    "ang": 0x000a0,
    "ao": 0x00060,
    "b": 0x01000,
    "ba": 0x01010,
    "bai": 0x01040,
    "ban": 0x01080,
    "bang": 0x010a0,
    "bao": 0x01060,
    "bei": 0x01050,
    "ben": 0x01090,
    "beng": 0x010b0,
    "bi": 0x010d0,
    "bian": 0x01120,
    "biao": 0x01100,
    "bie": 0x010f0,
    "bin": 0x01130,
    "bing": 0x01150,
    "bo": 0x01020,
    "bu": 0x01160,
    "c": 0x14000,
    "ca": 0x14010,
    "cai": 0x14040,
    "can": 0x14080,
    "cang": 0x140a0,
    "cao": 0x14060,
    "ce": 0x14030,
    "cei": 0x14050,
    "cen": 0x14090,
    "ceng": 0x140b0,
    "ch": 0x10000,
    "cha": 0x10010,
    "chai": 0x10040,
    "chan": 0x10080,
    "chang": 0x100a0,
    "chao": 0x10060,
    "che": 0x10030,
    "chen": 0x10090,
    "cheng": 0x100b0,
    "chi": 0x100d0,
    "chong": 0x101e0,
    "chou": 0x10070,
    "chu": 0x10160,
    "chua": 0x10170,
    "chuai": 0x10190,
    "chuan": 0x101b0,
    "chuang": 0x101d0,
    "chui": 0x101a0,
    "chun": 0x101c0,
    "chuo": 0x10180,
    "ci": 0x140d0,
    "cong": 0x141e0,
    "cou": 0x14070,
    "cu": 0x14160,
    "cuan": 0x141b0,
    "cui": 0x141a0,
    "cun": 0x141c0,
    "cuo": 0x14180,
    "d": 0x05000,
    "da": 0x05010,
    "dai": 0x05040,
    "dan": 0x05080,
    "dang": 0x050a0,
    "dao": 0x05060,
    "de": 0x05030,
    "dei": 0x05050,
    "den": 0x05090,
    "deng": 0x050b0,
    "di": 0x050d0,
    "dia": 0x050e0,
    "dian": 0x05120,
    "diao": 0x05100,
    "die": 0x050f0,
    "ding": 0x05150,
    "diu": 0x05110,
    "dong": 0x051e0,
    "dou": 0x05070,
    "du": 0x05160,
    "duan": 0x051b0,
    "dui": 0x051a0,
    "dun": 0x051c0,
    "duo": 0x05180,
    "e": 0x00030,
    "ei": 0x00050,
    "en": 0x00090,
    "eng": 0x000b0,
    "er": 0x000c0,
    "f": 0x04000,
    "fa": 0x04010,
    "fan": 0x04080,
    "fang": 0x040a0,
    "fei": 0x04050,
    "fen": 0x04090,
    "feng": 0x040b0,
    "fiao": 0x04100,
    "fo": 0x04020,
    "fou": 0x04070,
    "fu": 0x04160,
    "g": 0x09000,
    "ga": 0x09010,
    "gai": 0x09040,
    "gan": 0x09080,
    "gang": 0x090a0,
    "gao": 0x09060,
    "ge": 0x09030,
    "gei": 0x09050,
    "gen": 0x09090,
    "geng": 0x090b0,
    "gong": 0x091e0,
    "gou": 0x09070,
    "gu": 0x09160,
    "gua": 0x09170,
    "guai": 0x09190,
    "guan": 0x091b0,
    "guang": 0x091d0,
    "gui": 0x091a0,
    "gun": 0x091c0,
    "guo": 0x09180,
    "h": 0x0b000,
    "ha": 0x0b010,
    "hai": 0x0b040,
    "han": 0x0b080,
    "hang": 0x0b0a0,
    "hao": 0x0b060,
    "he": 0x0b030,
    "hei": 0x0b050,
    "hen": 0x0b090,
    "heng": 0x0b0b0,
    "hong": 0x0b1e0,
    "hou": 0x0b070,
    "hu": 0x0b160,
    "hua": 0x0b170,
    "huai": 0x0b190,
    "huan": 0x0b1b0,
    "huang": 0x0b1d0,
    "hui": 0x0b1a0,
    "hun": 0x0b1c0,
    "huo": 0x0b180,
    "j": 0x0c000,
    "ji": 0x0c0d0,
    "jia": 0x0c0e0,
    "jian": 0x0c120,
    "jiang": 0x0c140,
    "jiao": 0x0c100,
    "jie": 0x0c0f0,
    "jin": 0x0c130,
    "jing": 0x0c150,
    "jiong": 0x0c210,
    "jiu": 0x0c110,
    "ju": 0x0c160,
    "juan": 0x0c1b0,
    "jue": 0x0c200,
    "jun": 0x0c1c0,
    "k": 0x0a000,
    "ka": 0x0a010,
    "kai": 0x0a040,
    "kan": 0x0a080,
    "kang": 0x0a0a0,
    "kao": 0x0a060,
    "ke": 0x0a030,
    "kei": 0x0a050,
    "ken": 0x0a090,
    "keng": 0x0a0b0,
    "kong": 0x0a1e0,
    "kou": 0x0a070,
    "ku": 0x0a160,
    "kua": 0x0a170,
    "kuai": 0x0a190,
    "kuan": 0x0a1b0,
    "kuang": 0x0a1d0,
    "kui": 0x0a1a0,
    "kun": 0x0a1c0,
    "kuo": 0x0a180,
    "l": 0x08000,
    "la": 0x08010,
    "lai": 0x08040,
    "lan": 0x08080,
    "lang": 0x080a0,
    "lao": 0x08060,
    "le": 0x08030,
    "lei": 0x08050,
    "leng": 0x080b0,
    "li": 0x080d0,
    "lia": 0x080e0,
    "lian": 0x08120,
    "liang": 0x08140,
    "liao": 0x08100,
    "lie": 0x080f0,
    "lin": 0x08130,
    "ling": 0x08150,
    "liu": 0x08110,
    "long": 0x081e0,
    "lou": 0x08070,
    "lu": 0x08160,
    "luan": 0x081b0,
    "lue": 0x08200,
    "lun": 0x081c0,
    "luo": 0x08180,
    "lv": 0x081f0,
    "m": 0x03000,
    "ma": 0x03010,
    "mai": 0x03040,
    "man": 0x03080,
    "mang": 0x030a0,
    "mao": 0x03060,
    "me": 0x03030,
    "mei": 0x03050,
    "men": 0x03090,
    "meng": 0x030b0,
    "mi": 0x030d0,
    "mian": 0x03120,
    "miao": 0x03100,
    "mie": 0x030f0,
    "min": 0x03130,
    "ming": 0x03150,
    "miu": 0x03110,
    "mo": 0x03020,
    "mou": 0x03070,
    "mu": 0x03160,
    "n": 0x07000,
    "na": 0x07010,
    "nai": 0x07040,
    "nan": 0x07080,
    "nang": 0x070a0,
    "nao": 0x07060,
    "ne": 0x07030,
    "nei": 0x07050,
    "nen": 0x07090,
    "neng": 0x070b0,
    "ni": 0x070d0,
    "nian": 0x07120,
    "niang": 0x07140,
    "niao": 0x07100,
    "nie": 0x070f0,
    "nin": 0x07130,
    "ning": 0x07150,
    "niu": 0x07110,
    "nong": 0x071e0,
    "nou": 0x07070,
    "nu": 0x07160,
    "nuan": 0x071b0,
    "nue": 0x07200,
    "nun": 0x071c0,
    "nuo": 0x07180,
    "nv": 0x071f0,
    "o": 0x00020,
    "ou": 0x00070,
    "p": 0x02000,
    "pa": 0x02010,
    "pai": 0x02040,
    "pan": 0x02080,
    "pang": 0x020a0,
    "pao": 0x02060,
    "pei": 0x02050,
    "pen": 0x02090,
    "peng": 0x020b0,
    "pi": 0x020d0,
    "pian": 0x02120,
    "piao": 0x02100,
    "pie": 0x020f0,
    "pin": 0x02130,
    "ping": 0x02150,
    "po": 0x02020,
    "pou": 0x02070,
    "pu": 0x02160,
    "q": 0x0d000,
    "qi": 0x0d0d0,
    "qia": 0x0d0e0,
    "qian": 0x0d120,
    "qiang": 0x0d140,
    "qiao": 0x0d100,
    "qie": 0x0d0f0,
    "qin": 0x0d130,
    "qing": 0x0d150,
    "qiong": 0x0d210,
    "qiu": 0x0d110,
    "qu": 0x0d160,
    "quan": 0x0d1b0,
    "que": 0x0d200,
    "qun": 0x0d1c0,
    "r": 0x12000,
    "ran": 0x12080,
    "rang": 0x120a0,
    "rao": 0x12060,
    "re": 0x12030,
    "ren": 0x12090,
    "reng": 0x120b0,
    "ri": 0x120d0,
    "rong": 0x121e0,
    "rou": 0x12070,
    "ru": 0x12160,
    "ruan": 0x121b0,
    "rui": 0x121a0,
    "run": 0x121c0,
    "ruo": 0x12180,
    "s": 0x15000,
    "sa": 0x15010,
    "sai": 0x15040,
    "san": 0x15080,
    "sang": 0x150a0,
    "sao": 0x15060,
    "se": 0x15030,
    "sen": 0x15090,
    "seng": 0x150b0,
    "sh": 0x11000,
    "sha": 0x11010,
    "shai": 0x11040,
    "shan": 0x11080,
    "shang": 0x110a0,
    "shao": 0x11060,
    "she": 0x11030,
    "shei": 0x11050,
    "shen": 0x11090,
    "sheng": 0x110b0,
    "shi": 0x110d0,
    "shou": 0x11070,
    "shu": 0x11160,
    "shua": 0x11170,
    "shuai": 0x11190,
    "shuan": 0x111b0,
    "shuang": 0x111d0,
    "shui": 0x111a0,
    "shun": 0x111c0,
    "shuo": 0x11180,
    "si": 0x150d0,
    "song": 0x151e0,
    "sou": 0x15070,
    "su": 0x15160,
    "suan": 0x151b0,
    "sui": 0x151a0,
    "sun": 0x151c0,
    "suo": 0x15180,
    "t": 0x06000,
    "ta": 0x06010,
    "tai": 0x06040,
    "tan": 0x06080,
    "tang": 0x060a0,
    "tao": 0x06060,
    "te": 0x06030,
    "tei": 0x06050,
    "teng": 0x060b0,
    "ti": 0x060d0,
    "tian": 0x06120,
    "tiao": 0x06100,
    "tie": 0x060f0,
    "ting": 0x06150,
    "tong": 0x061e0,
    "tou": 0x06070,
    "tu": 0x06160,
    "tuan": 0x061b0,
    "tui": 0x061a0,
    "tun": 0x061c0,
    "tuo": 0x06180,
    "w": 0x17000,
    "wa": 0x17010,
    "wai": 0x17040,
    "wan": 0x17080,
    "wang": 0x170a0,
    "wei": 0x17050,
    "wen": 0x17090,
    "weng": 0x170b0,
    "wo": 0x17020,
    "wu": 0x17160,
    "x": 0x0e000,
    "xi": 0x0e0d0,
    "xia": 0x0e0e0,
    "xian": 0x0e120,
    "xiang": 0x0e140,
    "xiao": 0x0e100,
    "xie": 0x0e0f0,
    "xin": 0x0e130,
    "xing": 0x0e150,
    "xiong": 0x0e210,
    "xiu": 0x0e110,
    "xu": 0x0e160,
    "xuan": 0x0e1b0,
    "xue": 0x0e200,
    "xun": 0x0e1c0,
    "y": 0x16000,
    "ya": 0x16010,
    "yai": 0x16040,
    "yan": 0x16080,
    "yang": 0x160a0,
    "yao": 0x16060,
    "ye": 0x16030,
    "yi": 0x160d0,
    "yin": 0x16130,
    "ying": 0x16150,
    "yo": 0x16020,
    "yong": 0x161e0,
    "you": 0x16070,
    "yu": 0x16160,
    "yuan": 0x161b0,
    "yue": 0x16200,
    "yun": 0x161c0,
    "z": 0x13000,
    "za": 0x13010,
    "zai": 0x13040,
    "zan": 0x13080,
    "zang": 0x130a0,
    "zao": 0x13060,
    "ze": 0x13030,
    "zei": 0x13050,
    "zen": 0x13090,
    "zeng": 0x130b0,
    "zh": 0x0f000,
    "zha": 0x0f010,
    "zhai": 0x0f040,
    "zhan": 0x0f080,
    "zhang": 0x0f0a0,
    "zhao": 0x0f060,
    "zhe": 0x0f030,
    "zhei": 0x0f050,
    "zhen": 0x0f090,
    "zheng": 0x0f0b0,
    "zhi": 0x0f0d0,
    "zhong": 0x0f1e0,
    "zhou": 0x0f070,
    "zhu": 0x0f160,
    "zhua": 0x0f170,
    "zhuai": 0x0f190,
    "zhuan": 0x0f1b0,
    "zhuang": 0x0f1d0,
    "zhui": 0x0f1a0,
    "zhun": 0x0f1c0,
    "zhuo": 0x0f180,
    "zi": 0x130d0,
    "zong": 0x131e0,
    "zou": 0x13070,
    "zu": 0x13160,
    "zuan": 0x131b0,
    "zui": 0x131a0,
    "zun": 0x131c0,
    "zuo": 0x13180,
}