Home | History | Annotate | Download | only in python
      1 #!/usr/bin/python 
      2 
      3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
      4 # 
      5 # Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
      6 # 
      7 # The contents of this file are subject to the terms of either the GNU Lesser
      8 # General Public License Version 2.1 only ("LGPL") or the Common Development and
      9 # Distribution License ("CDDL")(collectively, the "License"). You may not use this
     10 # file except in compliance with the License. You can obtain a copy of the CDDL at
     11 # http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
     12 # http://www.opensource.org/licenses/lgpl-license.php. See the License for the 
     13 # specific language governing permissions and limitations under the License. When
     14 # distributing the software, include this License Header Notice in each file and
     15 # include the full text of the License in the License file as well as the
     16 # following notice:
     17 # 
     18 # NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
     19 # (CDDL)
     20 # For Covered Software in this distribution, this License shall be governed by the
     21 # laws of the State of California (excluding conflict-of-law provisions).
     22 # Any litigation relating to this License shall be subject to the jurisdiction of
     23 # the Federal Courts of the Northern District of California and the state courts
     24 # of the State of California, with venue lying in Santa Clara County, California.
     25 # 
     26 # Contributor(s):
     27 # 
     28 # If you wish your version of this file to be governed by only the CDDL or only
     29 # the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
     30 # include this software in this distribution under the [CDDL or LGPL Version 2.1]
     31 # license." If you don't indicate a single choice of license, a recipient has the
     32 # option to distribute your version of this file under either the CDDL or the LGPL
     33 # Version 2.1, or to extend the choice of license to its licensees as provided
     34 # above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
     35 # Version 2 license, then the option applies only if the new code is made subject
     36 # to such option by the copyright holder. 
     37 
     38 initials = ["", "b", "p", "m", "f", "d", "t", "n", "l", "g", "k", "h", "j", "q", "x", "zh", "ch", "sh", "r", "z", "c", "s", "y", "w", ]
     39 
     40 finals = ["", "a", "o", "e", "ai", "ei", "ao", "ou", "an", "en", "ang", "eng", "er", "i", "ia", "ie", "iao", "iu", "ian", "in", "iang", "ing", "u", "ua", "uo", "uai", "ui", "uan", "un", "uang", "ong", "v", "ue", "iong", ]
     41 
     42 inner_fuzzy_finals = ['iao', 'ian', 'iang', 'uai', 'uan', 'uang']
     43 
     44 valid_syllables = {
     45      "a":       0x00010,
     46      "ai":      0x00040,
     47      "an":      0x00080,
     48      "ang":     0x000a0,
     49      "ao":      0x00060,
     50      "b":       0x01000,
     51      "ba":      0x01010,
     52      "bai":     0x01040,
     53      "ban":     0x01080,
     54      "bang":    0x010a0,
     55      "bao":     0x01060,
     56      "bei":     0x01050,
     57      "ben":     0x01090,
     58      "beng":    0x010b0,
     59      "bi":      0x010d0,
     60      "bian":    0x01120,
     61      "biao":    0x01100,
     62      "bie":     0x010f0,
     63      "bin":     0x01130,
     64      "bing":    0x01150,
     65      "bo":      0x01020,
     66      "bu":      0x01160,
     67      "c":       0x14000,
     68      "ca":      0x14010,
     69      "cai":     0x14040,
     70      "can":     0x14080,
     71      "cang":    0x140a0,
     72      "cao":     0x14060,
     73      "ce":      0x14030,
     74      "cei":     0x14050,
     75      "cen":     0x14090,
     76      "ceng":    0x140b0,
     77      "ch":      0x10000,
     78      "cha":     0x10010,
     79      "chai":    0x10040,
     80      "chan":    0x10080,
     81      "chang":   0x100a0,
     82      "chao":    0x10060,
     83      "che":     0x10030,
     84      "chen":    0x10090,
     85      "cheng":   0x100b0,
     86      "chi":     0x100d0,
     87      "chong":   0x101e0,
     88      "chou":    0x10070,
     89      "chu":     0x10160,
     90      "chua":    0x10170,
     91      "chuai":   0x10190,
     92      "chuan":   0x101b0,
     93      "chuang":  0x101d0,
     94      "chui":    0x101a0,
     95      "chun":    0x101c0,
     96      "chuo":    0x10180,
     97      "ci":      0x140d0,
     98      "cong":    0x141e0,
     99      "cou":     0x14070,
    100      "cu":      0x14160,
    101      "cuan":    0x141b0,
    102      "cui":     0x141a0,
    103      "cun":     0x141c0,
    104      "cuo":     0x14180,
    105      "d":       0x05000,
    106      "da":      0x05010,
    107      "dai":     0x05040,
    108      "dan":     0x05080,
    109      "dang":    0x050a0,
    110      "dao":     0x05060,
    111      "de":      0x05030,
    112      "dei":     0x05050,
    113      "den":     0x05090,
    114      "deng":    0x050b0,
    115      "di":      0x050d0,
    116      "dia":     0x050e0,
    117      "dian":    0x05120,
    118      "diao":    0x05100,
    119      "die":     0x050f0,
    120      "ding":    0x05150,
    121      "diu":     0x05110,
    122      "dong":    0x051e0,
    123      "dou":     0x05070,
    124      "du":      0x05160,
    125      "duan":    0x051b0,
    126      "dui":     0x051a0,
    127      "dun":     0x051c0,
    128      "duo":     0x05180,
    129      "e":       0x00030,
    130      "ei":      0x00050,
    131      "en":      0x00090,
    132      "eng":     0x000b0,
    133      "er":      0x000c0,
    134      "f":       0x04000,
    135      "fa":      0x04010,
    136      "fan":     0x04080,
    137      "fang":    0x040a0,
    138      "fei":     0x04050,
    139      "fen":     0x04090,
    140      "feng":    0x040b0,
    141      "fiao":    0x04100,
    142      "fo":      0x04020,
    143      "fou":     0x04070,
    144      "fu":      0x04160,
    145      "g":       0x09000,
    146      "ga":      0x09010,
    147      "gai":     0x09040,
    148      "gan":     0x09080,
    149      "gang":    0x090a0,
    150      "gao":     0x09060,
    151      "ge":      0x09030,
    152      "gei":     0x09050,
    153      "gen":     0x09090,
    154      "geng":    0x090b0,
    155      "gong":    0x091e0,
    156      "gou":     0x09070,
    157      "gu":      0x09160,
    158      "gua":     0x09170,
    159      "guai":    0x09190,
    160      "guan":    0x091b0,
    161      "guang":   0x091d0,
    162      "gui":     0x091a0,
    163      "gun":     0x091c0,
    164      "guo":     0x09180,
    165      "h":       0x0b000,
    166      "ha":      0x0b010,
    167      "hai":     0x0b040,
    168      "han":     0x0b080,
    169      "hang":    0x0b0a0,
    170      "hao":     0x0b060,
    171      "he":      0x0b030,
    172      "hei":     0x0b050,
    173      "hen":     0x0b090,
    174      "heng":    0x0b0b0,
    175      "hong":    0x0b1e0,
    176      "hou":     0x0b070,
    177      "hu":      0x0b160,
    178      "hua":     0x0b170,
    179      "huai":    0x0b190,
    180      "huan":    0x0b1b0,
    181      "huang":   0x0b1d0,
    182      "hui":     0x0b1a0,
    183      "hun":     0x0b1c0,
    184      "huo":     0x0b180,
    185      "j":       0x0c000,
    186      "ji":      0x0c0d0,
    187      "jia":     0x0c0e0,
    188      "jian":    0x0c120,
    189      "jiang":   0x0c140,
    190      "jiao":    0x0c100,
    191      "jie":     0x0c0f0,
    192      "jin":     0x0c130,
    193      "jing":    0x0c150,
    194      "jiong":   0x0c210,
    195      "jiu":     0x0c110,
    196      "ju":      0x0c160,
    197      "juan":    0x0c1b0,
    198      "jue":     0x0c200,
    199      "jun":     0x0c1c0,
    200      "k":       0x0a000,
    201      "ka":      0x0a010,
    202      "kai":     0x0a040,
    203      "kan":     0x0a080,
    204      "kang":    0x0a0a0,
    205      "kao":     0x0a060,
    206      "ke":      0x0a030,
    207      "kei":     0x0a050,
    208      "ken":     0x0a090,
    209      "keng":    0x0a0b0,
    210      "kong":    0x0a1e0,
    211      "kou":     0x0a070,
    212      "ku":      0x0a160,
    213      "kua":     0x0a170,
    214      "kuai":    0x0a190,
    215      "kuan":    0x0a1b0,
    216      "kuang":   0x0a1d0,
    217      "kui":     0x0a1a0,
    218      "kun":     0x0a1c0,
    219      "kuo":     0x0a180,
    220      "l":       0x08000,
    221      "la":      0x08010,
    222      "lai":     0x08040,
    223      "lan":     0x08080,
    224      "lang":    0x080a0,
    225      "lao":     0x08060,
    226      "le":      0x08030,
    227      "lei":     0x08050,
    228      "leng":    0x080b0,
    229      "li":      0x080d0,
    230      "lia":     0x080e0,
    231      "lian":    0x08120,
    232      "liang":   0x08140,
    233      "liao":    0x08100,
    234      "lie":     0x080f0,
    235      "lin":     0x08130,
    236      "ling":    0x08150,
    237      "liu":     0x08110,
    238      "long":    0x081e0,
    239      "lou":     0x08070,
    240      "lu":      0x08160,
    241      "luan":    0x081b0,
    242      "lue":     0x08200,
    243      "lun":     0x081c0,
    244      "luo":     0x08180,
    245      "lv":      0x081f0,
    246      "m":       0x03000,
    247      "ma":      0x03010,
    248      "mai":     0x03040,
    249      "man":     0x03080,
    250      "mang":    0x030a0,
    251      "mao":     0x03060,
    252      "me":      0x03030,
    253      "mei":     0x03050,
    254      "men":     0x03090,
    255      "meng":    0x030b0,
    256      "mi":      0x030d0,
    257      "mian":    0x03120,
    258      "miao":    0x03100,
    259      "mie":     0x030f0,
    260      "min":     0x03130,
    261      "ming":    0x03150,
    262      "miu":     0x03110,
    263      "mo":      0x03020,
    264      "mou":     0x03070,
    265      "mu":      0x03160,
    266      "n":       0x07000,
    267      "na":      0x07010,
    268      "nai":     0x07040,
    269      "nan":     0x07080,
    270      "nang":    0x070a0,
    271      "nao":     0x07060,
    272      "ne":      0x07030,
    273      "nei":     0x07050,
    274      "nen":     0x07090,
    275      "neng":    0x070b0,
    276      "ni":      0x070d0,
    277      "nian":    0x07120,
    278      "niang":   0x07140,
    279      "niao":    0x07100,
    280      "nie":     0x070f0,
    281      "nin":     0x07130,
    282      "ning":    0x07150,
    283      "niu":     0x07110,
    284      "nong":    0x071e0,
    285      "nou":     0x07070,
    286      "nu":      0x07160,
    287      "nuan":    0x071b0,
    288      "nue":     0x07200,
    289      "nun":     0x071c0,
    290      "nuo":     0x07180,
    291      "nv":      0x071f0,
    292      "o":       0x00020,
    293      "ou":      0x00070,
    294      "p":       0x02000,
    295      "pa":      0x02010,
    296      "pai":     0x02040,
    297      "pan":     0x02080,
    298      "pang":    0x020a0,
    299      "pao":     0x02060,
    300      "pei":     0x02050,
    301      "pen":     0x02090,
    302      "peng":    0x020b0,
    303      "pi":      0x020d0,
    304      "pian":    0x02120,
    305      "piao":    0x02100,
    306      "pie":     0x020f0,
    307      "pin":     0x02130,
    308      "ping":    0x02150,
    309      "po":      0x02020,
    310      "pou":     0x02070,
    311      "pu":      0x02160,
    312      "q":       0x0d000,
    313      "qi":      0x0d0d0,
    314      "qia":     0x0d0e0,
    315      "qian":    0x0d120,
    316      "qiang":   0x0d140,
    317      "qiao":    0x0d100,
    318      "qie":     0x0d0f0,
    319      "qin":     0x0d130,
    320      "qing":    0x0d150,
    321      "qiong":   0x0d210,
    322      "qiu":     0x0d110,
    323      "qu":      0x0d160,
    324      "quan":    0x0d1b0,
    325      "que":     0x0d200,
    326      "qun":     0x0d1c0,
    327      "r":       0x12000,
    328      "ran":     0x12080,
    329      "rang":    0x120a0,
    330      "rao":     0x12060,
    331      "re":      0x12030,
    332      "ren":     0x12090,
    333      "reng":    0x120b0,
    334      "ri":      0x120d0,
    335      "rong":    0x121e0,
    336      "rou":     0x12070,
    337      "ru":      0x12160,
    338      "ruan":    0x121b0,
    339      "rui":     0x121a0,
    340      "run":     0x121c0,
    341      "ruo":     0x12180,
    342      "s":       0x15000,
    343      "sa":      0x15010,
    344      "sai":     0x15040,
    345      "san":     0x15080,
    346      "sang":    0x150a0,
    347      "sao":     0x15060,
    348      "se":      0x15030,
    349      "sen":     0x15090,
    350      "seng":    0x150b0,
    351      "sh":      0x11000,
    352      "sha":     0x11010,
    353      "shai":    0x11040,
    354      "shan":    0x11080,
    355      "shang":   0x110a0,
    356      "shao":    0x11060,
    357      "she":     0x11030,
    358      "shei":    0x11050,
    359      "shen":    0x11090,
    360      "sheng":   0x110b0,
    361      "shi":     0x110d0,
    362      "shou":    0x11070,
    363      "shu":     0x11160,
    364      "shua":    0x11170,
    365      "shuai":   0x11190,
    366      "shuan":   0x111b0,
    367      "shuang":  0x111d0,
    368      "shui":    0x111a0,
    369      "shun":    0x111c0,
    370      "shuo":    0x11180,
    371      "si":      0x150d0,
    372      "song":    0x151e0,
    373      "sou":     0x15070,
    374      "su":      0x15160,
    375      "suan":    0x151b0,
    376      "sui":     0x151a0,
    377      "sun":     0x151c0,
    378      "suo":     0x15180,
    379      "t":       0x06000,
    380      "ta":      0x06010,
    381      "tai":     0x06040,
    382      "tan":     0x06080,
    383      "tang":    0x060a0,
    384      "tao":     0x06060,
    385      "te":      0x06030,
    386      "tei":     0x06050,
    387      "teng":    0x060b0,
    388      "ti":      0x060d0,
    389      "tian":    0x06120,
    390      "tiao":    0x06100,
    391      "tie":     0x060f0,
    392      "ting":    0x06150,
    393      "tong":    0x061e0,
    394      "tou":     0x06070,
    395      "tu":      0x06160,
    396      "tuan":    0x061b0,
    397      "tui":     0x061a0,
    398      "tun":     0x061c0,
    399      "tuo":     0x06180,
    400      "w":       0x17000,
    401      "wa":      0x17010,
    402      "wai":     0x17040,
    403      "wan":     0x17080,
    404      "wang":    0x170a0,
    405      "wei":     0x17050,
    406      "wen":     0x17090,
    407      "weng":    0x170b0,
    408      "wo":      0x17020,
    409      "wu":      0x17160,
    410      "x":       0x0e000,
    411      "xi":      0x0e0d0,
    412      "xia":     0x0e0e0,
    413      "xian":    0x0e120,
    414      "xiang":   0x0e140,
    415      "xiao":    0x0e100,
    416      "xie":     0x0e0f0,
    417      "xin":     0x0e130,
    418      "xing":    0x0e150,
    419      "xiong":   0x0e210,
    420      "xiu":     0x0e110,
    421      "xu":      0x0e160,
    422      "xuan":    0x0e1b0,
    423      "xue":     0x0e200,
    424      "xun":     0x0e1c0,
    425      "y":       0x16000,
    426      "ya":      0x16010,
    427      "yai":     0x16040,
    428      "yan":     0x16080,
    429      "yang":    0x160a0,
    430      "yao":     0x16060,
    431      "ye":      0x16030,
    432      "yi":      0x160d0,
    433      "yin":     0x16130,
    434      "ying":    0x16150,
    435      "yo":      0x16020,
    436      "yong":    0x161e0,
    437      "you":     0x16070,
    438      "yu":      0x16160,
    439      "yuan":    0x161b0,
    440      "yue":     0x16200,
    441      "yun":     0x161c0,
    442      "z":       0x13000,
    443      "za":      0x13010,
    444      "zai":     0x13040,
    445      "zan":     0x13080,
    446      "zang":    0x130a0,
    447      "zao":     0x13060,
    448      "ze":      0x13030,
    449      "zei":     0x13050,
    450      "zen":     0x13090,
    451      "zeng":    0x130b0,
    452      "zh":      0x0f000,
    453      "zha":     0x0f010,
    454      "zhai":    0x0f040,
    455      "zhan":    0x0f080,
    456      "zhang":   0x0f0a0,
    457      "zhao":    0x0f060,
    458      "zhe":     0x0f030,
    459      "zhei":    0x0f050,
    460      "zhen":    0x0f090,
    461      "zheng":   0x0f0b0,
    462      "zhi":     0x0f0d0,
    463      "zhong":   0x0f1e0,
    464      "zhou":    0x0f070,
    465      "zhu":     0x0f160,
    466      "zhua":    0x0f170,
    467      "zhuai":   0x0f190,
    468      "zhuan":   0x0f1b0,
    469      "zhuang":  0x0f1d0,
    470      "zhui":    0x0f1a0,
    471      "zhun":    0x0f1c0,
    472      "zhuo":    0x0f180,
    473      "zi":      0x130d0,
    474      "zong":    0x131e0,
    475      "zou":     0x13070,
    476      "zu":      0x13160,
    477      "zuan":    0x131b0,
    478      "zui":     0x131a0,
    479      "zun":     0x131c0,
    480      "zuo":     0x13180,
    481 }
    482