# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
#
# Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
#
# The contents of this file are subject to the terms of either the GNU Lesser
# General Public License Version 2.1 only ("LGPL") or the Common Development and
# Distribution License ("CDDL")(collectively, the "License"). You may not use this
# file except in compliance with the License. You can obtain a copy of the CDDL at
# http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
# http://www.opensource.org/licenses/lgpl-license.php. See the License for the
# specific language governing permissions and limitations under the License. When
# distributing the software, include this License Header Notice in each file and
# include the full text of the License in the License file as well as the
# following notice:
#
# NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
# (CDDL)
# For Covered Software in this distribution, this License shall be governed by the
# laws of the State of California (excluding conflict-of-law provisions).
# Any litigation relating to this License shall be subject to the jurisdiction of
# the Federal Courts of the Northern District of California and the state courts
# of the State of California, with venue lying in Santa Clara County, California.
#
# Contributor(s):
#
# If you wish your version of this file to be governed by only the CDDL or only
# the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
# include this software in this distribution under the [CDDL or LGPL Version 2.1]
# license." If you don't indicate a single choice of license, a recipient has the
# option to distribute your version of this file under either the CDDL or the LGPL
# Version 2.1, or to extend the choice of license to its licensees as provided
# above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
# Version 2 license, then the option applies only if the new code is made subject
# to such option by the copyright holder.

# Python extension module wrapping the C++ CPinyinTrie class declared in
# "pytrie.h".  It exposes two extension types: PinyinTrie (owns a CPinyinTrie
# instance) and PinyinTrieNode (a handle on a CPinyinTrie::TNode pointer).

cdef extern from "Python.h":
    ctypedef struct PyUnicodeObject:
        pass
    ctypedef unsigned wchar_t
    ctypedef wchar_t * const_wchar_t_ptr "const wchar_t *"
    # "except NULL": PyString_AsString returns NULL with a Python exception set
    # when its argument is not a string; let that exception propagate instead
    # of handing a NULL pointer to the C++ code.
    char * PyString_AsString (object) except NULL
    object PyUnicode_FromWideChar (wchar_t *, Py_ssize_t)
    # "except -1": the C API reports failure by returning -1 with an exception set.
    Py_ssize_t PyUnicode_AsWideChar (PyUnicodeObject *, wchar_t *, Py_ssize_t) except -1

cdef extern from "portability.h":
    ctypedef unsigned TWCHAR
    ctypedef TWCHAR * const_TWCHAR_ptr "const TWCHAR *"
    unsigned WCSLEN (const_TWCHAR_ptr ws)

cdef extern from "pytrie.h":
    ctypedef struct CPinyinTrie_TWord "CPinyinTrie::TWordIdInfo":
        unsigned m_bSeen
        unsigned m_csLevel
        unsigned m_id

    ctypedef struct CPinyinTrie_TNode "CPinyinTrie::TNode":
        unsigned m_nWordId
        CPinyinTrie_TWord * getWordIdPtr ()


    ctypedef struct CPinyinTrie "CPinyinTrie":
        bint load(char *filename)
        void free()
        CPinyinTrie_TNode * getRootNode ()
        CPinyinTrie_TNode * transfer (CPinyinTrie_TNode *, unsigned char*)
        const_TWCHAR_ptr getitem "operator []" (unsigned)
        int getSimbolId (const_TWCHAR_ptr)
        bint isValid (CPinyinTrie_TNode*, bint, unsigned)

    # The C names make these expand to the C++ "new CPinyinTrie ()" and
    # "delete (ptr)" expressions.
    CPinyinTrie *new_CPinyinTrie "new CPinyinTrie" ()
    void del_CPinyinTrie "delete" (CPinyinTrie *pytrie)

cdef class PinyinTrieNode:
    """Handle on a CPinyinTrie::TNode pointer.

    Instances are filled in by PinyinTrie.get_root_node() and
    PinyinTrie.transfer(); this class never allocates or frees the node.
    """
    # NOTE(review): the pointer presumably refers to memory owned by the
    # PinyinTrie that produced it, so it would dangle once that trie is freed
    # or deallocated -- confirm against pytrie.h.
    cdef CPinyinTrie_TNode *pnode

    def get_word_ids (self):
        """Return a list with the m_id of each word entry attached to this node.

        Returns an empty list when the node has no underlying C++ node.
        """
        cdef CPinyinTrie_TWord *words
        cdef unsigned count
        cdef unsigned i
        wids = []
        # pnode is still NULL when the object was instantiated directly from
        # Python (pointer attributes of a cdef class start out zeroed);
        # dereferencing it would crash the interpreter.
        if not self.pnode:
            return wids
        words = <CPinyinTrie_TWord*> self.pnode.getWordIdPtr ()
        count = self.pnode.m_nWordId
        for i in range (count):
            wids.append (words[i].m_id)
        return wids

cdef class PinyinTrie:
    """Python wrapper owning one C++ CPinyinTrie instance."""
    cdef CPinyinTrie *thisptr

    # Allocate the wrapped C++ object.
    def __cinit__ (self):
        self.thisptr = new_CPinyinTrie ()

    # Destroy the wrapped C++ object together with the Python object.
    def __dealloc__ (self):
        del_CPinyinTrie (self.thisptr)

    def load (self, fname):
        """Call CPinyinTrie::load on the file named fname and return its boolean result."""
        return self.thisptr.load (fname)

    def free (self):
        """Call CPinyinTrie::free on the wrapped trie."""
        self.thisptr.free ()

    def get_root_node (self):
        """Return a PinyinTrieNode wrapping the pointer returned by getRootNode()."""
        cdef CPinyinTrie_TNode * pnode = <CPinyinTrie_TNode*> self.thisptr.getRootNode ()
        node = PinyinTrieNode ()
        (<PinyinTrieNode>node).pnode = pnode
        return node

    def transfer (self, PinyinTrieNode node, str):
        """Call CPinyinTrie::transfer from node with the byte string str.

        Returns a new PinyinTrieNode for the resulting C++ node, or None when
        the C++ call returns NULL or when node is None.  Raises TypeError if
        node is not a PinyinTrieNode or str is not a byte string.
        """
        cdef char *key
        cdef CPinyinTrie_TNode *pnode
        # transfer() itself hands out None for "no such node"; accept it back
        # instead of reading a pnode field out of an unrelated object.
        if node is None:
            return None
        # Raises (via "except NULL") when str is not a byte string.
        key = PyString_AsString (str)
        pnode = <CPinyinTrie_TNode*> self.thisptr.transfer (node.pnode, <unsigned char*> key)
        if not pnode:
            return None

        result = PinyinTrieNode ()
        (<PinyinTrieNode>result).pnode = pnode
        return result

    # Return the wide string that CPinyinTrie::operator[] yields for idx as a
    # unicode object.
    def __getitem__ (self, idx):
        cdef const_TWCHAR_ptr cwstr = self.thisptr.getitem (idx)
        # Defensive: WCSLEN must not be handed a NULL pointer.
        # NOTE(review): whether operator[] can return NULL (e.g. for an unknown
        # id) is not visible from this file -- confirm against pytrie.h.
        if not cwstr:
            raise IndexError (idx)
        return PyUnicode_FromWideChar (<const_wchar_t_ptr>cwstr, WCSLEN(cwstr))

    def get_symbol_id (self, symbol):
        """Return CPinyinTrie::getSimbolId for a one-character unicode string.

        Returns 0 when symbol is not a unicode object of length 1.
        """
        cdef wchar_t buf[2]
        # The cast to PyUnicodeObject* below is only valid for unicode objects.
        if not isinstance (symbol, unicode) or len (symbol) != 1:
            return 0
        buf[0] = 0
        buf[1] = 0
        # The size argument counts wchar_t elements, not bytes; buf holds two
        # (the character plus its terminating zero).
        PyUnicode_AsWideChar (<PyUnicodeObject*> symbol, buf, 2)
        # NOTE(review): this reinterprets wchar_t storage as TWCHAR, which
        # assumes both types have the same width on the target platform.
        return self.thisptr.getSimbolId (<const_TWCHAR_ptr> buf)

    def is_valid (self, PinyinTrieNode node, allowNonComplete=True, csLevel=0):
        """Return the result of CPinyinTrie::isValid (node, allowNonComplete, csLevel).

        A node of None (as returned by transfer() when there is no such node)
        is reported as not valid.  Raises TypeError if node is some other
        non-PinyinTrieNode object.
        """
        if node is None:
            return False
        return self.thisptr.isValid (node.pnode, allowNonComplete, csLevel)