# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
#
# Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
#
# The contents of this file are subject to the terms of either the GNU Lesser
# General Public License Version 2.1 only ("LGPL") or the Common Development and
# Distribution License ("CDDL")(collectively, the "License"). You may not use this
# file except in compliance with the License. You can obtain a copy of the CDDL at
# http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
# http://www.opensource.org/licenses/lgpl-license.php. See the License for the
# specific language governing permissions and limitations under the License. When
# distributing the software, include this License Header Notice in each file and
# include the full text of the License in the License file as well as the
# following notice:
#
# NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
# (CDDL)
# For Covered Software in this distribution, this License shall be governed by the
# laws of the State of California (excluding conflict-of-law provisions).
# Any litigation relating to this License shall be subject to the jurisdiction of
# the Federal Courts of the Northern District of California and the state courts
# of the State of California, with venue lying in Santa Clara County, California.
#
# Contributor(s):
#
# If you wish your version of this file to be governed by only the CDDL or only
# the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
# include this software in this distribution under the [CDDL or LGPL Version 2.1]
# license." If you don't indicate a single choice of license, a recipient has the
# option to distribute your version of this file under either the CDDL or the LGPL
# Version 2.1, or to extend the choice of license to its licensees as provided
# above.
# However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
# Version 2 license, then the option applies only if the new code is made subject
# to such option by the copyright holder.

# CPython helpers used to move strings across the Python/C++ boundary.
cdef extern from "Python.h":
    ctypedef struct PyUnicodeObject:
        pass
    ctypedef unsigned wchar_t
    ctypedef wchar_t * const_wchar_t_ptr "const wchar_t *"
    # The "except" clauses make Cython propagate the Python exception these
    # calls set on failure instead of carrying on with the NULL / -1 result.
    char * PyString_AsString (object) except NULL
    object PyUnicode_FromWideChar (wchar_t *, Py_ssize_t)
    Py_ssize_t PyUnicode_AsWideChar (PyUnicodeObject *, wchar_t *, Py_ssize_t) except -1

# Wide-character type and length helper used by the trie's C++ API.
cdef extern from "portability.h":
    ctypedef unsigned TWCHAR
    ctypedef TWCHAR * const_TWCHAR_ptr "const TWCHAR *"
    unsigned WCSLEN (const_TWCHAR_ptr ws)

# The C++ classes are declared as structs carrying function members; the quoted
# C names make the generated code refer to the real C++ identifiers.
cdef extern from "pytrie.h":
    ctypedef struct CPinyinTrie_TWord "CPinyinTrie::TWordIdInfo":
        unsigned m_bSeen
        unsigned m_csLevel
        unsigned m_id

    ctypedef struct CPinyinTrie_TNode "CPinyinTrie::TNode":
        unsigned m_nWordId
        CPinyinTrie_TWord * getWordIdPtr ()

    ctypedef struct CPinyinTrie "CPinyinTrie":
        bint load(char *filename)
        void free()
        CPinyinTrie_TNode * getRootNode ()
        CPinyinTrie_TNode * transfer (CPinyinTrie_TNode *, unsigned char*)
        const_TWCHAR_ptr getitem "operator []" (unsigned)
        int getSimbolId (const_TWCHAR_ptr)
        bint isValid (CPinyinTrie_TNode*, bint, unsigned)

    # Spelled as functions so the generated code reads "new CPinyinTrie ()"
    # and "delete (ptr)".
    CPinyinTrie *new_CPinyinTrie "new CPinyinTrie" ()
    void del_CPinyinTrie "delete" (CPinyinTrie *pytrie)

cdef class PinyinTrieNode:
    """Python handle for a single CPinyinTrie::TNode.

    The handle only stores the node pointer. An instance created directly
    from Python has a NULL pointer until a PinyinTrie method fills it in.
    """
    cdef CPinyinTrie_TNode *pnode

    def get_word_ids (self):
        """Return a list with the m_id of every word-id record of this node.

        A handle whose node pointer is NULL yields an empty list instead of
        dereferencing the NULL pointer.
        """
        cdef CPinyinTrie_TWord *words
        cdef unsigned i
        ids = []
        if self.pnode == NULL:
            return ids
        words = <CPinyinTrie_TWord*> self.pnode.getWordIdPtr ()
        for i in xrange (self.pnode.m_nWordId):
            ids.append (words[i].m_id)
        return ids

cdef PinyinTrieNode _wrap_node (CPinyinTrie_TNode *pnode):
    # Build a PinyinTrieNode handle around an existing C++ node pointer.
    cdef PinyinTrieNode handle = PinyinTrieNode ()
    handle.pnode = pnode
    return handle

cdef class PinyinTrie:
    """Python wrapper around a heap-allocated CPinyinTrie."""
    cdef CPinyinTrie *thisptr

    # Allocate the underlying C++ object when the wrapper is created.
    def __cinit__ (self):
        self.thisptr = new_CPinyinTrie ()

    # Delete the underlying C++ object together with the wrapper.
    def __dealloc__ (self):
        del_CPinyinTrie (self.thisptr)

    def load (self, fname):
        """Call CPinyinTrie::load on fname and return its boolean result."""
        return self.thisptr.load (fname)

    def free (self):
        """Call CPinyinTrie::free on the wrapped trie."""
        self.thisptr.free ()

    def get_root_node (self):
        """Return a PinyinTrieNode wrapping the trie's root node."""
        return _wrap_node (<CPinyinTrie_TNode*> self.thisptr.getRootNode ())

    def transfer (self, PinyinTrieNode node not None, str):
        """Follow the byte string str from node.

        Returns a PinyinTrieNode for the node CPinyinTrie::transfer reports,
        or None when it reports no node. Raises TypeError when node is not a
        PinyinTrieNode, and propagates the exception raised by
        PyString_AsString when str cannot be viewed as a byte string.
        """
        cdef CPinyinTrie_TNode *pnext
        pnext = <CPinyinTrie_TNode*> self.thisptr.transfer (node.pnode, <unsigned char*>PyString_AsString(str))
        if not pnext:
            return None
        return _wrap_node (pnext)

    # Return the wide string stored for idx as a unicode object.
    def __getitem__ (self, idx):
        cdef const_TWCHAR_ptr cwstr = self.thisptr.getitem (idx)
        # NOTE(review): WCSLEN presumably walks the string, so a NULL result
        # from operator[] must not reach it -- confirm whether operator[] can
        # actually return NULL.
        if cwstr == NULL:
            raise IndexError (idx)
        return PyUnicode_FromWideChar (<const_wchar_t_ptr>cwstr, WCSLEN(cwstr))

    def get_symbol_id (self, symbol):
        """Return CPinyinTrie::getSimbolId for the one-character unicode symbol.

        Returns 0 when symbol is not exactly one character long or does not
        fit in a single wchar_t. Raises TypeError when symbol is not a
        unicode object.
        """
        cdef wchar_t buf[2]
        cdef Py_ssize_t copied
        if not isinstance (symbol, unicode):
            # The cast below would reinterpret an arbitrary object's memory.
            raise TypeError ("symbol must be a unicode string")
        if len (symbol) != 1: return 0
        buf[0] = 0
        buf[1] = 0
        # The size argument counts wchar_t elements, not bytes.
        copied = PyUnicode_AsWideChar (<PyUnicodeObject*> symbol, buf, 2)
        if copied != 1:
            return 0
        # Guarantee termination regardless of what the call wrote.
        buf[1] = 0
        # NOTE(review): the cast assumes wchar_t and TWCHAR have the same
        # width -- confirm against portability.h.
        return self.thisptr.getSimbolId (<const_TWCHAR_ptr> buf)

    def is_valid (self, PinyinTrieNode node not None, allowNonComplete=True, csLevel=0):
        """Return CPinyinTrie::isValid for node with the given flags.

        Raises TypeError when node is not a PinyinTrieNode.
        """
        return self.thisptr.isValid (node.pnode, allowNonComplete, csLevel)