Home | History | Annotate | Download | only in python
      1 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
      2 # 
      3 # Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
      4 # 
      5 # The contents of this file are subject to the terms of either the GNU Lesser
      6 # General Public License Version 2.1 only ("LGPL") or the Common Development and
      7 # Distribution License ("CDDL")(collectively, the "License"). You may not use this
      8 # file except in compliance with the License. You can obtain a copy of the CDDL at
      9 # http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
     10 # http://www.opensource.org/licenses/lgpl-license.php. See the License for the 
     11 # specific language governing permissions and limitations under the License. When
     12 # distributing the software, include this License Header Notice in each file and
     13 # include the full text of the License in the License file as well as the
     14 # following notice:
     15 # 
     16 # NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
     17 # (CDDL)
     18 # For Covered Software in this distribution, this License shall be governed by the
     19 # laws of the State of California (excluding conflict-of-law provisions).
     20 # Any litigation relating to this License shall be subject to the jurisdiction of
     21 # the Federal Courts of the Northern District of California and the state courts
     22 # of the State of California, with venue lying in Santa Clara County, California.
     23 # 
     24 # Contributor(s):
     25 # 
     26 # If you wish your version of this file to be governed by only the CDDL or only
     27 # the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
     28 # include this software in this distribution under the [CDDL or LGPL Version 2.1]
     29 # license." If you don't indicate a single choice of license, a recipient has the
     30 # option to distribute your version of this file under either the CDDL or the LGPL
     31 # Version 2.1, or to extend the choice of license to its licensees as provided
     32 # above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
     33 # Version 2 license, then the option applies only if the new code is made subject
     34 # to such option by the copyright holder. 
     35 
     36 cdef extern from "Python.h":
     37     ctypedef struct PyUnicodeObject:
     38         pass
     39     ctypedef unsigned wchar_t
     40     ctypedef wchar_t * const_wchar_t_ptr "const wchar_t *"
     41     char * PyString_AsString (object)
     42     object PyUnicode_FromWideChar (wchar_t *, Py_ssize_t)
     43     Py_ssize_t PyUnicode_AsWideChar (PyUnicodeObject *, wchar_t *, Py_ssize_t)
     44 
     45 cdef extern from "portability.h":
     46     ctypedef unsigned TWCHAR
     47     ctypedef TWCHAR * const_TWCHAR_ptr "const TWCHAR *"
     48     unsigned WCSLEN (const_TWCHAR_ptr ws)
     49 
     50 cdef extern from "pytrie.h":
     51     ctypedef struct CPinyinTrie_TWord "CPinyinTrie::TWordIdInfo":
     52         unsigned        m_bSeen
     53         unsigned        m_csLevel
     54         unsigned        m_id
     55 
     56     ctypedef struct CPinyinTrie_TNode "CPinyinTrie::TNode":
     57         unsigned m_nWordId
     58         CPinyinTrie_TWord * getWordIdPtr ()
     59 
     60     ctypedef struct CPinyinTrie "CPinyinTrie":
     61         bint load(char *filename)
     62         void free()
     63         CPinyinTrie_TNode * getRootNode ()
     64         CPinyinTrie_TNode * transfer (CPinyinTrie_TNode *, unsigned char*)
     65         const_TWCHAR_ptr getitem "operator []" (unsigned)
     66         int getSimbolId (const_TWCHAR_ptr)
     67         bint isValid (CPinyinTrie_TNode*, bint, unsigned)
     68 
     69     CPinyinTrie *new_CPinyinTrie "new CPinyinTrie" ()
     70     void del_CPinyinTrie "delete" (CPinyinTrie *pytrie)
     71 
     72 cdef class PinyinTrieNode:
     73     cdef CPinyinTrie_TNode *pnode
     74     def get_word_ids (self):
     75         wids = []
     76         cdef CPinyinTrie_TWord *p= <CPinyinTrie_TWord*> self.pnode.getWordIdPtr ()
     77         for i in xrange (self.pnode.m_nWordId):
     78             wids.append (p[i].m_id)
     79         return wids
     80 
     81 cdef class PinyinTrie:
     82     cdef CPinyinTrie *thisptr
     83 
     84     def __cinit__ (self):
     85         self.thisptr = new_CPinyinTrie ()
     86 
     87     def __dealloc__ (self):
     88         del_CPinyinTrie (self.thisptr)
     89 
     90     def load (self, fname):
     91         return self.thisptr.load (fname)
     92 
     93     def free (self):
     94         self.thisptr.free ()
     95 
     96     def get_root_node (self):
     97         cdef CPinyinTrie_TNode * pnode = <CPinyinTrie_TNode*> self.thisptr.getRootNode ()
     98         node = PinyinTrieNode ()
     99         (<PinyinTrieNode>node).pnode = pnode
    100         return node
    101 
    102     def transfer (self, node, str):
    103         cdef CPinyinTrie_TNode *pnode = (<PinyinTrieNode>node).pnode
    104         pnode = <CPinyinTrie_TNode*> self.thisptr.transfer (pnode, <unsigned char*>PyString_AsString(str))
    105         if not pnode:
    106             return None
    107 
    108         node = PinyinTrieNode ()
    109         (<PinyinTrieNode>node).pnode = pnode
    110         return node
    111 
    112     def __getitem__ (self, idx):
    113         cdef const_TWCHAR_ptr cwstr = self.thisptr.getitem (idx)
    114         return PyUnicode_FromWideChar (<const_wchar_t_ptr>cwstr, WCSLEN(cwstr))
    115 
    116     def get_symbol_id (self, symbol):
    117         cdef wchar_t buf[2]
    118         if len (symbol) != 1: return 0
    119         PyUnicode_AsWideChar (<PyUnicodeObject*> symbol, buf, sizeof(buf))
    120         return self.thisptr.getSimbolId (<const_TWCHAR_ptr> buf)
    121 
    122     def is_valid (self, node, allowNonComplete=True, csLevel=0):
    123         cdef CPinyinTrie_TNode *pnode = (<PinyinTrieNode>node).pnode
    124         return self.thisptr.isValid (pnode, allowNonComplete, csLevel)
    125