Home | History | Annotate | Download | only in python
      1  281  yongsun # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
      2  281  yongsun # 
      3  281  yongsun # Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
      4  281  yongsun # 
      5  281  yongsun # The contents of this file are subject to the terms of either the GNU Lesser
      6  281  yongsun # General Public License Version 2.1 only ("LGPL") or the Common Development and
      7  281  yongsun # Distribution License ("CDDL")(collectively, the "License"). You may not use this
      8  281  yongsun # file except in compliance with the License. You can obtain a copy of the CDDL at
      9  281  yongsun # http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
     10  281  yongsun # http://www.opensource.org/licenses/lgpl-license.php. See the License for the 
     11  281  yongsun # specific language governing permissions and limitations under the License. When
     12  281  yongsun # distributing the software, include this License Header Notice in each file and
     13  281  yongsun # include the full text of the License in the License file as well as the
     14  281  yongsun # following notice:
     15  281  yongsun # 
     16  281  yongsun # NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
     17  281  yongsun # (CDDL)
     18  281  yongsun # For Covered Software in this distribution, this License shall be governed by the
     19  281  yongsun # laws of the State of California (excluding conflict-of-law provisions).
     20  281  yongsun # Any litigation relating to this License shall be subject to the jurisdiction of
     21  281  yongsun # the Federal Courts of the Northern District of California and the state courts
     22  281  yongsun # of the State of California, with venue lying in Santa Clara County, California.
     23  281  yongsun # 
     24  281  yongsun # Contributor(s):
     25  281  yongsun # 
     26  281  yongsun # If you wish your version of this file to be governed by only the CDDL or only
     27  281  yongsun # the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
     28  281  yongsun # include this software in this distribution under the [CDDL or LGPL Version 2.1]
     29  281  yongsun # license." If you don't indicate a single choice of license, a recipient has the
     30  281  yongsun # option to distribute your version of this file under either the CDDL or the LGPL
     31  281  yongsun # Version 2.1, or to extend the choice of license to its licensees as provided
     32  281  yongsun # above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
     33  281  yongsun # Version 2 license, then the option applies only if the new code is made subject
     34  281  yongsun # to such option by the copyright holder. 
     35  281  yongsun 
     36  281  yongsun cdef extern from "Python.h":
     37  297  yongsun     ctypedef struct PyUnicodeObject:
     38  297  yongsun         pass
     39  281  yongsun     ctypedef unsigned wchar_t
     40  281  yongsun     ctypedef wchar_t * const_wchar_t_ptr "const wchar_t *"
     41  281  yongsun     char * PyString_AsString (object)
     42  281  yongsun     object PyUnicode_FromWideChar (wchar_t *, Py_ssize_t)
     43  297  yongsun     Py_ssize_t PyUnicode_AsWideChar (PyUnicodeObject *, wchar_t *, Py_ssize_t)
     44  281  yongsun 
# Wide-character declarations taken from the project's portability header.
cdef extern from "portability.h":
    # Declared to Cython as a plain unsigned integral type; the actual
    # definition lives in portability.h.
    ctypedef unsigned TWCHAR
    # Pointer typedef that is spelled "const TWCHAR *" in the generated C.
    ctypedef TWCHAR * const_TWCHAR_ptr "const TWCHAR *"
    # Used in this file to obtain the length handed to PyUnicode_FromWideChar
    # (presumably a wcslen equivalent for TWCHAR strings -- confirm in
    # portability.h).
    unsigned WCSLEN (const_TWCHAR_ptr ws)
     49  281  yongsun 
# Declarations for the C++ CPinyinTrie class from pytrie.h.  The C++ types
# are described to Cython as structs; the quoted strings are the names
# emitted verbatim into the generated C++ code.
cdef extern from "pytrie.h":
    # Emitted as CPinyinTrie::TWordIdInfo.  Only m_id is read in this file.
    ctypedef struct CPinyinTrie_TWord "CPinyinTrie::TWordIdInfo":
        unsigned        m_bSeen
        unsigned        m_csLevel
        unsigned        m_id

    # Emitted as CPinyinTrie::TNode.
    ctypedef struct CPinyinTrie_TNode "CPinyinTrie::TNode":
        # Used in this file as the number of entries reachable through
        # getWordIdPtr ().
        unsigned m_nWordId
        CPinyinTrie_TWord * getWordIdPtr ()

    ctypedef struct CPinyinTrie "CPinyinTrie":
        bint load(char *filename)
        void free()
        CPinyinTrie_TNode * getRootNode ()
        CPinyinTrie_TNode * transfer (CPinyinTrie_TNode *, unsigned char*)
        # Bound to the C++ subscript operator via its C name.
        const_TWCHAR_ptr getitem "operator []" (unsigned)
        # NOTE(review): the spelling "getSimbolId" has to match the C++
        # header as-is -- do not "correct" it here without checking pytrie.h.
        int getSimbolId (const_TWCHAR_ptr)
        bint isValid (CPinyinTrie_TNode*, bint, unsigned)

    # Pseudo-functions whose C names expand to the C++ new/delete
    # expressions, giving Cython a way to allocate and destroy the object.
    CPinyinTrie *new_CPinyinTrie "new CPinyinTrie" ()
    void del_CPinyinTrie "delete" (CPinyinTrie *pytrie)
     71  281  yongsun 
     72  281  yongsun cdef class PinyinTrieNode:
     73  281  yongsun     cdef CPinyinTrie_TNode *pnode
     74  282  yongsun     def get_word_ids (self):
     75  282  yongsun         wids = []
     76  282  yongsun         cdef CPinyinTrie_TWord *p= <CPinyinTrie_TWord*> self.pnode.getWordIdPtr ()
     77  282  yongsun         for i in xrange (self.pnode.m_nWordId):
     78  282  yongsun             wids.append (p[i].m_id)
     79  282  yongsun         return wids
     80  281  yongsun 
     81  281  yongsun cdef class PinyinTrie:
     82  281  yongsun     cdef CPinyinTrie *thisptr
     83  281  yongsun 
     84  281  yongsun     def __cinit__ (self):
     85  281  yongsun         self.thisptr = new_CPinyinTrie ()
     86  281  yongsun 
     87  281  yongsun     def __dealloc__ (self):
     88  281  yongsun         del_CPinyinTrie (self.thisptr)
     89  281  yongsun 
     90  281  yongsun     def load (self, fname):
     91  281  yongsun         return self.thisptr.load (fname)
     92  281  yongsun 
     93  281  yongsun     def free (self):
     94  281  yongsun         self.thisptr.free ()
     95  281  yongsun 
     96  281  yongsun     def get_root_node (self):
     97  281  yongsun         cdef CPinyinTrie_TNode * pnode = <CPinyinTrie_TNode*> self.thisptr.getRootNode ()
     98  281  yongsun         node = PinyinTrieNode ()
     99  281  yongsun         (<PinyinTrieNode>node).pnode = pnode
    100  281  yongsun         return node
    101  281  yongsun 
    102  281  yongsun     def transfer (self, node, str):
    103  281  yongsun         cdef CPinyinTrie_TNode *pnode = (<PinyinTrieNode>node).pnode
    104  281  yongsun         pnode = <CPinyinTrie_TNode*> self.thisptr.transfer (pnode, <unsigned char*>PyString_AsString(str))
    105  281  yongsun         if not pnode:
    106  281  yongsun             return None
    107  281  yongsun 
    108  281  yongsun         node = PinyinTrieNode ()
    109  281  yongsun         (<PinyinTrieNode>node).pnode = pnode
    110  281  yongsun         return node
    111  281  yongsun 
    112  281  yongsun     def __getitem__ (self, idx):
    113  281  yongsun         cdef const_TWCHAR_ptr cwstr = self.thisptr.getitem (idx)
    114  281  yongsun         return PyUnicode_FromWideChar (<const_wchar_t_ptr>cwstr, WCSLEN(cwstr))
    115  281  yongsun 
    116  297  yongsun     def get_symbol_id (self, symbol):
    117  297  yongsun         cdef wchar_t buf[2]
    118  297  yongsun         if len (symbol) != 1: return 0
    119  297  yongsun         PyUnicode_AsWideChar (<PyUnicodeObject*> symbol, buf, sizeof(buf))
    120  297  yongsun         return self.thisptr.getSimbolId (<const_TWCHAR_ptr> buf)
    121  297  yongsun 
    122  281  yongsun     def is_valid (self, node, allowNonComplete=True, csLevel=0):
    123  281  yongsun         cdef CPinyinTrie_TNode *pnode = (<PinyinTrieNode>node).pnode
    124  281  yongsun         return self.thisptr.isValid (pnode, allowNonComplete, csLevel)
    125