OpenGrok

Cross Reference: datrie_impl.h
xref: /nv-g11n/inputmethod/sunpinyin2/src/pinyin/datrie_impl.h
Home | History | Annotate | Line # | Download | only in pinyin
      1 /*
      2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
      3  *
      4  * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
      5  *
      6  * The contents of this file are subject to the terms of either the GNU Lesser
      7  * General Public License Version 2.1 only ("LGPL") or the Common Development and
      8  * Distribution License ("CDDL")(collectively, the "License"). You may not use this
      9  * file except in compliance with the License. You can obtain a copy of the CDDL at
     10  * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
     11  * http://www.opensource.org/licenses/lgpl-license.php. See the License for the
     12  * specific language governing permissions and limitations under the License. When
     13  * distributing the software, include this License Header Notice in each file and
     14  * include the full text of the License in the License file as well as the
     15  * following notice:
     16  *
     17  * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
     18  * (CDDL)
     19  * For Covered Software in this distribution, this License shall be governed by the
     20  * laws of the State of California (excluding conflict-of-law provisions).
     21  * Any litigation relating to this License shall be subject to the jurisdiction of
     22  * the Federal Courts of the Northern District of California and the state courts
     23  * of the State of California, with venue lying in Santa Clara County, California.
     24  *
     25  * Contributor(s):
     26  *
     27  * If you wish your version of this file to be governed by only the CDDL or only
     28  * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
     29  * include this software in this distribution under the [CDDL or LGPL Version 2.1]
     30  * license." If you don't indicate a single choice of license, a recipient has the
     31  * option to distribute your version of this file under either the CDDL or the LGPL
     32  * Version 2.1, or to extend the choice of license to its licensees as provided
     33  * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
     34  * Version 2 license, then the option applies only if the new code is made subject
     35  * to such option by the copyright holder.
     36  */
     37 
     38 #include <stdio.h>
     39 #include <fcntl.h>
     40 #include <unistd.h>
     41 #include <stdlib.h>
     42 
     43 #ifdef HAVE_CONFIG_H
     44 #include <config.h>
     45 #endif
     46 
     47 #ifdef HAVE_SYS_MMAN_H
     48 #include <sys/mman.h>
     49 #endif
     50 
     51 #include "datrie.h"
     52 
     53 template <typename T, encoder_func_ptr encoder>
     54 bool CDATrie<T, encoder>::load (const char * fname)
     55 {
     56     free ();
     57 
     58     bool suc = false;
     59     int fd =  open (fname, O_RDONLY);
     60     if (fd == -1) return false;
     61 
     62     m_memSize = lseek (fd, 0, SEEK_END);
     63     lseek (fd, 0, SEEK_SET);
     64 
     65 #ifdef HAVE_SYS_MMAN_H
     66     suc = (m_mem = (char*) mmap (NULL, m_memSize, PROT_READ, MAP_SHARED, fd, 0)) != MAP_FAILED;
     67 #else
     68     suc = (m_mem = new char [m_memSize]) != NULL;
     69     suc = suc && (read (fd, m_mem, m_memSize) > 0);
     70 #endif /* HAVE_SYS_MMAN_H */
     71     close (fd);
     72 
     73     if (!suc)
     74         return suc;
     75 
     76     m_len = * ((unsigned *) m_mem);
     77     unsigned short elm_size  = * ((unsigned short*) (m_mem + sizeof(m_len)));
     78     unsigned short has_value = * ((unsigned short*) (m_mem + sizeof(m_len) + sizeof(elm_size)));
     79 
     80     if (sizeof (T) != elm_size)
     81         return false;
     82 
     83     m_base = (T *) (m_mem + sizeof(m_len) + sizeof(elm_size) + sizeof(has_value));
     84     m_check = m_base + m_len;
     85     m_value = has_value? (int *)(m_check + m_len): NULL;
     86 
     87     return suc;
     88 }
     89 
     90 template <typename T, encoder_func_ptr encoder>
     91 void CDATrie<T, encoder>::free ()
     92 {
     93     if (m_mem) {
     94 #ifdef HAVE_SYS_MMAN_H
     95         munmap (m_mem, m_memSize);
     96 #else
     97         delete [] m_mem;
     98 #endif
     99         m_mem = NULL;
    100     }
    101 
    102     m_len = 0;
    103     m_base = m_check = NULL;
    104     m_value = NULL;
    105 }
    106 
    107 template <typename T, encoder_func_ptr encoder>
    108 unsigned CDATrie<T, encoder>::walk (unsigned s, unsigned ch, int &v)
    109 {
    110     unsigned c = encoder(ch);
    111     unsigned t = abs (m_base[s]) + c;
    112 
    113     if (t < m_len && m_check[t] == (T)s && m_base[t]) {
    114         if (m_value)
    115             v = m_value[t];
    116         else
    117             v = m_base[t] < 0? -1: 0;
    118 
    119         return t;
    120     }
    121 
    122     v = 0;
    123     return 0;
    124 }
    125 
    126 template <typename T, encoder_func_ptr encoder>
    127 int CDATrie<T, encoder>::match_longest (const char *str, unsigned &length)
    128 {
    129     return match_longest (str, str+strlen(str), length);
    130 }
    131 
    132 template <typename T, encoder_func_ptr encoder>
    133 int CDATrie<T, encoder>::match_longest (wstring wstr, unsigned &length)
    134 {
    135     return match_longest (wstr.begin(), wstr.end(), length);
    136 }
    137 
    138 template <typename T, encoder_func_ptr encoder>
    139 template <typename InputIterator>
    140 int CDATrie<T, encoder>::match_longest (InputIterator first, InputIterator last, unsigned &length)
    141 {
    142     int l=0, ret_v=0, curr_state=0;
    143     length = 0;
    144 
    145     for (; first != last; ++first) {
    146         unsigned ch = *first;
    147         int val;
    148         curr_state = walk (curr_state, ch, val);
    149         if (!curr_state) break;
    150 
    151         l += 1;
    152         if (val) {
    153             length = l;
    154             ret_v = val;
    155         }
    156     }
    157 
    158     return ret_v;
    159 }
    160