OpenGrok

Cross Reference: ic_history.h
xref: /nv-g11n/inputmethod/sunpinyin/ime/src/ic_history.h
Home | History | Annotate | Line # | Download | only in src
      1 /*
      2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
      3  *
      4  * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
      5  *
      6  * The contents of this file are subject to the terms of either the GNU Lesser
      7  * General Public License Version 2.1 only ("LGPL") or the Common Development and
      8  * Distribution License ("CDDL")(collectively, the "License"). You may not use this
      9  * file except in compliance with the License. You can obtain a copy of the CDDL at
     10  * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
     11  * http://www.opensource.org/licenses/lgpl-license.php. See the License for the
     12  * specific language governing permissions and limitations under the License. When
     13  * distributing the software, include this License Header Notice in each file and
     14  * include the full text of the License in the License file as well as the
     15  * following notice:
     16  *
     17  * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
     18  * (CDDL)
     19  * For Covered Software in this distribution, this License shall be governed by the
     20  * laws of the State of California (excluding conflict-of-law provisions).
     21  * Any litigation relating to this License shall be subject to the jurisdiction of
     22  * the Federal Courts of the Northern District of California and the state courts
     23  * of the State of California, with venue lying in Santa Clara County, California.
     24  *
     25  * Contributor(s):
     26  *
     27  * If you wish your version of this file to be governed by only the CDDL or only
     28  * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
     29  * include this software in this distribution under the [CDDL or LGPL Version 2.1]
     30  * license." If you don't indicate a single choice of license, a recipient has the
     31  * option to distribute your version of this file under either the CDDL or the LGPL
     32  * Version 2.1, or to extend the choice of license to its licensees as provided
     33  * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
     34  * Version 2 license, then the option applies only if the new code is made subject
     35  * to such option by the copyright holder.
     36  */
     37 
     38 #ifndef _SUNPINYIN_CONTEXT_HISTORY_H
     39 #define _SUNPINYIN_CONTEXT_HISTORY_H
     40 
     41 #include "portability.h"
     42 
     43 #include <map>
     44 #include <deque>
     45 #include <set>
     46 
     47 /**
     48 * A "forget all" history memory: the base history type for the input context.
        * NOTE(review): the original wording suggests this base class remembers
        * nothing itself; the method bodies are not in this header, so confirm
        * against the implementation file.
     49 */
     50 class CICHistory {
     51 public:
     52     /** "don't care" word id, also used as the separator word id */
     53     static const unsigned int DCWID;
     54 
            /** Virtual, so a derived history can be destroyed through a CICHistory pointer. */
     55     virtual ~CICHistory();
     56 
            /**
            * @param wid is the word id to look up
            * @return presumably true if wid occurs in the memorized history (TODO confirm)
            */
     57     virtual bool seenBefore(unsigned int wid);
     58 
     59     /**
     60     * Memorize the context stream delimited by [its_wid, ite_wid)
            * @param its_wid is the first word pointer of the context stream
            * @param ite_wid is the last (exclusive) word pointer of the context stream
            * @return meaning not documented here; presumably false on failure (TODO confirm)
     61     */
     62     virtual bool memorize(unsigned int* its_wid, unsigned int* ite_wid);
     63 
     64     /**
     65     * @param its_wid is the first word pointer of the context stream
     66     * @param ite_wid is the last (exclusive) word pointer of the context stream
     67     * @return pr(*(ite_wid-1) | *its_wid, ..., *(ite_wid-2))
     68     * The return value may be zero, i.e. the probabilities are not required to be smoothed
     69     */
     70     virtual double pr(unsigned int* its_wid, unsigned int* ite_wid);
     71 
     72     /**
     73     * @param its_wid is the first word pointer of the history stream
     74     * @param ite_wid is the last (exclusive) word pointer of the history stream
            * @param wid is the word id whose conditional probability is requested
     75     * @return pr(wid | *its_wid, ..., *(ite_wid-1))
     76     * The return value may be zero, i.e. the probabilities are not required to be smoothed
     77     */
     78     virtual double pr(unsigned int* its_wid, unsigned int* ite_wid, unsigned int wid);
     79 
     80     /**
     81     * Allocate a buffer and put the context memory's content into it
     82     * @param buf_ptr receives the pointer to the allocated buffer
     83     * @param sz receives the size in bytes of the allocated buffer
     84     * @return false on error
     85     * Note: the caller must release the buffer with free(*buf_ptr) after use
     86     */
     87     virtual bool
     88     bufferize(void** buf_ptr, size_t* sz);
     89 
     90     /**
     91     * Load the context memory from the given buffer
     92     * @param buf_ptr is the buffer pointer
     93     * @param sz is the size in bytes of the buffer
     94     * @return false on error
     95     * Calling with buf_ptr set to NULL clears the context memory
     96     */
     97     virtual bool
     98     loadFromBuffer(void* buf_ptr, size_t sz);
     99 };
    100 
    101 class CBigramHistory : public CICHistory {
        /*
        * History memory derived from CICHistory that declares a deque of word ids
        * (m_memory) together with unigram and bigram frequency pools (m_unifreq,
        * m_bifreq). It redeclares every virtual method of the base class.
        * NOTE(review): how the pools are maintained is defined in the
        * implementation file, which is not visible from this header.
        */
    102 public:
            /**
            * One-time, class-level initialization.
            * NOTE(review): presumably fills the static s_stopWords set - confirm
            * against the implementation.
            */
    103     static void initClass();
    104 
    105     CBigramHistory();
    106 
    107     virtual ~CBigramHistory();
    108 
            /**
            * @param wid is the word id to look up
            * @return presumably true if wid occurs in the memorized history (TODO confirm)
            */
    109     virtual bool seenBefore(unsigned int wid);
    110 
            /**
            * Memorize the context stream delimited by [its_wid, ite_wid)
            * @return meaning not documented here; presumably false on failure (TODO confirm)
            */
    111     virtual bool memorize(unsigned int* its_wid, unsigned int* ite_wid);
    112 
    113     /**
    114     * @param its_wid is the first word pointer of the context stream
    115     * @param ite_wid is the last (exclusive) word pointer of the context stream
    116     * @return pr(*(ite_wid-1) | *(ite_wid-2))
    117     */
    118     virtual double pr(unsigned int* its_wid, unsigned int* ite_wid);
    119 
    120     /**
    121     * @param its_wid is the first word pointer of the history stream
    122     * @param ite_wid is the last (exclusive) word pointer of the history stream
            * @param wid is the word id whose conditional probability is requested
    123     * @return pr(wid | *(ite_wid-1))
    124     */
    125     virtual double pr(unsigned int* its_wid, unsigned int* ite_wid, unsigned int wid);
    126 
            /**
            * Same signature as CICHistory::bufferize: buf_ptr and sz are output
            * parameters for a buffer and its size in bytes.
            * NOTE(review): serialized layout is defined in the implementation - confirm there.
            */
    127     virtual bool
    128     bufferize(void** buf_ptr, size_t* sz);
    129 
            /**
            * Same signature as CICHistory::loadFromBuffer: buf_ptr is the buffer
            * and sz its size in bytes.
            */
    130     virtual bool
    131     loadFromBuffer(void* buf_ptr, size_t sz);
    132 
    133 protected:
            /* word id type */
    134     typedef unsigned                              TWordId;
            /* pair of two word ids; NOTE(review): presumably (previous word, current word) - confirm */
    135     typedef std::pair<TWordId, TWordId>           TBigram;
            /* a unigram is a single word id */
    136     typedef TWordId                               TUnigram;
            /* maps a bigram to an int; used as the type of m_bifreq */
    137     typedef std::map<TBigram, int>                TBigramPool;
            /* maps a unigram to an int; used as the type of m_unifreq */
    138     typedef std::map<TUnigram, int>               TUnigramPool;
            /* double-ended queue of word ids; used as the type of m_memory */
    139     typedef std::deque<TWordId>                   TContextMemory;
    140 
            /* NOTE(review): presumably the maximum number of word ids kept in m_memory - confirm */
    141     static const size_t contxt_memory_size;
    142 
            /* the memorized stream of word ids */
    143     TContextMemory          m_memory;
            /* per-unigram int values; presumably occurrence counts (see uniFreq) */
    144     TUnigramPool            m_unifreq;
            /* per-bigram int values; presumably occurrence counts (see biFreq) */
    145     TBigramPool             m_bifreq;
    146 
            /* set of word ids shared by all instances; NOTE(review): presumably stop words excluded from statistics - confirm */
    147     static std::set<unsigned int>                  s_stopWords;
    148 
    149 protected:
            /* probability for a bigram; NOTE(review): presumably pr(bg.second | bg.first), backing the public pr() overloads - confirm */
    150     double pr(TBigram& bg);
            /* frequency lookups; presumably read m_unifreq / m_bifreq (TODO confirm) */
    151     int  uniFreq(TUnigram& ug);
    152     int  biFreq(TBigram& bg);
    153 
            /* decrement / increment helpers for the unigram and bigram frequencies; exact bookkeeping is in the implementation */
    154     void decUniFreq(TUnigram& ug);
    155     void decBiFreq(TBigram& bg);
    156     void incUniFreq(TUnigram& ug);
    157     void incBiFreq(TBigram& bg);
    158 };
    159 
    160 #endif
    161