Home | History | Annotate | Download | only in slmprune
      1    0  yongsun /*
      2   82  yongsun  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
      3   82  yongsun  *
      4   82  yongsun  * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
      5   82  yongsun  *
      6   82  yongsun  * The contents of this file are subject to the terms of either the GNU Lesser
      7   82  yongsun  * General Public License Version 2.1 only ("LGPL") or the Common Development and
      8   82  yongsun  * Distribution License ("CDDL")(collectively, the "License"). You may not use this
      9   82  yongsun  * file except in compliance with the License. You can obtain a copy of the CDDL at
     10   82  yongsun  * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
     11   82  yongsun  * http://www.opensource.org/licenses/lgpl-license.php. See the License for the
     12   82  yongsun  * specific language governing permissions and limitations under the License. When
     13   82  yongsun  * distributing the software, include this License Header Notice in each file and
     14   82  yongsun  * include the full text of the License in the License file as well as the
     15   82  yongsun  * following notice:
     16   82  yongsun  *
     17   82  yongsun  * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
     18   82  yongsun  * (CDDL)
     19   82  yongsun  * For Covered Software in this distribution, this License shall be governed by the
     20   82  yongsun  * laws of the State of California (excluding conflict-of-law provisions).
     21   82  yongsun  * Any litigation relating to this License shall be subject to the jurisdiction of
     22   82  yongsun  * the Federal Courts of the Northern District of California and the state courts
     23   82  yongsun  * of the State of California, with venue lying in Santa Clara County, California.
     24   82  yongsun  *
     25   82  yongsun  * Contributor(s):
     26   82  yongsun  *
     27   82  yongsun  * If you wish your version of this file to be governed by only the CDDL or only
     28   82  yongsun  * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
     29   82  yongsun  * include this software in this distribution under the [CDDL or LGPL Version 2.1]
     30   82  yongsun  * license." If you don't indicate a single choice of license, a recipient has the
     31   82  yongsun  * option to distribute your version of this file under either the CDDL or the LGPL
     32   82  yongsun  * Version 2.1, or to extend the choice of license to its licensees as provided
     33   82  yongsun  * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
     34   82  yongsun  * Version 2 license, then the option applies only if the new code is made subject
     35   82  yongsun  * to such option by the copyright holder.
     36    0  yongsun  */
     37   82  yongsun 
     38    0  yongsun #ifdef HAVE_CONFIG_H
     39    0  yongsun #include "config.h"
     40    0  yongsun #endif
     41    0  yongsun 
     42    0  yongsun #ifdef HAVE_ASSERT_H
     43    0  yongsun #include <assert.h>
     44    0  yongsun #endif
     45    0  yongsun 
     46    0  yongsun #include <stdio.h>
     47    0  yongsun #include <math.h>
     48    0  yongsun 
     49    0  yongsun #include "../sim_slm.h"
     50    0  yongsun #include <algorithm>
     51    0  yongsun 
     52    0  yongsun class TNodeInfo {
     53    0  yongsun public:
     54    0  yongsun     double d;
     55  212  yongsun #ifndef WORDS_BIGENDIAN
     56    0  yongsun     unsigned child : 1;
     57    0  yongsun     unsigned idx : 31;
     58    0  yongsun #else
     59    0  yongsun     unsigned idx : 31;
     60    0  yongsun     unsigned child : 1;
     61    0  yongsun #endif
     62    0  yongsun 
     63    0  yongsun public:
     64    0  yongsun     TNodeInfo(double distance=0.0, int pos=0, bool children=0) : d(distance)
     65    0  yongsun     { idx = pos; child = (children==0)?0:1; }
     66    0  yongsun 
     67    0  yongsun     bool operator< (const TNodeInfo& r) const
     68    0  yongsun     { return ((child ^ r.child) == 0)?(d < r.d):(child == 0); }
     69    0  yongsun 
     70    0  yongsun     bool operator==(const TNodeInfo& r) const
     71    0  yongsun     { return (child == r.child && d == r.d); }
     72    0  yongsun };
     73    0  yongsun 
     74    0  yongsun class CSlmPruner : public CSIMSlm {
     75    0  yongsun public:
     76    0  yongsun     CSlmPruner() : CSIMSlm(), cut(NULL)
     77    0  yongsun     { }
     78    0  yongsun 
     79    0  yongsun     ~CSlmPruner()
     80    0  yongsun     { if (cut) delete [] cut; }
     81    0  yongsun 
     82    0  yongsun     void SetCut(int* nCut);
     83    0  yongsun     void SetReserve(int* nReserve);
     84    0  yongsun     void Prune();
     85    0  yongsun     void Write(const char* filename);
     86    0  yongsun 
     87    0  yongsun protected:
     88    0  yongsun     void PruneLevel(int lvl);
     89    0  yongsun     double CalcDistance(int lvl, int* idx, TSIMWordId* hw);
     90    0  yongsun     void CalcBOW();
     91    0  yongsun 
     92    0  yongsun protected:
     93    0  yongsun     int* cut;
     94    0  yongsun     int cache_level, cache_idx; // to accelerate the pruning method
     95    0  yongsun     double cache_PA, cache_PB;
     96    0  yongsun };
     97    0  yongsun 
     98    0  yongsun void CSlmPruner::Prune()
     99    0  yongsun {
    100    0  yongsun     printf("Erasing items using Entropy distance"); fflush(stdout);
    101    0  yongsun     for (int lvl=N; lvl>0; --lvl)
    102    0  yongsun         PruneLevel(lvl);
    103    0  yongsun     printf("\n"); fflush(stdout);
    104    0  yongsun     CalcBOW();
    105    0  yongsun }
    106    0  yongsun void CSlmPruner::Write(const char* filename)
    107    0  yongsun {
    108    0  yongsun     FILE* out = fopen(filename, "wb");
    109    0  yongsun     fwrite(&N, sizeof(N), 1, out);
    110    0  yongsun     fwrite(&bUseLogPr, sizeof(bUseLogPr), 1, out);
    111    0  yongsun     fwrite(sz, sizeof(int), N+1, out);
    112    0  yongsun     for (int i=0; i<N; ++i) {
    113    0  yongsun         fwrite(level[i], sizeof(TNode), sz[i], out);
    114    0  yongsun     }
    115    0  yongsun     fwrite(level[N], sizeof(TLeaf), sz[N], out);
    116    0  yongsun     fclose(out);
    117    0  yongsun }
    118    0  yongsun 
    119    0  yongsun void CSlmPruner::SetReserve(int* nReserve)
    120    0  yongsun {
    121    0  yongsun     cut = new int [N+1];
    122    0  yongsun     cut[0] = 0;
    123    0  yongsun     for (int lvl=1; lvl<=N; ++lvl) {
    124    0  yongsun         cut[lvl] = sz[lvl] - 1 - nReserve[lvl];
    125    0  yongsun         if (cut[lvl] < 0) cut[lvl] = 0;
    126    0  yongsun     }
    127    0  yongsun }
    128    0  yongsun 
    129    0  yongsun void CSlmPruner::SetCut(int* nCut)
    130    0  yongsun {
    131    0  yongsun     cut = new int [N+1];
    132    0  yongsun     cut[0] = 0;
    133    0  yongsun     for (int lvl=1; lvl<=N; ++lvl)
    134    0  yongsun         cut[lvl] = nCut[lvl];
    135    0  yongsun }
    136    0  yongsun 
    137    0  yongsun template <class chIterator>
    138    0  yongsun int CutLevel(CSIMSlm::TNode* pfirst, CSIMSlm::TNode* plast, chIterator chfirst, chIterator chlast, bool bUseLogPr)
    139    0  yongsun {
    140    0  yongsun    int idxfirst, idxchk;
    141    0  yongsun    chIterator chchk = chfirst;
    142    0  yongsun    for (idxfirst=idxchk=0; chchk != chlast; ++chchk, ++idxchk) {
    143    0  yongsun         //cut item whoese pr == 1.0; and not psuedo tail
    144    0  yongsun         if (chchk->pr != ((bUseLogPr)?0.0:1.0) || (chchk+1) == chlast) {
    145    0  yongsun             if (idxfirst < idxchk) *chfirst = *chchk;
    146    0  yongsun             while (pfirst != plast && pfirst->child <= idxchk)
    147    0  yongsun                 pfirst++->child = idxfirst;
    148    0  yongsun             ++idxfirst;
    149    0  yongsun             ++chfirst;
    150    0  yongsun         }
    151    0  yongsun     }
    152    0  yongsun     return idxfirst;
    153    0  yongsun }
    154    0  yongsun 
    155    0  yongsun void CSlmPruner::PruneLevel(int lvl)
    156    0  yongsun {
    157    0  yongsun     cache_level = cache_idx = -1;
    158    0  yongsun 
    159    0  yongsun     if (cut[lvl] <= 0) {
    160    0  yongsun         printf("\n  Level %d (%d items), no need to cut as your command!", lvl, sz[lvl]-1); fflush(stdout);
    161    0  yongsun         return;
    162    0  yongsun     }
    163    0  yongsun 
    164    0  yongsun     printf("\n  Level %d (%d items), allocating...", lvl, sz[lvl]-1); fflush(stdout);
    165    0  yongsun 
    166    0  yongsun     int n = sz[lvl] - 1; //do not count last psuedo tail
    167    0  yongsun     if (cut[lvl] >= n) cut[lvl] = n-1;
    168    0  yongsun     TNodeInfo* pbuf = new TNodeInfo[n];
    169    0  yongsun     TSIMWordId hw[16]; // it should be lvl+1, yet some compiler do not support it
    170    0  yongsun     int idx[16];       // it should be lvl+1, yet some compiler do not support it
    171    0  yongsun 
    172    0  yongsun     printf(", Calculating..."); fflush(stdout);
    173    0  yongsun     for (int i=0; i <=lvl; ++i)
    174    0  yongsun         idx[i] = 0;
    175    0  yongsun     while (idx[lvl] < n) {
    176    0  yongsun         if (lvl == N) {
    177    0  yongsun             hw[lvl] = (((TLeaf*)level[lvl])+idx[lvl])->id;
    178    0  yongsun         } else {
    179    0  yongsun             hw[lvl] = (((TNode*)level[lvl])+idx[lvl])->id;
    180    0  yongsun         }
    181    0  yongsun         for (int j=lvl-1; j >= 0; --j) {
    182    0  yongsun             TNode* pnode = ((TNode*)level[j])+idx[j];
    183    0  yongsun             for (; (pnode+1)->child <= idx[j+1]; ++pnode, ++idx[j])
    184    0  yongsun                 ;
    185    0  yongsun             hw[j] = pnode->id;
    186    0  yongsun         }
    187    0  yongsun         bool has_child = false;
    188    0  yongsun         if (lvl != N) {
    189    0  yongsun             TNode* pn = ((TNode*)level[lvl]) + idx[lvl];
    190    0  yongsun             if ((pn+1)->child > pn->child)
    191    0  yongsun                 has_child = true;
    192    0  yongsun         }
    193    0  yongsun         pbuf[idx[lvl]].child = (has_child)?1:0;
    194    0  yongsun         pbuf[idx[lvl]].idx = idx[lvl];
    195    0  yongsun         if (!has_child)
    196    0  yongsun             pbuf[idx[lvl]].d = CalcDistance(lvl, idx, hw);
    197    0  yongsun         ++idx[lvl];
    198    0  yongsun     }
    199    0  yongsun     printf(", sorting...");
    200    0  yongsun     std::make_heap(pbuf, pbuf+n);
    201    0  yongsun     std::sort_heap(pbuf, pbuf+n);
    202    0  yongsun 
    203    0  yongsun     int k = 0;
    204    0  yongsun     // because pr in model can not be 1.0, so we use this to mark a item to be prune
    205    0  yongsun     for (TNodeInfo* pinfo = pbuf; k < cut[lvl] && pinfo->child == 0; ++k, ++pinfo) {
    206    0  yongsun         if (lvl == N) {
    207    0  yongsun             if (bUseLogPr)
    208    0  yongsun                 (((TLeaf*)level[lvl]) + pinfo->idx)->pr = 0.0; // -log(1.0)
    209    0  yongsun             else
    210    0  yongsun                 (((TLeaf*)level[lvl]) + pinfo->idx)->pr = 1.0;
    211    0  yongsun         } else {
    212    0  yongsun             if (bUseLogPr)
    213    0  yongsun                 (((TNode*)level[lvl]) + pinfo->idx)->pr = 0.0; // -log(1.0)
    214    0  yongsun             else
    215    0  yongsun                 (((TNode*)level[lvl]) + pinfo->idx)->pr = 1.0; // -log(1.0)
    216    0  yongsun         }
    217    0  yongsun     }
    218    0  yongsun     printf("(cut %d items), build parent ptr...", k); fflush(stdout);
    219    0  yongsun     if (lvl == N) {
    220    0  yongsun         k = CutLevel((TNode*)level[lvl-1], ((TNode*)level[lvl-1])+sz[lvl-1], (TLeaf*)level[lvl], ((TLeaf*)level[lvl])+sz[lvl], bUseLogPr);
    221    0  yongsun     } else {
    222    0  yongsun         k = CutLevel((TNode*)level[lvl-1], ((TNode*)level[lvl-1])+sz[lvl-1], (TNode*)level[lvl], ((TNode*)level[lvl])+sz[lvl], bUseLogPr);
    223    0  yongsun     }
    224    0  yongsun     sz[lvl] = k; //k is new size
    225    0  yongsun     printf("done!");
    226    0  yongsun     delete [] pbuf;
    227    0  yongsun     cache_level = cache_idx = -1;
    228    0  yongsun }
    229    0  yongsun 
    230    0  yongsun template<class chIterator>
    231    0  yongsun double CalcNodeBow(CSlmPruner* pruner, int lvl, TSIMWordId words[], chIterator chh, chIterator cht, bool bUseLogPr)
    232    0  yongsun {
    233    0  yongsun     double sumnext = 0.0, sum=0.0;
    234    0  yongsun     if (chh == cht)
    235    0  yongsun         return 1.0;
    236    0  yongsun     for (; chh < cht; ++chh) {
    237    0  yongsun         if (bUseLogPr)
    238    0  yongsun             sumnext += exp(-double(chh->pr));
    239    0  yongsun         else
    240    0  yongsun             sumnext += double(chh->pr);
    241    0  yongsun         words[lvl+1] = chh->id;
    242    0  yongsun         sum += pruner->getPr(lvl, words+2);
    243    0  yongsun     }
    244    0  yongsun     assert(sumnext >= 0.0 && sumnext < 1.0);
    245    0  yongsun     assert(sum >= 0.0 && sum < 1.0);
    246    0  yongsun     return (1.0-sumnext)/(1.0-sum);
    247    0  yongsun }
    248    0  yongsun 
    249    0  yongsun void CSlmPruner::CalcBOW()
    250    0  yongsun {
    251    0  yongsun     printf("\nUpdating back-off weight"); fflush(stdout);
    252    0  yongsun     for (int lvl=0; lvl < N; ++lvl) {
    253    0  yongsun         printf("\n    Level %d...", lvl); fflush(stdout);
    254    0  yongsun         TNode* base[16]; //it should be lvl+1, yet some compiler do not support it
    255    0  yongsun         int idx[16];     //it should be lvl+1, yet some compiler do not support it
    256    0  yongsun         for (int i=0; i <= lvl; ++i) {
    257    0  yongsun             base[i] = (TNode*)level[i];
    258    0  yongsun             idx[i] = 0;
    259    0  yongsun         }
    260    0  yongsun         TSIMWordId words[17];   //it should be lvl+2, yet some compiler do not support it
    261    0  yongsun         for (int lsz = sz[lvl]-1; idx[lvl] < lsz; ++idx[lvl]) {
    262    0  yongsun             words[lvl] = base[lvl][idx[lvl]].id;
    263    0  yongsun             for (int k=lvl-1; k >= 0; --k) {
    264    0  yongsun                 while (base[k][idx[k]+1].child <= idx[k+1])
    265    0  yongsun                     ++idx[k];
    266    0  yongsun                 words[k] = base[k][idx[k]].id;
    267    0  yongsun             }
    268    0  yongsun             TNode & node = base[lvl][idx[lvl]];
    269    0  yongsun             TNode & nodenext = *((&node)+1);
    270    0  yongsun 
    271    0  yongsun             double bow = 1.0;
    272    0  yongsun             if (lvl == N-1) {
    273    0  yongsun                 TLeaf* ch = (TLeaf*)level[lvl+1];
    274    0  yongsun                 bow = CalcNodeBow(this, lvl, words, &(ch[node.child]), &(ch[nodenext.child]), bUseLogPr);
    275    0  yongsun             } else {
    276    0  yongsun                 TNode* ch = (TNode*)level[lvl+1];
    277    0  yongsun                 bow = CalcNodeBow(this, lvl, words, &(ch[node.child]), &(ch[nodenext.child]), bUseLogPr);
    278    0  yongsun             }
    279    0  yongsun             if (bUseLogPr)
    280    0  yongsun                 node.bow = PR_TYPE(-log(bow));
    281    0  yongsun             else
    282    0  yongsun                 node.bow = PR_TYPE(bow);
    283    0  yongsun         }
    284    0  yongsun     }
    285    0  yongsun     printf("\n"); fflush(stdout);
    286    0  yongsun }
    287    0  yongsun 
    288    0  yongsun double CSlmPruner::CalcDistance(int lvl, int* idx, TSIMWordId* hw)
    289    0  yongsun {
    290    0  yongsun     double PA, PB, PHW, PH_W, PH, BOW, _BOW, pr, p_r;
    291    0  yongsun     TSIMWordId w = hw[lvl];
    292    0  yongsun 
    293    0  yongsun     PH=1.0;
    294    0  yongsun     TNode* parent = ((TNode*)level[lvl-1])+idx[lvl-1];
    295    0  yongsun     if (bUseLogPr)
    296    0  yongsun         BOW = exp(-double(parent->bow));  //Fix original bug to use the BOW directly
    297    0  yongsun     else
    298    0  yongsun         BOW = double(parent->bow);
    299    0  yongsun 
    300    0  yongsun     for (int i=1; i < lvl; ++i)
    301    0  yongsun         PH *= getPr(i, hw+1+(lvl-i));
    302    0  yongsun     assert(PH <= 1.0 && PH >0.0);
    303    0  yongsun 
    304    0  yongsun     if (lvl == N) {
    305    0  yongsun         if (bUseLogPr)
    306    0  yongsun             PHW = exp(-((((TLeaf*)level[lvl])+idx[lvl])->pr));
    307    0  yongsun         else
    308    0  yongsun             PHW = ((((TLeaf*)level[lvl])+idx[lvl])->pr);
    309    0  yongsun         assert(w == (((TLeaf*)level[lvl])+idx[lvl])->id);
    310    0  yongsun     } else {
    311    0  yongsun         if (bUseLogPr)
    312    0  yongsun             PHW = exp(-((((TNode*)level[lvl])+idx[lvl])->pr));
    313    0  yongsun         else
    314    0  yongsun             PHW = ((((TNode*)level[lvl])+idx[lvl])->pr);
    315    0  yongsun         assert(w == (((TNode*)level[lvl])+idx[lvl])->id);
    316    0  yongsun 
    317    0  yongsun     }
    318    0  yongsun     PH_W = getPr(lvl-1, hw+2);
    319    0  yongsun     assert(PHW > 0.0 && PHW < 1.0);
    320    0  yongsun     assert(PH_W > 0.0 && PH_W < 1.0);
    321    0  yongsun 
    322    0  yongsun     if (cache_level != lvl-1 || cache_idx != idx[lvl-1]) {
    323    0  yongsun         cache_level = lvl-1;
    324    0  yongsun         cache_idx = idx[lvl-1];
    325    0  yongsun         cache_PA = cache_PB = 1.0;
    326    0  yongsun         for (int h=parent->child, t = (parent+1)->child; h<t; ++h) {
    327    0  yongsun             TSIMWordId id;
    328    0  yongsun             if (lvl == N) {
    329    0  yongsun                 if (bUseLogPr)
    330    0  yongsun                     pr = exp(-((((TLeaf*)level[lvl])+h)->pr));
    331    0  yongsun                 else
    332    0  yongsun                     pr = ((((TLeaf*)level[lvl])+h)->pr);
    333    0  yongsun                 id = (((TLeaf*)level[lvl])+h)->id;
    334    0  yongsun 
    335    0  yongsun             } else {
    336    0  yongsun                 if (bUseLogPr)
    337    0  yongsun                     pr = exp(-((((TNode*)level[lvl])+h)->pr));
    338    0  yongsun                 else
    339    0  yongsun                     pr = ((((TNode*)level[lvl])+h)->pr);
    340    0  yongsun                 id = (((TNode*)level[lvl])+h)->id;
    341    0  yongsun 
    342    0  yongsun             }
    343    0  yongsun             assert(pr > 0.0 && pr < 1.0);
    344    0  yongsun             cache_PA -= pr;
    345    0  yongsun 
    346    0  yongsun             hw[lvl] = id;
    347    0  yongsun             p_r = getPr(lvl-1, hw+2);  // Fix bug from pr = getPr(lvl-1, hw+1)
    348    0  yongsun             assert(p_r > 0.0 && p_r < 1.0);
    349    0  yongsun             cache_PB -= p_r;
    350    0  yongsun         }
    351    0  yongsun         assert(cache_PA > -0.01 && cache_PB > -0.01);
    352    0  yongsun         if (cache_PA < 0.00001 || cache_PB < 0.00001) {
    353    0  yongsun             printf("\n precision problem on %d gram:", lvl-1);
    354    0  yongsun             for (int i=1; i < lvl; ++i) printf("%d ", idx[i]);
    355    0  yongsun             printf("   ");
    356    0  yongsun             if (cache_PA < 0.00001) {
    357    0  yongsun                 printf("{1.0 - sigma p(w|h)} ==> 0.00001");
    358    0  yongsun                 cache_PA = 0.00001;
    359    0  yongsun             }
    360    0  yongsun             if (cache_PB < 0.00001) {
    361    0  yongsun                 printf("{1.0 - sigma p(w|h')} ==> 0.00001");
    362    0  yongsun                 cache_PB = 0.00001;
    363    0  yongsun             }
    364    0  yongsun         }
    365    0  yongsun     }
    366    0  yongsun     PA = cache_PA;
    367    0  yongsun     PB = cache_PB;
    368    0  yongsun 
    369    0  yongsun     _BOW = (PA+PHW) / (PB+PH_W); // Fix bug from "(1.0-PA+PHW)/(1.0-PB+PH_W);"
    370    0  yongsun 
    371    0  yongsun     assert(BOW > 0.0);
    372    0  yongsun     assert(_BOW > 0.0);
    373    0  yongsun     assert(PA+PHW < 1.01);     // %1 error rate
    374    0  yongsun     assert(PB+PH_W < 1.01);    // %1 error rate
    375    0  yongsun 
    376   12  yongsun     /*
    377   12  yongsun      * PH = P(h), PHW = P(w|h), PH_W = P(w|h'), _BOW = bow'(h) (the new bow)
    378   12  yongsun      * BOW = bow(h) (the original bow), PA = sum_{w_i:C(w_i,h)=0} P(w_i|h),
    379   12  yongsun      * PB = sum_{w_i:C(w_i,h)=0} P(w_i|h')
    380   12  yongsun      */
    381    0  yongsun     return -(PH * (PHW * (log(PH_W)+log(_BOW)-log(PHW)) + PA * (log(_BOW)-log(BOW)) ));
    382    0  yongsun }
    383    0  yongsun 
    384    0  yongsun void ShowUsage(void)
    385    0  yongsun {
    386    0  yongsun     printf("Usage:\n");
    387    0  yongsun     printf("    slmprune input_slm result_slm [R|C] num1 num2...\n");
    388    0  yongsun     printf("\nDescription:\n");
    389    0  yongsun     printf("\
    390    0  yongsun       This program uses entropy-based method to prune the size of back-off \n\
    391    0  yongsun   language model 'input_slm' to a specific size and write to 'result_slm'. \n\
    392    0  yongsun   the third parameter [R|C] means the following numbers is the number for\n\
    393    0  yongsun   (R)eserve or (C)ut. If (C)ut, the num[k] means how many items in level K\n\
    394    0  yongsun   would be cut. If (R)eserve, num[k] means how many item would be reserved\n\
    395    0  yongsun   in level k. \n\
    396    0  yongsun       Note that we do not ensure that during pruning process,  exactly the\n\
    397    0  yongsun   the given number of items are cut or reserved, because some items may \n\
    398    0  yongsun   contains high level children, so could not be cut. \n\
    399    0  yongsun       Also it's your responsiblity to give right number of arguments based\n\
    400    0  yongsun   on 'input_slm'.\n\
    401    0  yongsun \nSee Also:\n\
    402    0  yongsun     To get information of the back-off language model, try 'slminfo'.\n\n");
    403    0  yongsun }
    404    0  yongsun 
    405    0  yongsun int nCut[32];
    406    0  yongsun const char* srcfilename, *tgtfilename;
    407    0  yongsun 
    408    0  yongsun int main(int argc, char* argv[])
    409    0  yongsun {
    410    0  yongsun     memset(nCut, 0, sizeof(nCut));
    411    0  yongsun     if (argc < 5) {
    412    0  yongsun         ShowUsage(); exit(100);
    413    0  yongsun     }
    414    0  yongsun     srcfilename = argv[1];
    415    0  yongsun     tgtfilename = argv[2];
    416    0  yongsun     bool bCut = (argv[3][0] == 'C' || argv[3][0] == 'c');
    417    0  yongsun 
    418    0  yongsun     CSlmPruner pruner;
    419    0  yongsun     printf("Reading language model %s...", srcfilename); fflush(stdout);
    420    0  yongsun     pruner.Load(srcfilename);
    421    0  yongsun     printf("done!\n"); fflush(stdout);
    422    0  yongsun 
    423    0  yongsun     for (int i=4; i < argc && i < 100; ++i)
    424    0  yongsun         nCut[i-3] = atoi(argv[i]);
    425    0  yongsun 
    426    0  yongsun     if (bCut)
    427    0  yongsun         pruner.SetCut(nCut);
    428    0  yongsun     else
    429    0  yongsun         pruner.SetReserve(nCut);
    430    0  yongsun     pruner.Prune();
    431    0  yongsun 
    432    0  yongsun     printf("Writing target language model %s...", tgtfilename); fflush(stdout);
    433    0  yongsun     pruner.Write(tgtfilename);
    434    0  yongsun     printf("done!\n\n"); fflush(stdout);
    435    0  yongsun 
    436    0  yongsun     pruner.Free();
    437    0  yongsun     return 0;
    438    0  yongsun }
    439