Home | History | Annotate | Download | only in common
      1 /*
      2  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
      3  * Use is subject to license terms.
      4  */
      5 
      6 /*
      7  * The contents of this file are subject to the Netscape Public
      8  * License Version 1.1 (the "License"); you may not use this file
      9  * except in compliance with the License. You may obtain a copy of
     10  * the License at http://www.mozilla.org/NPL/
     11  *
     12  * Software distributed under the License is distributed on an "AS
     13  * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
     14  * implied. See the License for the specific language governing
     15  * rights and limitations under the License.
     16  *
     17  * The Original Code is Mozilla Communicator client code, released
     18  * March 31, 1998.
     19  *
     20  * The Initial Developer of the Original Code is Netscape
     21  * Communications Corporation. Portions created by Netscape are
     22  * Copyright (C) 1998-1999 Netscape Communications Corporation. All
     23  * Rights Reserved.
     24  *
     25  * Contributor(s):
     26  */
     27 
     28 #include <stdio.h>
     29 #include <stdlib.h>
     30 #include <string.h>
     31 #include <locale.h>
     32 #include <ctype.h>
     33 
     34 #ifndef HAVE_LIBICU
     35 
     36 #ifdef SOLARIS_LDAP_CMD
     37 #include <errno.h>
     38 #include <langinfo.h>
     39 #include <iconv.h>
     40 #endif
     41 
     42 #ifdef __cplusplus
     43 extern "C" {
     44 #endif
     45 
/* Charset name; defined in another translation unit. */
extern char	*ldaptool_charset;
/* Defined here and initialised to NULL; not referenced elsewhere in this file. */
char		*ldaptool_convdir = NULL;
/* Flag; in this (non-ICU) build it is only ever reset to 0. */
static		int charsetset = 0;
/* Returns a malloc'ed UTF-8 rendering of src (see definition below). */
char		*ldaptool_local2UTF8( const char *src );

#ifdef SOLARIS_LDAP_CMD
/* iconv-based conversion helper shared by the two public converters. */
static char 	*ldaptool_convert( const char *src, const char *fcode,
				const char *tcode);
/* Inverse of ldaptool_local2UTF8: UTF-8 to the nl_langinfo(CODESET) codeset. */
char		*ldaptool_UTF82local( const char *src );
#endif	/* SOLARIS_LDAP_CMD */
     56 
     57 #ifdef SOLARIS_LDAP_CMD
     58 /*
     59  * ICU version always returns string, unless strdup fails.
     60  * As in ICU version, in case of error strdup(src)
     61  * Usually strdup(src) will be ASCII and legal anyways.
     62  */
     63 
     64 static char *
     65 ldaptool_convert( const char *src, const char *fcode,
     66 				 const char *tcode) {
     67     char	*dest, *tptr, *tmp;
     68     const char	*fptr;
     69     iconv_t	cd;
     70     size_t	ileft, oleft, ret, size;
     71 
     72     if (src == NULL)
     73 	return (NULL);
     74 
     75     if (fcode == NULL || tcode == NULL)
     76 	return (strdup(src));
     77 
     78     if (strcasecmp(fcode, tcode) == 0)
     79 	return (strdup(src));
     80 
     81     if ((cd = iconv_open(tcode, fcode)) == (iconv_t)-1) {
     82 	/* conversion table not available */
     83 	return (strdup(src));
     84     }
     85 
     86     ileft = strlen(src);
     87     oleft = 2 * ileft;
     88     size = oleft;
     89     ret = -1;
     90     if ((dest = (char *)malloc(size)) == NULL) {
     91 	(void) iconv_close(cd);
     92 	/* maybe sizeof strlen(src) memory still exists */
     93 	return (strdup(src));
     94     }
     95     tptr = dest;
     96     fptr = src;
     97 
     98     for (;;) {
     99 	ret = iconv(cd, &fptr, &ileft, &tptr, &oleft);
    100 
    101 	if (ret != (size_t)-1) {
    102 		/*
    103 		 * Success. Place 'cd' into its initial shift
    104 		 * state before returning.
    105 		 */
    106 		if (fptr == NULL) /* already in initial state  */
    107 			break;
    108 		fptr = NULL;
    109 		ileft = 0;
    110 		continue;
    111 	} if (errno == E2BIG) {
    112 		/*
    113 		 * Lack of space in output buffer.
    114 		 * Hence double the size and retry.
    115 		 * But before calling  iconv(), oleft
    116 		 * and tptr have to re-adjusted, so that
    117 		 * iconv() doesn't overwrite the data
    118 		 * which has already been converted.
    119 		 */
    120 		oleft += size;
    121 		size *= 2;
    122 		if ((tmp = (char *) realloc(dest, size)) == NULL)
    123 			break;
    124 		tptr = tmp + (tptr - dest);
    125 		dest = tmp;
    126 		continue;
    127 	} else {
    128 		/* Other errors */
    129 		break;
    130 	}
    131     }
    132 
    133     if (dest != NULL) {
    134 	if (ret == -1) {
    135     		/* Free malloc'ed memory on failure */
    136 		free(dest);
    137 		dest = NULL;
    138 	} else if (oleft > 0) {
    139 		/* NULL terminate the return value */
    140 		*(dest + (size - oleft)) = '\0';
    141 	} else {
    142 		/* realloc one more byte and NULL terminate */
    143 		if ((tmp = (char *) realloc(dest, size + 1)) == NULL) {
    144 			free(dest);
    145 			dest = NULL;
    146 		} else {
    147 			*(dest + size) = '\0';
    148 		}
    149 	}
    150     }
    151 
    152     (void) iconv_close(cd);
    153     if (dest == NULL) {
    154 	/* last chance in case some other failure along the way occurs */
    155 	return (strdup(src));
    156     }
    157     return (dest);
    158 }
    159 
    160 char *
    161 ldaptool_UTF82local( const char *src )
    162 {
    163     char *to_code;
    164     if ((to_code = nl_langinfo(CODESET)) == NULL)
    165 	return (strdup(src));
    166     return (ldaptool_convert(src, "UTF-8", (const char *)to_code));
    167 }
    168 #endif	/* SOLARIS_LDAP_CMD */
    169 
    170 char *
    171 ldaptool_local2UTF8( const char *src )
    172 {
    173 #ifdef SOLARIS_LDAP_CMD
    174     char *from_code;
    175     if ((from_code = nl_langinfo(CODESET)) == NULL)
    176 	return (strdup(src));
    177     return (ldaptool_convert(src, (const char *)from_code, "UTF-8"));
    178 #else
    179     char *utf8;
    180     charsetset = 0;
    181     if (src == NULL)
    182     {
    183 	return NULL;
    184     }
    185     utf8 = strdup(src);
    186     return ( utf8 );
    187 #endif	/* SOLARIS_LDAP_CMD */
    188 }
    189 
    190 #else /* HAVE_LIBICU */
    191 
    192 #include "unicode/utypes.h"
    193 #include "unicode/ucnv.h"
    194 
    195 #define NSPR20
    196 
    197 #ifdef XP_WIN32
    198 #define  VC_EXTRALEAN
    199 #include <afxwin.h>
    200 #include <winnls.h>
    201 #endif
    202 
/*
 * Charset name consulted by ldaptool_local2UTF8(); defined elsewhere.
 * NULL or "" means the input is copied unchanged; "0" is replaced on first
 * use by ICU's default converter name.
 */
extern char *ldaptool_charset;
/* Set to 1 once ldaptool_local2UTF8() has processed ldaptool_charset. */
static int charsetset = 0;

extern "C" {
/* Defined here and initialised to NULL; not referenced elsewhere in this file. */
char *ldaptool_convdir = NULL;
/* Returns a malloc'ed UTF-8 rendering of its argument (see definition below). */
char *ldaptool_local2UTF8( const char * );
}
    210 
    211 #ifndef XP_WIN32
    212 char * GetNormalizedLocaleName(void);
    213 
    214 
    215 char *
    216 GetNormalizedLocaleName(void)
    217 {
    218 #ifdef _HPUX_SOURCE
    219 
    220     int    len;
    221     char    *locale;
    222 
    223     locale = setlocale(LC_CTYPE, "");
    224     if (locale && *locale) {
    225         len = strlen(locale);
    226     } else {
    227         locale = "C";
    228         len = 1;
    229     }
    230 
    231     if ((!strncmp(locale, "/\x03:", 3)) &&
    232         (!strcmp(&locale[len - 2], ";/"))) {
    233         locale += 3;
    234         len -= 5;
    235     }
    236 
    237     locale = strdup(locale);
    238     if (locale) {
    239         locale[len] = 0;
    240     }
    241 
    242     return locale;
    243 
    244 #else
    245 
    246     char    *locale;
    247 
    248     locale = setlocale(LC_CTYPE, "");
    249     if (locale && *locale) {
    250         return strdup(locale);
    251     }
    252 
    253     return strdup("C");
    254 
    255 #endif
    256 }
    257 
    258 #if defined(IRIX)
/*
 * Locale-name to charset-name table selected when IRIX is defined.
 * Each entry is "<locale>:" followed by blanks and a charset name; entries
 * starting with '!' are comments.  The array is NULL-terminated and is
 * scanned by GetCharsetFromLocale().
 */
const char *CHARCONVTABLE[] =
{
"! This table maps the host's locale names to IANA charsets",
"!",
"C:             ISO_8859-1:1987",
"cs:            ISO_8859-2:1987",
"da:            ISO_8859-1:1987",
"de:            ISO_8859-1:1987",
"de_AT:         ISO_8859-1:1987",
"de_CH:         ISO_8859-1:1987",
"en:            ISO_8859-1:1987",
"en_AU:         ISO_8859-1:1987",
"en_CA:         ISO_8859-1:1987",
"en_TH:         ISO_8859-1:1987",
"en_US:         ISO_8859-1:1987",
"es:            ISO_8859-1:1987",
"fi:            ISO_8859-1:1987",
"fr:            ISO_8859-1:1987",
"fr_BE:         ISO_8859-1:1987",
"fr_CA:         ISO_8859-1:1987",
"fr_CH:         ISO_8859-1:1987",
"is:            ISO_8859-1:1987",
"it:            ISO_8859-1:1987",
"it_CH:         ISO_8859-1:1987",
"ja_JP.EUC:     Extended_UNIX_Code_Packed_Format_for_Japanese",
"ko_KR.euc:     EUC-KR",
"nl:            ISO_8859-1:1987",
"nl_BE:         ISO_8859-1:1987",
"no:            ISO_8859-1:1987",
"pl:            ISO_8859-2:1987",
"pt:            ISO_8859-1:1987",
"sh:            ISO_8859-2:1987",
"sk:            ISO_8859-2:1987",
"sv:            ISO_8859-1:1987",
"zh_CN.ugb:     GB2312",
"zh_TW.ucns:    cns11643_1",
NULL
};
    297 #elif defined(SOLARIS)
/*
 * Locale-name to charset-name table selected when SOLARIS is defined.
 * Each entry is "<locale>:" followed by blanks or tabs and a charset name;
 * entries starting with '!' are comments.  The array is NULL-terminated
 * and is scanned by GetCharsetFromLocale().
 */
const char *CHARCONVTABLE[] =
{
"! This table maps the host's locale names to IANA charsets",
"!",
"C:             ISO_8859-1:1987",
"ja:            Extended_UNIX_Code_Packed_Format_for_Japanese",
"ja_JP.EUC:     Extended_UNIX_Code_Packed_Format_for_Japanese",
"ja_JP.PCK:     Shift_JIS",
"en:		ISO_8859-1:1987",
"en_AU:		ISO_8859-1:1987",
"en_CA:		ISO_8859-1:1987",
"en_UK:		ISO_8859-1:1987",
"en_US:		ISO_8859-1:1987",
"es:		ISO_8859-1:1987",
"es_AR:		ISO_8859-1:1987",
"es_BO:		ISO_8859-1:1987",
"es_CL:		ISO_8859-1:1987",
"es_CO:		ISO_8859-1:1987",
"es_CR:		ISO_8859-1:1987",
"es_EC:		ISO_8859-1:1987",
"es_GT:		ISO_8859-1:1987",
"es_MX:		ISO_8859-1:1987",
"es_NI:		ISO_8859-1:1987",
"es_PA:		ISO_8859-1:1987",
"es_PE:		ISO_8859-1:1987",
"es_PY:		ISO_8859-1:1987",
"es_SV:		ISO_8859-1:1987",
"es_UY:		ISO_8859-1:1987",
"es_VE:		ISO_8859-1:1987",
"fr:		ISO_8859-1:1987",
"fr_BE:		ISO_8859-1:1987",
"fr_CA:		ISO_8859-1:1987",
"fr_CH:		ISO_8859-1:1987",
"de:		ISO_8859-1:1987",
"de_AT:		ISO_8859-1:1987",
"de_CH:		ISO_8859-1:1987",
"nl:		ISO_8859-1:1987",
"nl_BE:		ISO_8859-1:1987",
"it:		ISO_8859-1:1987",
"sv:		ISO_8859-1:1987",
"no:		ISO_8859-1:1987",
"da:		ISO_8859-1:1987",
"iso_8859_1:    ISO_8859-1:1987",
"japanese:      Extended_UNIX_Code_Packed_Format_for_Japanese",
"ko:            EUC-KR",
"zh:            GB2312",
"zh_TW:         cns11643_1",
NULL
};
    347 #elif defined(OSF1)
/*
 * Locale-name to charset-name table selected when OSF1 is defined.
 * Each entry is "<locale>:" followed by blanks and a charset name; entries
 * starting with '!' are comments.  The array is NULL-terminated and is
 * scanned by GetCharsetFromLocale().
 */
const char *CHARCONVTABLE[] =
{
"! This table maps the host's locale names to IANA charsets",
"!",
"C:                     ISO_8859-1:1987",
"cs_CZ.ISO8859-2:       ISO_8859-2:1987",
"cs_CZ:                 ISO_8859-2:1987",
"da_DK.ISO8859-1:       ISO_8859-1:1987",
"de_CH.ISO8859-1:       ISO_8859-1:1987",
"de_DE.ISO8859-1:       ISO_8859-1:1987",
"en_GB.ISO8859-1:       ISO_8859-1:1987",
"en_US.ISO8859-1:       ISO_8859-1:1987",
"es_ES.ISO8859-1:       ISO_8859-1:1987",
"fi_FI.ISO8859-1:       ISO_8859-1:1987",
"fr_BE.ISO8859-1:       ISO_8859-1:1987",
"fr_CA.ISO8859-1:       ISO_8859-1:1987",
"fr_CH.ISO8859-1:       ISO_8859-1:1987",
"fr_FR.ISO8859-1:       ISO_8859-1:1987",
"hu_HU.ISO8859-2:       ISO_8859-2:1987",
"hu_HU:                 ISO_8859-2:1987",
"is_IS.ISO8859-1:       ISO_8859-1:1987",
"it_IT.ISO8859-1:       ISO_8859-1:1987",
"ja_JP.SJIS:            Shift_JIS",
"ja_JP.eucJP:           Extended_UNIX_Code_Packed_Format_for_Japanese",
"ja_JP:                 Extended_UNIX_Code_Packed_Format_for_Japanese",
"ko_KR.eucKR:           EUC-KR",
"ko_KR:                 EUC-KR",
"nl_BE.ISO8859-1:       ISO_8859-1:1987",
"nl_NL.ISO8859-1:       ISO_8859-1:1987",
"no_NO.ISO8859-1:       ISO_8859-1:1987",
"pl_PL.ISO8859-2:       ISO_8859-2:1987",
"pl_PL:                 ISO_8859-2:1987",
"pt_PT.ISO8859-1:       ISO_8859-1:1987",
"sk_SK.ISO8859-2:       ISO_8859-2:1987",
"sk_SK:                 ISO_8859-2:1987",
"sv_SE.ISO8859-1:       ISO_8859-1:1987",
"zh_CN:                 GB2312",
"zh_HK.big5:            Big5",
"zh_HK.eucTW:           cns11643_1",
"zh_TW.big5:            Big5",
"zh_TW.big5@chuyin:     Big5",
"zh_TW.big5@radical:    Big5",
"zh_TW.big5@stroke:     Big5",
"zh_TW.eucTW:           cns11643_1",
"zh_TW.eucTW@chuyin:    cns11643_1",
"zh_TW.eucTW@radical:   cns11643_1",
"zh_TW.eucTW@stroke:    cns11643_1",
"zh_TW:                 cns11643_1",
NULL
};
    398 #elif defined(HPUX)
/*
 * Locale-name to charset-name table selected when HPUX is defined.
 * Each entry is "<locale>:" followed by blanks or tabs and a charset name;
 * entries starting with '!' are comments.  The array is NULL-terminated
 * and is scanned by GetCharsetFromLocale().
 */
const char *CHARCONVTABLE[] =
{
"! This table maps the host's locale names to IANA charsets",
"!",
"C:			ISO_8859-1:1987",
"ja_JP:			Extended_UNIX_Code_Packed_Format_for_Japanese",
"ja_JP.SJIS:		Shift_JIS",
"ja_JP.eucJP:		Extended_UNIX_Code_Packed_Format_for_Japanese",
"es_ES:			ISO_8859-1:1987",
"es_ES.iso88591:	ISO_8859-1:1987",
"sv_SE:			ISO_8859-1:1987",
"sv_SE.iso88591:	ISO_8859-1:1987",
"da_DK:			ISO_8859-1:1987",
"da_DK.iso88591:	ISO_8859-1:1987",
"nl_NL:			ISO_8859-1:1987",
"nl_NL.iso88591:	ISO_8859-1:1987",
"en:			ISO_8859-1:1987",
"en_GB:			ISO_8859-1:1987",
"en_GB.iso88591:	ISO_8859-1:1987",
"en_US:			ISO_8859-1:1987",
"en_US.iso88591:	ISO_8859-1:1987",
"fi_FI:			ISO_8859-1:1987",
"fi_FI.iso88591:	ISO_8859-1:1987",
"fr_CA:			ISO_8859-1:1987",
"fr_CA.iso88591:	ISO_8859-1:1987",
"fr_FR:			ISO_8859-1:1987",
"fr_FR.iso88591:	ISO_8859-1:1987",
"de_DE:			ISO_8859-1:1987",
"de_DE.iso88591:	ISO_8859-1:1987",
"is_IS:			ISO_8859-1:1987",
"is_IS.iso88591:	ISO_8859-1:1987",
"it_IT:			ISO_8859-1:1987",
"it_IT.iso88591:	ISO_8859-1:1987",
"no_NO:			ISO_8859-1:1987",
"no_NO.iso88591:	ISO_8859-1:1987",
"pt_PT:			ISO_8859-1:1987",
"pt_PT.iso88591:	ISO_8859-1:1987",
"hu_HU:			ISO_8859-2:1987",
"hu_HU.iso88592:	ISO_8859-2:1987",
"cs_CZ:			ISO_8859-2:1987",
"cs_CZ.iso88592:	ISO_8859-2:1987",
"pl_PL:			ISO_8859-2:1987",
"pl_PL.iso88592:	ISO_8859-2:1987",
"ro_RO:			ISO_8859-2:1987",
"ro_RO.iso88592:	ISO_8859-2:1987",
"hr_HR:			ISO_8859-2:1987",
"hr_HR.iso88592:	ISO_8859-2:1987",
"sk_SK:			ISO_8859-2:1987",
"sk_SK.iso88592:	ISO_8859-2:1987",
"sl_SI:			ISO_8859-2:1987",
"sl_SI.iso88592:	ISO_8859-2:1987",
"american.iso88591:     ISO_8859-1:1987",
"bulgarian:             ISO_8859-2:1987",
"c-french.iso88591:     ISO_8859-1:1987",
"chinese-s:             GB2312",
"chinese-t.big5:                Big5",
"czech:                 ISO_8859-2:1987",
"danish.iso88591:       ISO_8859-1:1987",
"dutch.iso88591:                ISO_8859-1:1987",
"english.iso88591:      ISO_8859-1:1987",
"finnish.iso88591:      ISO_8859-1:1987",
"french.iso88591:       ISO_8859-1:1987",
"german.iso88591:       ISO_8859-1:1987",
"hungarian:             ISO_8859-2:1987",
"icelandic.iso88591:    ISO_8859-1:1987",
"italian.iso88591:      ISO_8859-1:1987",
"japanese.euc:          Extended_UNIX_Code_Packed_Format_for_Japanese",
"japanese:              Shift_JIS",
"katakana:              Shift_JIS",
"korean:                        EUC-KR",
"norwegian.iso88591:    ISO_8859-1:1987",
"polish:                        ISO_8859-2:1987",
"portuguese.iso88591:   ISO_8859-1:1987",
"rumanian:              ISO_8859-2:1987",
"serbocroatian:         ISO_8859-2:1987",
"slovene:               ISO_8859-2:1987",
"spanish.iso88591:      ISO_8859-1:1987",
"swedish.iso88591:      ISO_8859-1:1987",
NULL
};
    479 #elif defined(AIX)
/*
 * Locale-name to charset-name table selected when AIX is defined.
 * Each entry is "<locale>:" followed by blanks and a charset name; entries
 * starting with '!' are comments.  The array is NULL-terminated and is
 * scanned by GetCharsetFromLocale().
 */
const char *CHARCONVTABLE[] =
{
"! This table maps the host's locale names to IANA charsets",
"!",
"C:                     ISO_8859-1:1987",
"En_JP.IBM-932:         Shift_JIS",
"En_JP:                 Shift_JIS",
"Ja_JP.IBM-932:         Shift_JIS",
"Ja_JP:                 Shift_JIS",
"da_DK.ISO8859-1:       ISO_8859-1:1987",
"da_DK:                 ISO_8859-1:1987",
"de_CH.ISO8859-1:       ISO_8859-1:1987",
"de_CH:                 ISO_8859-1:1987",
"de_DE.ISO8859-1:       ISO_8859-1:1987",
"de_DE:                 ISO_8859-1:1987",
"en_GB.ISO8859-1:       ISO_8859-1:1987",
"en_GB:                 ISO_8859-1:1987",
"en_JP.IBM-eucJP:       Extended_UNIX_Code_Packed_Format_for_Japanese",
"en_JP:                 Extended_UNIX_Code_Packed_Format_for_Japanese",
"en_KR.IBM-eucKR:       EUC-KR",
"en_KR:                 EUC-KR",
"en_TW.IBM-eucTW:       cns11643_1",
"en_TW:                 cns11643_1",
"en_US.ISO8859-1:       ISO_8859-1:1987",
"en_US:                 ISO_8859-1:1987",
"es_ES.ISO8859-1:       ISO_8859-1:1987",
"es_ES:                 ISO_8859-1:1987",
"fi_FI.ISO8859-1:       ISO_8859-1:1987",
"fi_FI:                 ISO_8859-1:1987",
"fr_BE.ISO8859-1:       ISO_8859-1:1987",
"fr_BE:                 ISO_8859-1:1987",
"fr_CA.ISO8859-1:       ISO_8859-1:1987",
"fr_CA:                 ISO_8859-1:1987",
"fr_CH.ISO8859-1:       ISO_8859-1:1987",
"fr_CH:                 ISO_8859-1:1987",
"fr_FR.ISO8859-1:       ISO_8859-1:1987",
"fr_FR:                 ISO_8859-1:1987",
"is_IS.ISO8859-1:       ISO_8859-1:1987",
"is_IS:                 ISO_8859-1:1987",
"it_IT.ISO8859-1:       ISO_8859-1:1987",
"it_IT:                 ISO_8859-1:1987",
"ja_JP.IBM-eucJP:       Extended_UNIX_Code_Packed_Format_for_Japanese",
"ja_JP:                 Extended_UNIX_Code_Packed_Format_for_Japanese",
"ko_KR.IBM-eucKR:       EUC-KR",
"ko_KR:                 EUC-KR",
"nl_BE.ISO8859-1:       ISO_8859-1:1987",
"nl_BE:                 ISO_8859-1:1987",
"nl_NL.ISO8859-1:       ISO_8859-1:1987",
"nl_NL:                 ISO_8859-1:1987",
"no_NO.ISO8859-1:       ISO_8859-1:1987",
"no_NO:                 ISO_8859-1:1987",
"pt_PT.ISO8859-1:       ISO_8859-1:1987",
"pt_PT:                 ISO_8859-1:1987",
"sv_SE.ISO8859-1:       ISO_8859-1:1987",
"sv_SE:                 ISO_8859-1:1987",
"zh_TW.IBM-eucTW:       cns11643_1",
"zh_TW:                 cns11643_1",
NULL
};
    539 #else   // sunos by default
/*
 * Default locale-name to charset-name table, used when none of IRIX,
 * SOLARIS, OSF1, HPUX or AIX is defined.
 * Each entry is "<locale>:" followed by blanks and a charset name; entries
 * starting with '!' are comments.  The array is NULL-terminated and is
 * scanned by GetCharsetFromLocale().
 */
const char *CHARCONVTABLE[] =
{
"! This table maps the host's locale names to IANA charsets",
"!",
"C:             ISO_8859-1:1987",
"de:            ISO_8859-1:1987",
"en_US:         ISO_8859-1:1987",
"es:            ISO_8859-1:1987",
"fr:            ISO_8859-1:1987",
"iso_8859_1:    ISO_8859-1:1987",
"it:            ISO_8859-1:1987",
"ja:            Extended_UNIX_Code_Packed_Format_for_Japanese",
"ja_JP.EUC:     Extended_UNIX_Code_Packed_Format_for_Japanese",
"japanese:      Extended_UNIX_Code_Packed_Format_for_Japanese",
"ko:            EUC-KR",
"sv:            ISO_8859-1:1987",
"zh:            GB2312",
"zh_TW:         cns11643_1",
NULL
};
    560 #endif
    561 
    562 #define BSZ     256
    563 
    564 char *
    565 GetCharsetFromLocale(char *locale)
    566 {
    567     char *tmpcharset = NULL;
    568     char buf[BSZ];
    569     char *p;
    570     const char *line;
    571     int i=0;
    572 
    573     line = CHARCONVTABLE[i];
    574     while (line != NULL)
    575     {
    576        if (*line == 0)
    577        {
    578           break;
    579        }
    580 
    581        strcpy(buf, line);
    582        line = CHARCONVTABLE[++i];
    583 
    584        if (strlen(buf) == 0 || buf[0] == '!')
    585        {
    586           continue;
    587        }
    588        p = strchr(buf, ':');
    589        if (p == NULL)
    590        {
    591           tmpcharset = NULL;
    592           break;
    593        }
    594        *p = 0;
    595        if (strcmp(buf, locale) == 0) {
    596           while (*++p == ' ' || *p == '\t')
    597              ;
    598           if (isalpha(*p)) {
    599              tmpcharset = strdup(p);
    600           } else
    601              tmpcharset = NULL;
    602 
    603           break;
    604        }
    605     }
    606     return tmpcharset;
    607 }
    608 
    609 #endif /* Not defined XP_WIN32 */
    610 
    611 #ifdef XP_WIN32
    612 char *_convertor(const char *instr, int bFromUTF8)
    613 {
    614     char  *outstr = NULL;
    615     int    inlen, wclen, outlen;
    616     LPWSTR wcstr;
    617 
    618     if (instr == NULL)
    619             return NULL;
    620 
    621     if ((inlen = strlen(instr)) <= 0)
    622             return NULL;
    623 
    624     /* output never becomes longer than input,
    625      * thus we don't have to ask for the length
    626      */
    627     wcstr = (LPWSTR) malloc( sizeof( WCHAR ) * (inlen+1) );
    628     if (!wcstr)
    629         return NULL;
    630 
    631     wclen = MultiByteToWideChar(bFromUTF8 ? CP_UTF8 : CP_ACP, 0, instr,
    632                                  inlen, wcstr, inlen);
    633     outlen = WideCharToMultiByte(bFromUTF8 ? CP_ACP : CP_UTF8, 0, wcstr,
    634                                   wclen, NULL, 0, NULL, NULL);
    635 
    636     if (outlen > 0) {
    637         outstr = (char *) malloc(outlen + 2);
    638         outlen = WideCharToMultiByte(bFromUTF8 ? CP_ACP : CP_UTF8, 0, wcstr,
    639                                       wclen, outstr, outlen, NULL, NULL);
    640         if (outlen > 0)
    641             *(outstr+outlen) = _T('\0');
    642         else
    643             return NULL;
    644     }
    645     free( wcstr );
    646     return outstr;
    647 }
    648 #endif
    649 
    650 char *
    651 ldaptool_local2UTF8( const char *src )
    652 {
    653     char *utf8;
    654 #ifndef XP_WIN32
    655     char *locale, *newcharset;
    656     size_t outLen, resultLen;
    657     UErrorCode err = U_ZERO_ERROR;
    658     UConverter *cnv;
    659 
    660     if (src == NULL)
    661     {
    662       return NULL;
    663     }
    664     else if (*src == 0 || (ldaptool_charset == NULL)
    665 	     || (!strcmp( ldaptool_charset, "" )))
    666     {
    667 	/* no option specified, so assume it's already in utf-8 */
    668         utf8 = strdup(src);
    669         return utf8;
    670     }
    671 
    672     if( !strcmp( ldaptool_charset, "0" )
    673 	    && (!charsetset) )
    674     {
    675 	/* zero option specified, so try to get default codepage
    676 	   this sucker is strdup'd immediately so it's OK to cast */
    677 	newcharset = (char *)ucnv_getDefaultName();
    678 	if (newcharset != NULL) {
    679 	    free( ldaptool_charset );
    680 	    /* the default codepage lives in ICU */
    681 	    ldaptool_charset = strdup(newcharset);
    682 	    if (ldaptool_charset == NULL) {
    683 		return strdup(src);
    684 	    }
    685 	}
    686 	charsetset = 1;
    687     }
    688     else
    689     if( strcmp( ldaptool_charset, "" ) && (!charsetset) )
    690     {
    691 	/* -i option specified with charset name */
    692         charsetset = 1;
    693     }
    694 
    695     /* do the preflight - get the size needed for the target buffer */
    696     outLen = (size_t) ucnv_convert( "utf-8", ldaptool_charset, NULL, 0, src,
    697                                       strlen( src ) * sizeof(char), &err);
    698 
    699     if ((err != U_BUFFER_OVERFLOW_ERROR) || (outLen == 0)) {
    700       /* default to just a copy of the string - this covers
    701          the case of an illegal charset also */
    702       return strdup(src);
    703     }
    704 
    705     utf8 =  (char *) malloc( outLen + 1);
    706     if( utf8 == NULL ) {
    707       /* if we're already out of memory, does strdup just return NULL? */
    708        return strdup(src);
    709     }
    710 
    711     /* do the actual conversion this time */
    712     err = U_ZERO_ERROR;
    713     resultLen = ucnv_convert( "utf-8", ldaptool_charset, utf8, (outLen + 1), src,
    714 		       strlen(src) * sizeof(char), &err );
    715 
    716     if (!U_SUCCESS(err)) {
    717       free(utf8);
    718       return strdup(src);
    719     }
    720 
    721 #else
    722     utf8 = _convertor(src, FALSE);
    723     if( utf8 == NULL )
    724         utf8 = strdup(src);
    725 #endif
    726 
    727     return utf8;
    728 }
    729 #endif /* HAVE_LIBICU */
    730 
    731 #ifndef HAVE_LIBICU
    732 #ifdef __cplusplus
    733 }
    734 #endif
    735 #endif
    736