Home | History | Annotate | Download | only in smb
      1 /*
      2  * Copyright (c) 2001 Apple Computer, Inc. All rights reserved.
      3  *
      4  * @APPLE_LICENSE_HEADER_START@
      5  *
      6  * "Portions Copyright (c) 1999 Apple Computer, Inc.  All Rights
      7  * Reserved.  This file contains Original Code and/or Modifications of
      8  * Original Code as defined in and that are subject to the Apple Public
      9  * Source License Version 1.0 (the 'License').  You may not use this file
     10  * except in compliance with the License.  Please obtain a copy of the
     11  * License at http://www.apple.com/publicsource and read it before using
     12  * this file.
     13  *
     14  * The Original Code and all software distributed under the License are
     15  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
     16  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
     17  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
     19  * License for the specific language governing rights and limitations
     20  * under the License."
     21  *
     22  * @APPLE_LICENSE_HEADER_END@
     23  */
     24 /* CSTYLED */
     25 /*
     26  *      @(#)charsets.c      *
     27  *      (c) 2004   Apple Computer, Inc.  All Rights Reserved
     28  *
     29  *
     30  *      charsets.c -- Routines converting between UTF-8, 16-bit
     31  *			little-endian Unicode, and various Windows
     32  *			code pages.
     33  *
     34  *      MODIFICATION HISTORY:
     35  *       28-Nov-2004     Guy Harris	New today
     36  */
     37 
     38 #include <stdlib.h>
     39 #include <stdio.h>
     40 #include <string.h>
     41 #include <ctype.h>
     42 #include <errno.h>
     43 #include <iconv.h>
     44 #include <langinfo.h>
     45 #include <strings.h>
     46 #include <libintl.h>
     47 
     48 #include <sys/isa_defs.h>
     49 #include <netsmb/smb_lib.h>
     50 #include <netsmb/mchain.h>
     51 
     52 #include "charsets.h"
     53 
     54 /*
     55  * On Solaris, we will need to do some rewriting to use our iconv
     56  * routines for the conversions.  For now, we're effectively
     57  * stubbing out code, leaving the details of what happens on
     58  * Darwin in case it's useful as a guide later.
     59  */
     60 
     61 static unsigned
     62 xtoi(char u)
     63 {
     64 	if (isdigit(u))
     65 		return (u - '0');
     66 	else if (islower(u))
     67 		return (10 + u - 'a');
     68 	else if (isupper(u))
     69 		return (10 + u - 'A');
     70 	return (16);
     71 }
     72 
     73 
     74 /*
     75  * Removes the "%" escape sequences from a URL component.
     76  * See IETF RFC 2396.
     77  */
     78 char *
     79 unpercent(char *component)
     80 {
     81 	char c, *s;
     82 	unsigned hi, lo;
     83 
     84 	if (component == NULL)
     85 		return (component);
     86 
     87 	for (s = component; (c = *s) != 0; s++) {
     88 		if (c != '%')
     89 			continue;
     90 		if ((hi = xtoi(s[1])) > 15 || (lo = xtoi(s[2])) > 15)
     91 			continue; /* ignore invalid escapes */
     92 		s[0] = hi*16 + lo;
     93 		/*
     94 		 * This was strcpy(s + 1, s + 3);
     95 		 * But nowadays leftward overlapping copies are
     96 		 * officially undefined in C.  Ours seems to
     97 		 * work or not depending upon alignment.
     98 		 */
     99 		memmove(s+1, s+3, strlen(s+3) + 1);
    100 	}
    101 	return (component);
    102 }
    103 
    104 /* BEGIN CSTYLED */
    105 #ifdef NOTPORTED
    106 static CFStringEncoding
    107 get_windows_encoding_equivalent( void )
    108 {
    109 
    110 	CFStringEncoding encoding;
    111 	uint32_t index,region;
    112 
    113 	/* important! use root ID so you can read the config file! */
    114 	seteuid(eff_uid);
    115 	__CFStringGetInstallationEncodingAndRegion(&index,&region);
    116 	seteuid(real_uid);
    117 
    118 	switch ( index )
    119 	{
    120 		case	kCFStringEncodingMacRoman:
    121 			if (region) /* anything nonzero is not US */
    122 				encoding = kCFStringEncodingDOSLatin1;
    123 			else /* US region */
    124 				encoding = kCFStringEncodingDOSLatinUS;
    125 			break;
    126 
    127 		case	kCFStringEncodingMacJapanese:
    128 			encoding = kCFStringEncodingDOSJapanese;
    129 			break;
    130 
    131 		case	kCFStringEncodingMacChineseTrad:
    132 			encoding = kCFStringEncodingDOSChineseTrad;
    133 			break;
    134 
    135 		case	kCFStringEncodingMacKorean:
    136 			encoding = kCFStringEncodingDOSKorean;
    137 			break;
    138 
    139 		case	kCFStringEncodingMacArabic:
    140 			encoding = kCFStringEncodingDOSArabic;
    141 			break;
    142 
    143 		case	kCFStringEncodingMacHebrew:
    144 			encoding = kCFStringEncodingDOSHebrew;
    145 			break;
    146 
    147 		case	kCFStringEncodingMacGreek:
    148 			encoding = kCFStringEncodingDOSGreek;
    149 			break;
    150 
    151 		case	kCFStringEncodingMacCyrillic:
    152 			encoding = kCFStringEncodingDOSCyrillic;
    153 			break;
    154 
    155 		case	kCFStringEncodingMacThai:
    156 			encoding = kCFStringEncodingDOSThai;
    157 			break;
    158 
    159 		case	kCFStringEncodingMacChineseSimp:
    160 			encoding = kCFStringEncodingDOSChineseSimplif;
    161 			break;
    162 
    163 		case	kCFStringEncodingMacCentralEurRoman:
    164 			encoding = kCFStringEncodingDOSLatin2;
    165 			break;
    166 
    167 		case	kCFStringEncodingMacTurkish:
    168 			encoding = kCFStringEncodingDOSTurkish;
    169 			break;
    170 
    171 		case	kCFStringEncodingMacCroatian:
    172 			encoding = kCFStringEncodingDOSLatin2;
    173 			break;
    174 
    175 		case	kCFStringEncodingMacIcelandic:
    176 			encoding = kCFStringEncodingDOSIcelandic;
    177 			break;
    178 
    179 		case	kCFStringEncodingMacRomanian:
    180 			encoding = kCFStringEncodingDOSLatin2;
    181 			break;
    182 
    183 		case	kCFStringEncodingMacFarsi:
    184 			encoding = kCFStringEncodingDOSArabic;
    185 			break;
    186 
    187 		case	kCFStringEncodingMacUkrainian:
    188 			encoding = kCFStringEncodingDOSCyrillic;
    189 			break;
    190 
    191 		default:
    192 			encoding = kCFStringEncodingDOSLatin1;
    193 			break;
    194 	}
    195 
    196 	return encoding;
    197 }
    198 #endif /* NOTPORTED */
    199 
    200 /*
    201  * XXX - NLS, or CF?  We should probably use the same routine for all
    202  * conversions.
    203  */
    204 char *
    205 convert_wincs_to_utf8(const char *windows_string)
    206 {
    207 #ifdef NOTPORTED
    208 	CFStringRef s;
    209 	CFIndex maxlen;
    210 	char *result;
    211 
    212 	s = CFStringCreateWithCString(NULL, windows_string,
    213 		get_windows_encoding_equivalent());
    214 	if (s == NULL) {
    215 		smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" ", -1,
    216 		    windows_string);
    217 
    218 		/* kCFStringEncodingMacRoman should always succeed */
    219 		s = CFStringCreateWithCString(NULL, windows_string,
    220 		    kCFStringEncodingMacRoman);
    221 		if (s == NULL) {
    222 			smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" with kCFStringEncodingMacRoman - skipping",
    223 			    -1, windows_string);
    224 			return NULL;
    225 		}
    226 	}
    227 
    228 	maxlen = CFStringGetMaximumSizeForEncoding(CFStringGetLength(s),
    229 	    kCFStringEncodingUTF8) + 1;
    230 	result = malloc(maxlen);
    231 	if (result == NULL) {
    232 		smb_error("Couldn't allocate buffer for UTF-8 string for \"%s\" - skipping", -1,
    233 		    windows_string);
    234 		CFRelease(s);
    235 		return NULL;
    236 	}
    237 	if (!CFStringGetCString(s, result, maxlen, kCFStringEncodingUTF8)) {
    238 		smb_error("CFStringGetCString for UTF-8 failed on \"%s\" - skipping",
    239 		    -1, windows_string);
    240 		CFRelease(s);
    241 		return NULL;
    242 	}
    243 	CFRelease(s);
    244 	return result;
    245 #else /* NOTPORTED */
    246 	return (strdup((char*)windows_string));
    247 #endif /* NOTPORTED */
    248 }
    249 
    250 /*
    251  * XXX - NLS, or CF?  We should probably use the same routine for all
    252  * conversions.
    253  */
    254 char *
    255 convert_utf8_to_wincs(const char *utf8_string)
    256 {
    257 #ifdef NOTPORTED
    258 	CFStringRef s;
    259 	CFIndex maxlen;
    260 	char *result;
    261 
    262 	s = CFStringCreateWithCString(NULL, utf8_string,
    263 	    kCFStringEncodingUTF8);
    264 	if (s == NULL) {
    265 		smb_error("CFStringCreateWithCString for UTF-8 failed on \"%s\"", -1,
    266 		    utf8_string);
    267 		return NULL;
    268 	}
    269 
    270 	maxlen = CFStringGetMaximumSizeForEncoding(CFStringGetLength(s),
    271 	    get_windows_encoding_equivalent()) + 1;
    272 	result = malloc(maxlen);
    273 	if (result == NULL) {
    274 		smb_error("Couldn't allocate buffer for Windows code page string for \"%s\" - skipping", -1,
    275 		    utf8_string);
    276 		CFRelease(s);
    277 		return NULL;
    278 	}
    279 	if (!CFStringGetCString(s, result, maxlen,
    280 	    get_windows_encoding_equivalent())) {
    281 		smb_error("CFStringGetCString for Windows code page failed on \"%s\" - skipping",
    282 		    -1, utf8_string);
    283 		CFRelease(s);
    284 		return NULL;
    285 	}
    286 	CFRelease(s);
    287 	return result;
    288 #else /* NOTPORTED */
    289 	return (strdup((char*)utf8_string));
    290 #endif /* NOTPORTED */
    291 }
    292 /* END CSTYLED */
    293 
    294 /*
    295  * We replaced these routines for Solaris:
    296  *	convert_leunicode_to_utf8
    297  *	convert_unicode_to_utf8
    298  *	convert_utf8_to_leunicode
    299  * with new code in: utf_str.c
    300  */
    301