Home | History | Annotate | Download | only in smb
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * Unicode conversions (yet more)
     29  */
     30 
     31 #include <stdio.h>
     32 #include <stdlib.h>
     33 #include <string.h>
     34 #include <errno.h>
     35 #include <iconv.h>
     36 #include <libintl.h>
     37 
     38 #include <sys/u8_textprep.h>
     39 
     40 #include <netsmb/smb_lib.h>
     41 #include "charsets.h"
     42 
     43 
     44 /*
     45  * Number of unicode symbols in the string,
     46  * not including the 2-byte null terminator.
     47  * (multiply by two for storage size)
     48  */
     49 size_t
     50 unicode_strlen(const uint16_t *us)
     51 {
     52 	size_t len = 0;
     53 	while (*us++)
     54 		len++;
     55 	return (len);
     56 }
     57 
     58 static char *convert_ucs2xx_to_utf8(iconv_t, const uint16_t *);
     59 
     60 /*
     61  * Convert (native) Unicode string to UTF-8.
     62  * Returns allocated memory.
     63  */
     64 char *
     65 convert_unicode_to_utf8(uint16_t *us)
     66 {
     67 	static iconv_t cd1 = (iconv_t)-1;
     68 
     69 	/* Get conversion descriptor (to, from) */
     70 	if (cd1 == (iconv_t)-1)
     71 		cd1 = iconv_open("UTF-8", "UCS-2");
     72 
     73 	return (convert_ucs2xx_to_utf8(cd1, us));
     74 }
     75 
     76 /*
     77  * Convert little-endian Unicode string to UTF-8.
     78  * Returns allocated memory.
     79  */
     80 char *
     81 convert_leunicode_to_utf8(unsigned short *us)
     82 {
     83 	static iconv_t cd2 = (iconv_t)-1;
     84 
     85 	/* Get conversion descriptor (to, from) */
     86 	if (cd2 == (iconv_t)-1)
     87 		cd2 = iconv_open("UTF-8", "UCS-2LE");
     88 
     89 	return (convert_ucs2xx_to_utf8(cd2, us));
     90 }
     91 
     92 static char *
     93 convert_ucs2xx_to_utf8(iconv_t cd, const uint16_t *us)
     94 {
     95 	char *obuf, *optr;
     96 	const char *iptr;
     97 	size_t  ileft, obsize, oleft, ret;
     98 
     99 	if (cd == (iconv_t)-1) {
    100 		smb_error(dgettext(TEXT_DOMAIN,
    101 		    "iconv_open(UTF-8/UCS-2)"), -1);
    102 		return (NULL);
    103 	}
    104 
    105 	iptr = (const char *)us;
    106 	ileft = unicode_strlen(us);
    107 	ileft *= 2; /* now bytes */
    108 
    109 	/* Worst-case output size is 2x input size. */
    110 	oleft = ileft * 2;
    111 	obsize = oleft + 2; /* room for null */
    112 	obuf = malloc(obsize);
    113 	if (!obuf)
    114 		return (NULL);
    115 	optr = obuf;
    116 
    117 	ret = iconv(cd, &iptr, &ileft, &optr, &oleft);
    118 	*optr = '\0';
    119 	if (ret == (size_t)-1) {
    120 		smb_error(dgettext(TEXT_DOMAIN,
    121 		    "iconv(%s) failed"), errno, obuf);
    122 	}
    123 	if (ileft) {
    124 		smb_error(dgettext(TEXT_DOMAIN,
    125 		    "iconv(%s) failed"), -1, obuf);
    126 		/*
    127 		 * XXX: What's better?  return NULL?
    128 		 * The truncated string? << for now
    129 		 */
    130 	}
    131 
    132 	return (obuf);
    133 }
    134 
    135 static uint16_t *convert_utf8_to_ucs2xx(iconv_t, const char *);
    136 
    137 /*
    138  * Convert UTF-8 string to Unicode.
    139  * Returns allocated memory.
    140  */
    141 uint16_t *
    142 convert_utf8_to_unicode(const char *utf8_string)
    143 {
    144 	static iconv_t cd3 = (iconv_t)-1;
    145 
    146 	/* Get conversion descriptor (to, from) */
    147 	if (cd3 == (iconv_t)-1)
    148 		cd3 = iconv_open("UCS-2", "UTF-8");
    149 	return (convert_utf8_to_ucs2xx(cd3, utf8_string));
    150 }
    151 
    152 /*
    153  * Convert UTF-8 string to little-endian Unicode.
    154  * Returns allocated memory.
    155  */
    156 uint16_t *
    157 convert_utf8_to_leunicode(const char *utf8_string)
    158 {
    159 	static iconv_t cd4 = (iconv_t)-1;
    160 
    161 	/* Get conversion descriptor (to, from) */
    162 	if (cd4 == (iconv_t)-1)
    163 		cd4 = iconv_open("UCS-2LE", "UTF-8");
    164 	return (convert_utf8_to_ucs2xx(cd4, utf8_string));
    165 }
    166 
    167 static uint16_t *
    168 convert_utf8_to_ucs2xx(iconv_t cd, const char *utf8_string)
    169 {
    170 	uint16_t *obuf, *optr;
    171 	const char *iptr;
    172 	size_t  ileft, obsize, oleft, ret;
    173 
    174 	if (cd == (iconv_t)-1) {
    175 		smb_error(dgettext(TEXT_DOMAIN,
    176 		    "iconv_open(UCS-2/UTF-8)"), -1);
    177 		return (NULL);
    178 	}
    179 
    180 	iptr = utf8_string;
    181 	ileft = strlen(iptr);
    182 
    183 	/* Worst-case output size is 2x input size. */
    184 	oleft = ileft * 2;
    185 	obsize = oleft + 2; /* room for null */
    186 	obuf = malloc(obsize);
    187 	if (!obuf)
    188 		return (NULL);
    189 	optr = obuf;
    190 
    191 	ret = iconv(cd, &iptr, &ileft, (char **)&optr, &oleft);
    192 	*optr = '\0';
    193 	if (ret == (size_t)-1) {
    194 		smb_error(dgettext(TEXT_DOMAIN,
    195 		    "iconv(%s) failed"), errno, utf8_string);
    196 	}
    197 	if (ileft) {
    198 		smb_error(dgettext(TEXT_DOMAIN,
    199 		    "iconv(%s) failed"), -1, utf8_string);
    200 		/*
    201 		 * XXX: What's better?  return NULL?
    202 		 * The truncated string? << for now
    203 		 */
    204 	}
    205 
    206 	return (obuf);
    207 }
    208 
    209 
    210 /*
    211  * A simple wrapper around u8_textprep_str() that returns the Unicode
    212  * upper-case version of some string.  Returns memory from malloc.
    213  * Borrowed from idmapd.
    214  */
    215 static char *
    216 utf8_str_to_upper_or_lower(const char *s, int upper_lower)
    217 {
    218 	char *res = NULL;
    219 	char *outs;
    220 	size_t inlen, outlen, inbleft, outbleft;
    221 	int rc, err;
    222 
    223 	/*
    224 	 * u8_textprep_str() does not allocate memory.  The input and
    225 	 * output buffers may differ in size (though that would be more
    226 	 * likely when normalization is done).  We have to loop over it...
    227 	 *
    228 	 * To improve the chances that we can avoid looping we add 10
    229 	 * bytes of output buffer room the first go around.
    230 	 */
    231 	inlen = inbleft = strlen(s);
    232 	outlen = outbleft = inlen + 10;
    233 	if ((res = malloc(outlen)) == NULL)
    234 		return (NULL);
    235 	outs = res;
    236 
    237 	while ((rc = u8_textprep_str((char *)s, &inbleft, outs,
    238 	    &outbleft, upper_lower, U8_UNICODE_LATEST, &err)) < 0 &&
    239 	    err == E2BIG) {
    240 		if ((res = realloc(res, outlen + inbleft)) == NULL)
    241 			return (NULL);
    242 		/* adjust input/output buffer pointers */
    243 		s += (inlen - inbleft);
    244 		outs = res + outlen - outbleft;
    245 		/* adjust outbleft and outlen */
    246 		outlen += inbleft;
    247 		outbleft += inbleft;
    248 	}
    249 
    250 	if (rc < 0) {
    251 		free(res);
    252 		res = NULL;
    253 		return (NULL);
    254 	}
    255 
    256 	res[outlen - outbleft] = '\0';
    257 
    258 	return (res);
    259 }
    260 
    261 char *
    262 utf8_str_toupper(const char *s)
    263 {
    264 	return (utf8_str_to_upper_or_lower(s, U8_TEXTPREP_TOUPPER));
    265 }
    266 
    267 char *
    268 utf8_str_tolower(const char *s)
    269 {
    270 	return (utf8_str_to_upper_or_lower(s, U8_TEXTPREP_TOLOWER));
    271 }
    272