Home | History | Annotate | Download | only in common
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <repcache_protocol.h>
     28 #include "scf_type.h"
     29 #include <errno.h>
     30 #include <libgen.h>
     31 #include <libscf_priv.h>
     32 #include <stdlib.h>
     33 #include <string.h>
     34 
     35 #define	UTF8_TOP_N(n) \
     36 	(0xff ^ (0xff >> (n)))		/* top N bits set */
     37 
     38 #define	UTF8_BOTTOM_N(n) \
     39 	((1 << (n)) - 1)		/* bottom N bits set */
     40 
     41 /*
     42  * The first byte of an n-byte UTF8 encoded character looks like:
     43  *
     44  *	n	bits
     45  *
     46  *	1	0xxxxxxx
     47  *	2	110xxxxx
     48  *	3	1110xxxx
     49  *	4	11110xxx
     50  *	5	111110xx
     51  *	6	1111110x
     52  *
     53  * Continuation bytes are 01xxxxxx.
     54  */
     55 
     56 #define	UTF8_MAX_BYTES	6
     57 
     58 /*
     59  * number of bits in an n-byte UTF-8 encoding.  for multi-byte encodings,
     60  * You get (7 - n) bits in the first byte, and 6 bits for each additional byte.
     61  */
     62 #define	UTF8_BITS(n)	/* 1 <= n <= 6 */			\
     63 	((n) == 1)? 7 :						\
     64 	(7 - (n) + 6 * ((n) - 1))
     65 
     66 #define	UTF8_SINGLE_BYTE(c) \
     67 	(((c) & UTF8_TOP_N(1)) == 0)	/* 0xxxxxxx */
     68 
     69 #define	UTF8_HEAD_CHECK(c, n)		/* 2 <= n <= 6 */		\
     70 	(((c) & UTF8_TOP_N((n) + 1)) == UTF8_TOP_N(n))
     71 
     72 #define	UTF8_HEAD_VALUE(c, n)		/* 2 <= n <= 6 */		\
     73 	((c) & UTF8_BOTTOM_N(7 - (n)))	/* 'x' mask */
     74 
     75 #define	UTF8_CONT_CHECK(c) \
     76 	(((c) & UTF8_TOP_N(2)) == UTF8_TOP_N(1))	/* 10xxxxxx */
     77 
     78 /*
     79  * adds in the 6 new bits from a continuation byte
     80  */
     81 #define	UTF8_VALUE_UPDATE(v, c) \
     82 	(((v) << 6) | ((c) & UTF8_BOTTOM_N(6)))
     83 
     84 /*
     85  * URI components
     86  */
     87 
     88 #define	URI_COMPONENT_COUNT	5
     89 
     90 enum {
     91 	URI_SCHEME = 0x0,		/* URI scheme */
     92 	URI_AUTHORITY,			/* URI authority */
     93 	URI_PATH,			/* URI path */
     94 	URI_QUERY,			/* URI query */
     95 	URI_FRAGMENT			/* URI fragment  */
     96 };
     97 
     98 static int
     99 valid_utf8(const char *str_arg)
    100 {
    101 	const char *str = str_arg;
    102 	uint_t c;
    103 	uint32_t v;
    104 	int i, n;
    105 
    106 	while ((c = *str++) != 0) {
    107 		if (UTF8_SINGLE_BYTE(c))
    108 			continue;	/* ascii */
    109 
    110 		for (n = 2; n <= UTF8_MAX_BYTES; n++)
    111 			if (UTF8_HEAD_CHECK(c, n))
    112 				break;
    113 
    114 		if (n > UTF8_MAX_BYTES)
    115 			return (0);		/* invalid head byte */
    116 
    117 		v = UTF8_HEAD_VALUE(c, n);
    118 
    119 		for (i = 1; i < n; i++) {
    120 			c = *str++;
    121 			if (!UTF8_CONT_CHECK(c))
    122 				return (0);	/* invalid byte */
    123 
    124 			v = UTF8_VALUE_UPDATE(v, c);
    125 		}
    126 
    127 		/*
    128 		 * if v could have been encoded in the next smallest
    129 		 * encoding, the string is not well-formed UTF-8.
    130 		 */
    131 		if ((v >> (UTF8_BITS(n - 1))) == 0)
    132 			return (0);
    133 	}
    134 
    135 	/*
    136 	 * we've reached the end of the string -- make sure it is short enough
    137 	 */
    138 	return ((str - str_arg) < REP_PROTOCOL_VALUE_LEN);
    139 }
    140 
    141 static int
    142 valid_string(const char *str)
    143 {
    144 	return (strlen(str) < REP_PROTOCOL_VALUE_LEN);
    145 }
    146 
    147 static int
    148 valid_opaque(const char *str_arg)
    149 {
    150 	const char *str = str_arg;
    151 	uint_t c;
    152 	ptrdiff_t len;
    153 
    154 	while ((c = *str++) != 0)
    155 		if ((c < '0' || c > '9') && (c < 'a' || c > 'f') &&
    156 		    (c < 'A' || c > 'F'))
    157 			return (0);		/* not hex digit */
    158 
    159 	len = (str - str_arg) - 1;		/* not counting NIL byte */
    160 	return ((len % 2) == 0 && len / 2 < REP_PROTOCOL_VALUE_LEN);
    161 }
    162 
    163 /*
    164  * Return 1 if the supplied parameter is a conformant URI (as defined
    165  * by RFC 2396), 0 otherwise.
    166  */
    167 static int
    168 valid_uri(const char *str)
    169 {
    170 	/*
    171 	 * URI Regular Expression. Compiled with regcmp(1).
    172 	 *
    173 	 * ^(([^:/?#]+:){0,1})$0(//([^/?#]*)$1){0,1}([^?#]*)$2
    174 	 * (?([^#]*)$3){0,1}(#(.*)$4){0,1}
    175 	 */
    176 	char exp[] = {
    177 		040, 074, 00, 060, 012, 0126, 05, 072, 057, 077, 043, 024,
    178 		072, 057, 00, 00, 01, 014, 00, 00, 060, 020, 024, 057,
    179 		024, 057, 074, 01, 0125, 04, 057, 077, 043, 014, 01, 01,
    180 		057, 01, 00, 01, 074, 02, 0125, 03, 077, 043, 014, 02,
    181 		02, 060, 014, 024, 077, 074, 03, 0125, 02, 043, 014, 03,
    182 		03, 057, 02, 00, 01, 060, 012, 024, 043, 074, 04, 021,
    183 		014, 04, 04, 057, 03, 00, 01, 064, 00,
    184 		0};
    185 	char uri[URI_COMPONENT_COUNT][REP_PROTOCOL_VALUE_LEN];
    186 
    187 	/*
    188 	 * If the string is too long, then the URI cannot be valid. Also,
    189 	 * this protects against buffer overflow attacks on the uri array.
    190 	 */
    191 	if (strlen(str) >= REP_PROTOCOL_VALUE_LEN)
    192 		return (0);
    193 
    194 	if (regex(exp, str, uri[URI_SCHEME], uri[URI_AUTHORITY], uri[URI_PATH],
    195 	    uri[URI_QUERY], uri[URI_FRAGMENT]) == NULL) {
    196 		return (0);
    197 	}
    198 	/*
    199 	 * To be a valid URI, the length of the URI_PATH must not be zero
    200 	 */
    201 	if (strlen(uri[URI_PATH]) == 0) {
    202 		return (0);
    203 	}
    204 	return (1);
    205 }
    206 
    207 /*
    208  * Return 1 if the supplied parameter is a conformant fmri, 0
    209  * otherwise.
    210  */
    211 static int
    212 valid_fmri(const char *str)
    213 {
    214 	int ret;
    215 	char fmri[REP_PROTOCOL_VALUE_LEN] = { 0 };
    216 
    217 	/*
    218 	 * Try to parse the fmri, if we can parse it then it
    219 	 * must be syntactically correct. Work on a copy of
    220 	 * the fmri since the parsing process can modify the
    221 	 * supplied string.
    222 	 */
    223 	if (strlcpy(fmri, str, sizeof (fmri)) >= sizeof (fmri))
    224 		return (0);
    225 
    226 	ret = ! scf_parse_fmri(fmri, NULL, NULL, NULL, NULL, NULL, NULL);
    227 
    228 	return (ret);
    229 }
    230 
    231 rep_protocol_value_type_t
    232 scf_proto_underlying_type(rep_protocol_value_type_t t)
    233 {
    234 	switch (t) {
    235 	case REP_PROTOCOL_TYPE_BOOLEAN:
    236 	case REP_PROTOCOL_TYPE_COUNT:
    237 	case REP_PROTOCOL_TYPE_INTEGER:
    238 	case REP_PROTOCOL_TYPE_TIME:
    239 	case REP_PROTOCOL_TYPE_STRING:
    240 	case REP_PROTOCOL_TYPE_OPAQUE:
    241 		return (t);
    242 
    243 	case REP_PROTOCOL_SUBTYPE_USTRING:
    244 		return (REP_PROTOCOL_TYPE_STRING);
    245 
    246 	case REP_PROTOCOL_SUBTYPE_URI:
    247 		return (REP_PROTOCOL_SUBTYPE_USTRING);
    248 	case REP_PROTOCOL_SUBTYPE_FMRI:
    249 		return (REP_PROTOCOL_SUBTYPE_URI);
    250 
    251 	case REP_PROTOCOL_SUBTYPE_HOST:
    252 		return (REP_PROTOCOL_SUBTYPE_USTRING);
    253 	case REP_PROTOCOL_SUBTYPE_HOSTNAME:
    254 		return (REP_PROTOCOL_SUBTYPE_HOST);
    255 	case REP_PROTOCOL_SUBTYPE_NETADDR_V4:
    256 		return (REP_PROTOCOL_SUBTYPE_HOST);
    257 	case REP_PROTOCOL_SUBTYPE_NETADDR_V6:
    258 		return (REP_PROTOCOL_SUBTYPE_HOST);
    259 
    260 	case REP_PROTOCOL_TYPE_INVALID:
    261 	default:
    262 		return (REP_PROTOCOL_TYPE_INVALID);
    263 	}
    264 }
    265 
    266 int
    267 scf_is_compatible_protocol_type(rep_protocol_value_type_t base,
    268     rep_protocol_value_type_t new)
    269 {
    270 	rep_protocol_value_type_t t, cur;
    271 
    272 	if (base == REP_PROTOCOL_TYPE_INVALID)
    273 		return (0);
    274 
    275 	if (base == new)
    276 		return (1);
    277 
    278 	for (t = new; t != (cur = scf_proto_underlying_type(t)); t = cur) {
    279 		if (cur == REP_PROTOCOL_TYPE_INVALID)
    280 			return (0);
    281 		if (cur == base)
    282 			return (1);		/* base is parent of new */
    283 	}
    284 	return (0);
    285 }
    286 
    287 static int
    288 valid_encoded_value(rep_protocol_value_type_t t, const char *v)
    289 {
    290 	char *p;
    291 	ulong_t ns;
    292 
    293 	switch (t) {
    294 	case REP_PROTOCOL_TYPE_BOOLEAN:
    295 		return ((*v == '0' || *v == '1') && v[1] == 0);
    296 
    297 	case REP_PROTOCOL_TYPE_COUNT:
    298 		errno = 0;
    299 		if (strtoull(v, &p, 10) != 0 && *v == '0')
    300 			return (0);
    301 		return (errno == 0 && p != v && *p == 0);
    302 
    303 	case REP_PROTOCOL_TYPE_INTEGER:
    304 		errno = 0;
    305 		if (strtoll(v, &p, 10) != 0 && *v == '0')
    306 			return (0);
    307 		return (errno == 0 && p != v && *p == 0);
    308 
    309 	case REP_PROTOCOL_TYPE_TIME:
    310 		errno = 0;
    311 		(void) strtoll(v, &p, 10);
    312 		if (errno != 0 || p == v || (*p != 0 && *p != '.'))
    313 			return (0);
    314 		if (*p == '.') {
    315 			v = p + 1;
    316 			errno = 0;
    317 			ns = strtoul(v, &p, 10);
    318 
    319 			/* must be exactly 9 digits */
    320 			if ((p - v) != 9 || errno != 0 || *p != 0)
    321 				return (0);
    322 			if (ns >= NANOSEC)
    323 				return (0);
    324 		}
    325 		return (1);
    326 
    327 	case REP_PROTOCOL_TYPE_STRING:
    328 		return (valid_string(v));
    329 
    330 	case REP_PROTOCOL_TYPE_OPAQUE:
    331 		return (valid_opaque(v));
    332 
    333 	/*
    334 	 * The remaining types are subtypes -- because of the way
    335 	 * scf_validate_encoded_value() works, we can rely on the fact
    336 	 * that v is a valid example of our base type.  We only have to
    337 	 * check our own additional restrictions.
    338 	 */
    339 	case REP_PROTOCOL_SUBTYPE_USTRING:
    340 		return (valid_utf8(v));
    341 
    342 	case REP_PROTOCOL_SUBTYPE_URI:
    343 		return (valid_uri(v));
    344 
    345 	case REP_PROTOCOL_SUBTYPE_FMRI:
    346 		return (valid_fmri(v));
    347 
    348 	case REP_PROTOCOL_SUBTYPE_HOST:
    349 		return (valid_encoded_value(REP_PROTOCOL_SUBTYPE_HOSTNAME, v) ||
    350 		    valid_encoded_value(REP_PROTOCOL_SUBTYPE_NETADDR_V4, v) ||
    351 		    valid_encoded_value(REP_PROTOCOL_SUBTYPE_NETADDR_V6, v));
    352 
    353 	case REP_PROTOCOL_SUBTYPE_HOSTNAME:
    354 		/* XXX check for valid hostname */
    355 		return (valid_utf8(v));
    356 
    357 	case REP_PROTOCOL_SUBTYPE_NETADDR_V4:
    358 	case REP_PROTOCOL_SUBTYPE_NETADDR_V6:
    359 		/* XXX check for valid netaddr */
    360 		return (valid_utf8(v));
    361 
    362 	case REP_PROTOCOL_TYPE_INVALID:
    363 	default:
    364 		return (0);
    365 	}
    366 }
    367 
    368 int
    369 scf_validate_encoded_value(rep_protocol_value_type_t t, const char *v)
    370 {
    371 	rep_protocol_value_type_t base, cur;
    372 
    373 	base = scf_proto_underlying_type(t);
    374 	while ((cur = scf_proto_underlying_type(base)) != base)
    375 		base = cur;
    376 
    377 	if (base != t && !valid_encoded_value(base, v))
    378 		return (0);
    379 
    380 	return (valid_encoded_value(t, v));
    381 }
    382