Home | History | Annotate | Download | only in ip
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/types.h>
     27 #include <sys/socket.h>
     28 #include <sys/ksynch.h>
     29 #include <sys/kmem.h>
     30 #include <sys/errno.h>
     31 #include <sys/systm.h>
     32 #include <sys/sysmacros.h>
     33 #include <sys/cmn_err.h>
     34 #include <sys/strsun.h>
     35 #include <sys/zone.h>
     36 #include <netinet/in.h>
     37 #include <inet/common.h>
     38 #include <inet/ip.h>
     39 #include <inet/ip6.h>
     40 #include <inet/ip6_asp.h>
     41 #include <inet/ip_ire.h>
     42 #include <inet/ip_if.h>
     43 #include <inet/ipclassifier.h>
     44 
     45 #define	IN6ADDR_MASK128_INIT \
     46 	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }
     47 #define	IN6ADDR_MASK96_INIT	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0 }
     48 #ifdef _BIG_ENDIAN
     49 #define	IN6ADDR_MASK16_INIT	{ 0xffff0000U, 0, 0, 0 }
     50 #else
     51 #define	IN6ADDR_MASK16_INIT	{ 0x0000ffffU, 0, 0, 0 }
     52 #endif
     53 
     54 
     55 /*
     56  * This table is ordered such that longest prefix matches are hit first
     57  * (longer prefix lengths first).  The last entry must be the "default"
     58  * entry (::0/0).
     59  */
     60 static ip6_asp_t default_ip6_asp_table[] = {
     61 	{ IN6ADDR_LOOPBACK_INIT,	IN6ADDR_MASK128_INIT,
     62 	    "Loopback", 50 },
     63 	{ IN6ADDR_ANY_INIT,		IN6ADDR_MASK96_INIT,
     64 	    "IPv4_Compatible", 20 },
     65 #ifdef _BIG_ENDIAN
     66 	{ { 0, 0, 0x0000ffffU, 0 },	IN6ADDR_MASK96_INIT,
     67 	    "IPv4", 10 },
     68 	{ { 0x20020000U, 0, 0, 0 },	IN6ADDR_MASK16_INIT,
     69 	    "6to4", 30 },
     70 #else
     71 	{ { 0, 0, 0xffff0000U, 0 },	IN6ADDR_MASK96_INIT,
     72 	    "IPv4", 10 },
     73 	{ { 0x00000220U, 0, 0, 0 },	IN6ADDR_MASK16_INIT,
     74 	    "6to4", 30 },
     75 #endif
     76 	{ IN6ADDR_ANY_INIT,		IN6ADDR_ANY_INIT,
     77 	    "Default", 40 }
     78 };
     79 
     80 /*
     81  * The IPv6 Default Address Selection policy table.
     82  * Until someone up above reconfigures the policy table, use the global
     83  * default.  The table needs no lock since the only way to alter it is
     84  * through the SIOCSIP6ADDRPOLICY which is exclusive in ip.
     85  */
     86 static void ip6_asp_copy(ip6_asp_t *, ip6_asp_t *, uint_t);
     87 static void ip6_asp_check_for_updates(ip_stack_t *);
     88 
     89 void
     90 ip6_asp_init(ip_stack_t *ipst)
     91 {
     92 	/* Initialize the table lock */
     93 	mutex_init(&ipst->ips_ip6_asp_lock, NULL, MUTEX_DEFAULT, NULL);
     94 
     95 	ipst->ips_ip6_asp_table = default_ip6_asp_table;
     96 
     97 	ipst->ips_ip6_asp_table_count =
     98 	    sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t);
     99 }
    100 
    101 void
    102 ip6_asp_free(ip_stack_t *ipst)
    103 {
    104 	if (ipst->ips_ip6_asp_table != default_ip6_asp_table) {
    105 		kmem_free(ipst->ips_ip6_asp_table,
    106 		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
    107 		ipst->ips_ip6_asp_table = NULL;
    108 	}
    109 	mutex_destroy(&ipst->ips_ip6_asp_lock);
    110 }
    111 
    112 /*
    113  * Return false if the table is being updated. Else, increment the ref
    114  * count and return true.
    115  */
    116 boolean_t
    117 ip6_asp_can_lookup(ip_stack_t *ipst)
    118 {
    119 	mutex_enter(&ipst->ips_ip6_asp_lock);
    120 	if (ipst->ips_ip6_asp_uip) {
    121 		mutex_exit(&ipst->ips_ip6_asp_lock);
    122 		return (B_FALSE);
    123 	}
    124 	IP6_ASP_TABLE_REFHOLD(ipst);
    125 	mutex_exit(&ipst->ips_ip6_asp_lock);
    126 	return (B_TRUE);
    127 
    128 }
    129 
    130 void
    131 ip6_asp_pending_op(queue_t *q, mblk_t *mp, aspfunc_t func)
    132 {
    133 	conn_t	*connp = Q_TO_CONN(q);
    134 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
    135 
    136 	ASSERT((mp->b_prev == NULL) && (mp->b_queue == NULL) &&
    137 	    (mp->b_next == NULL));
    138 	mp->b_queue = (void *)q;
    139 	mp->b_prev = (void *)func;
    140 	mp->b_next = NULL;
    141 
    142 	mutex_enter(&ipst->ips_ip6_asp_lock);
    143 	if (ipst->ips_ip6_asp_pending_ops == NULL) {
    144 		ASSERT(ipst->ips_ip6_asp_pending_ops_tail == NULL);
    145 		ipst->ips_ip6_asp_pending_ops =
    146 		    ipst->ips_ip6_asp_pending_ops_tail = mp;
    147 	} else {
    148 		ipst->ips_ip6_asp_pending_ops_tail->b_next = mp;
    149 		ipst->ips_ip6_asp_pending_ops_tail = mp;
    150 	}
    151 	mutex_exit(&ipst->ips_ip6_asp_lock);
    152 }
    153 
    154 static void
    155 ip6_asp_complete_op(ip_stack_t *ipst)
    156 {
    157 	mblk_t		*mp;
    158 	queue_t		*q;
    159 	aspfunc_t	func;
    160 
    161 	mutex_enter(&ipst->ips_ip6_asp_lock);
    162 	while (ipst->ips_ip6_asp_pending_ops != NULL) {
    163 		mp = ipst->ips_ip6_asp_pending_ops;
    164 		ipst->ips_ip6_asp_pending_ops = mp->b_next;
    165 		mp->b_next = NULL;
    166 		if (ipst->ips_ip6_asp_pending_ops == NULL)
    167 			ipst->ips_ip6_asp_pending_ops_tail = NULL;
    168 		mutex_exit(&ipst->ips_ip6_asp_lock);
    169 
    170 		q = (queue_t *)mp->b_queue;
    171 		func = (aspfunc_t)mp->b_prev;
    172 
    173 		mp->b_prev = NULL;
    174 		mp->b_queue = NULL;
    175 
    176 
    177 		(*func)(NULL, q, mp, NULL);
    178 		mutex_enter(&ipst->ips_ip6_asp_lock);
    179 	}
    180 	mutex_exit(&ipst->ips_ip6_asp_lock);
    181 }
    182 
    183 /*
    184  * Decrement reference count. When it gets to 0, we check for (pending)
    185  * saved update to the table, if any.
    186  */
    187 void
    188 ip6_asp_table_refrele(ip_stack_t *ipst)
    189 {
    190 	IP6_ASP_TABLE_REFRELE(ipst);
    191 }
    192 
    193 /*
    194  * This function is guaranteed never to return a NULL pointer.  It
    195  * will always return information from one of the entries in the
    196  * asp_table (which will never be empty).  If a pointer is passed
    197  * in for the precedence, the precedence value will be set; a
    198  * pointer to the label will be returned by the function.
    199  *
    200  * Since the table is only anticipated to have five or six entries
    201  * total, the lookup algorithm hasn't been optimized to anything
    202  * better than O(n).
    203  */
    204 char *
    205 ip6_asp_lookup(const in6_addr_t *addr, uint32_t *precedence, ip_stack_t *ipst)
    206 {
    207 	ip6_asp_t *aspp;
    208 	ip6_asp_t *match = NULL;
    209 	ip6_asp_t *default_policy;
    210 
    211 	aspp = ipst->ips_ip6_asp_table;
    212 	/* The default entry must always be the last one */
    213 	default_policy = aspp + ipst->ips_ip6_asp_table_count - 1;
    214 
    215 	while (match == NULL) {
    216 		if (aspp == default_policy) {
    217 			match = aspp;
    218 		} else {
    219 			if (V6_MASK_EQ(*addr, aspp->ip6_asp_mask,
    220 			    aspp->ip6_asp_prefix))
    221 				match = aspp;
    222 			else
    223 				aspp++;
    224 		}
    225 	}
    226 
    227 	if (precedence != NULL)
    228 		*precedence = match->ip6_asp_precedence;
    229 	return (match->ip6_asp_label);
    230 }
    231 
    232 /*
    233  * If we had deferred updating the table because of outstanding references,
    234  * do it now. Note, we don't do error checking on the queued IOCTL mblk, since
    235  * ip_sioctl_ip6addrpolicy() has already done it for us.
    236  */
    237 void
    238 ip6_asp_check_for_updates(ip_stack_t *ipst)
    239 {
    240 	ip6_asp_t *table;
    241 	size_t	table_size;
    242 	mblk_t	*data_mp, *mp;
    243 	struct iocblk *iocp;
    244 
    245 	mutex_enter(&ipst->ips_ip6_asp_lock);
    246 	if (ipst->ips_ip6_asp_pending_update == NULL ||
    247 	    ipst->ips_ip6_asp_refcnt > 0) {
    248 		mutex_exit(&ipst->ips_ip6_asp_lock);
    249 		return;
    250 	}
    251 
    252 	mp = ipst->ips_ip6_asp_pending_update;
    253 	ipst->ips_ip6_asp_pending_update = NULL;
    254 	ASSERT(mp->b_prev != NULL);
    255 
    256 	ipst->ips_ip6_asp_uip = B_TRUE;
    257 
    258 	iocp = (struct iocblk *)mp->b_rptr;
    259 	data_mp = mp->b_cont;
    260 	if (data_mp == NULL) {
    261 		table = NULL;
    262 		table_size = iocp->ioc_count;
    263 	} else {
    264 		table = (ip6_asp_t *)data_mp->b_rptr;
    265 		table_size = iocp->ioc_count;
    266 	}
    267 
    268 	ip6_asp_replace(mp, table, table_size, B_TRUE, ipst,
    269 	    iocp->ioc_flag & IOC_MODELS);
    270 }
    271 
    272 /*
    273  * ip6_asp_replace replaces the contents of the IPv6 address selection
    274  * policy table with those specified in new_table.  If new_table is NULL,
    275  * this indicates that the caller wishes ip to use the default policy
    276  * table.  The caller is responsible for making sure that there are exactly
    277  * new_count policy entries in new_table.
    278  */
    279 /*ARGSUSED5*/
    280 void
    281 ip6_asp_replace(mblk_t *mp, ip6_asp_t *new_table, size_t new_size,
    282     boolean_t locked, ip_stack_t *ipst, model_t datamodel)
    283 {
    284 	int			ret_val = 0;
    285 	ip6_asp_t		*tmp_table;
    286 	uint_t			count;
    287 	queue_t			*q;
    288 	struct iocblk		*iocp;
    289 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
    290 	size_t ip6_asp_size = SIZEOF_STRUCT(ip6_asp, datamodel);
    291 #else
    292 	const size_t ip6_asp_size = sizeof (ip6_asp_t);
    293 #endif
    294 
    295 	if (new_size % ip6_asp_size != 0) {
    296 		ip1dbg(("ip6_asp_replace: invalid table size\n"));
    297 		ret_val = EINVAL;
    298 		if (locked)
    299 			goto unlock_end;
    300 		goto replace_end;
    301 	} else {
    302 		count = new_size / ip6_asp_size;
    303 	}
    304 
    305 
    306 	if (!locked)
    307 		mutex_enter(&ipst->ips_ip6_asp_lock);
    308 	/*
    309 	 * Check if we are in the process of creating any IRE using the
    310 	 * current information. If so, wait till that is done.
    311 	 */
    312 	if (!locked && ipst->ips_ip6_asp_refcnt > 0) {
    313 		/* Save this request for later processing */
    314 		if (ipst->ips_ip6_asp_pending_update == NULL) {
    315 			ipst->ips_ip6_asp_pending_update = mp;
    316 		} else {
    317 			/* Let's not queue multiple requests for now */
    318 			ip1dbg(("ip6_asp_replace: discarding request\n"));
    319 			mutex_exit(&ipst->ips_ip6_asp_lock);
    320 			ret_val =  EAGAIN;
    321 			goto replace_end;
    322 		}
    323 		mutex_exit(&ipst->ips_ip6_asp_lock);
    324 		return;
    325 	}
    326 
    327 	/* Prevent lookups till the table have been updated */
    328 	if (!locked)
    329 		ipst->ips_ip6_asp_uip = B_TRUE;
    330 
    331 	ASSERT(ipst->ips_ip6_asp_refcnt == 0);
    332 
    333 	if (new_table == NULL) {
    334 		/*
    335 		 * This is a special case.  The user wants to revert
    336 		 * back to using the default table.
    337 		 */
    338 		if (ipst->ips_ip6_asp_table == default_ip6_asp_table)
    339 			goto unlock_end;
    340 
    341 		kmem_free(ipst->ips_ip6_asp_table,
    342 		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
    343 		ipst->ips_ip6_asp_table = default_ip6_asp_table;
    344 		ipst->ips_ip6_asp_table_count =
    345 		    sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t);
    346 		goto unlock_end;
    347 	}
    348 
    349 	if (count == 0) {
    350 		ret_val = EINVAL;
    351 		ip1dbg(("ip6_asp_replace: empty table\n"));
    352 		goto unlock_end;
    353 	}
    354 
    355 	if ((tmp_table = kmem_alloc(count * sizeof (ip6_asp_t), KM_NOSLEEP)) ==
    356 	    NULL) {
    357 		ret_val = ENOMEM;
    358 		goto unlock_end;
    359 	}
    360 
    361 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
    362 
    363 	/*
    364 	 * If 'new_table' -actually- originates from a 32-bit process
    365 	 * then the nicely aligned ip6_asp_label array will be
    366 	 * subtlely misaligned on this kernel, because the structure
    367 	 * is 8 byte aligned in the kernel, but only 4 byte aligned in
    368 	 * userland.  Fix it up here.
    369 	 *
    370 	 * XX64	See the notes in ip_sioctl_ip6addrpolicy.  Perhaps we could
    371 	 *	do the datamodel transformation (below) there instead of here?
    372 	 */
    373 	if (datamodel == IOC_ILP32) {
    374 		ip6_asp_t *dst;
    375 		ip6_asp32_t *src;
    376 		int i;
    377 
    378 		if ((dst = kmem_zalloc(count * sizeof (*dst),
    379 		    KM_NOSLEEP)) == NULL) {
    380 			kmem_free(tmp_table, count * sizeof (ip6_asp_t));
    381 			ret_val = ENOMEM;
    382 			goto unlock_end;
    383 		}
    384 
    385 		/*
    386 		 * Copy each element of the table from ip6_asp32_t
    387 		 * format into ip6_asp_t format.  Fortunately, since
    388 		 * we're just dealing with a trailing structure pad,
    389 		 * we can do this straightforwardly with a flurry of
    390 		 * bcopying.
    391 		 */
    392 		src = (void *)new_table;
    393 		for (i = 0; i < count; i++)
    394 			bcopy(src + i, dst + i, sizeof (*src));
    395 
    396 		ip6_asp_copy(dst, tmp_table, count);
    397 		kmem_free(dst, count * sizeof (*dst));
    398 	} else
    399 #endif
    400 		ip6_asp_copy(new_table, tmp_table, count);
    401 
    402 	/* Make sure the last entry is the default entry */
    403 	if (!IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_prefix) ||
    404 	    !IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_mask)) {
    405 		ret_val = EINVAL;
    406 		kmem_free(tmp_table, count * sizeof (ip6_asp_t));
    407 		ip1dbg(("ip6_asp_replace: bad table: no default entry\n"));
    408 		goto unlock_end;
    409 	}
    410 	if (ipst->ips_ip6_asp_table != default_ip6_asp_table) {
    411 		kmem_free(ipst->ips_ip6_asp_table,
    412 		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
    413 	}
    414 	ipst->ips_ip6_asp_table = tmp_table;
    415 	ipst->ips_ip6_asp_table_count = count;
    416 
    417 unlock_end:
    418 	ipst->ips_ip6_asp_uip = B_FALSE;
    419 	mutex_exit(&ipst->ips_ip6_asp_lock);
    420 
    421 	/* Let conn_ixa caching know that source address selection changed */
    422 	ip_update_source_selection(ipst);
    423 
    424 replace_end:
    425 	/* Reply to the ioctl */
    426 	q = (queue_t *)mp->b_prev;
    427 	mp->b_prev = NULL;
    428 	if (q == NULL) {
    429 		freemsg(mp);
    430 		goto check_binds;
    431 	}
    432 	iocp = (struct iocblk *)mp->b_rptr;
    433 	iocp->ioc_error = ret_val;
    434 	iocp->ioc_count = 0;
    435 	DB_TYPE(mp) = (iocp->ioc_error == 0) ? M_IOCACK : M_IOCNAK;
    436 	qreply(q, mp);
    437 check_binds:
    438 	ip6_asp_complete_op(ipst);
    439 }
    440 
    441 /*
    442  * Copies the contents of src_table to dst_table, and sorts the
    443  * entries in decending order of prefix lengths.  It assumes that both
    444  * tables are appropriately sized to contain count entries.
    445  */
    446 static void
    447 ip6_asp_copy(ip6_asp_t *src_table, ip6_asp_t *dst_table, uint_t count)
    448 {
    449 	ip6_asp_t *src_ptr, *src_limit, *dst_ptr, *dst_limit, *dp;
    450 
    451 	dst_table[0] = src_table[0];
    452 	if (count == 1)
    453 		return;
    454 
    455 	/*
    456 	 * Sort the entries in descending order of prefix lengths.
    457 	 *
    458 	 * Note: this should be a small table.  In 99% of cases, we
    459 	 * expect the table to have 5 entries.  In the remaining 1%
    460 	 * of cases, we expect the table to have one or two more
    461 	 * entries.  It would be very rare for the table to have
    462 	 * double-digit entries.
    463 	 */
    464 	src_limit = src_table + count;
    465 	dst_limit = dst_table + 1;
    466 	for (src_ptr = src_table + 1; src_ptr != src_limit;
    467 	    src_ptr++, dst_limit++) {
    468 		for (dst_ptr = dst_table; dst_ptr < dst_limit; dst_ptr++) {
    469 			if (ip_mask_to_plen_v6(&src_ptr->ip6_asp_mask) >
    470 			    ip_mask_to_plen_v6(&dst_ptr->ip6_asp_mask)) {
    471 				/*
    472 				 * Make room to insert the source entry
    473 				 * before dst_ptr by shifting entries to
    474 				 * the right.
    475 				 */
    476 				for (dp = dst_limit - 1; dp >= dst_ptr; dp--)
    477 					*(dp + 1) = *dp;
    478 				break;
    479 			}
    480 		}
    481 		*dst_ptr = *src_ptr;
    482 	}
    483 }
    484 
    485 /*
    486  * This function copies as many entries from ip6_asp_table as will fit
    487  * into dtable.  The dtable_size parameter is the size of dtable
    488  * in bytes.  This function returns the number of entries in
    489  * ip6_asp_table, even if it's not able to fit all of the entries into
    490  * dtable.
    491  */
    492 int
    493 ip6_asp_get(ip6_asp_t *dtable, size_t dtable_size, ip_stack_t *ipst)
    494 {
    495 	uint_t dtable_count;
    496 
    497 	if (dtable != NULL) {
    498 		if (dtable_size < sizeof (ip6_asp_t))
    499 			return (-1);
    500 
    501 		dtable_count = dtable_size / sizeof (ip6_asp_t);
    502 		bcopy(ipst->ips_ip6_asp_table, dtable,
    503 		    MIN(ipst->ips_ip6_asp_table_count, dtable_count) *
    504 		    sizeof (ip6_asp_t));
    505 	}
    506 
    507 	return (ipst->ips_ip6_asp_table_count);
    508 }
    509 
    510 /*
    511  * Compare two labels.  Return B_TRUE if they are equal, B_FALSE
    512  * otherwise.
    513  */
    514 boolean_t
    515 ip6_asp_labelcmp(const char *label1, const char *label2)
    516 {
    517 	int64_t *llptr1, *llptr2;
    518 
    519 	/*
    520 	 * The common case, the two labels are actually the same string
    521 	 * from the policy table.
    522 	 */
    523 	if (label1 == label2)
    524 		return (B_TRUE);
    525 
    526 	/*
    527 	 * Since we know the labels are at most 16 bytes long, compare
    528 	 * the two strings as two 8-byte long integers.  The ip6_asp_t
    529 	 * structure guarantees that the labels are 8 byte alligned.
    530 	 */
    531 	llptr1 = (int64_t *)label1;
    532 	llptr2 = (int64_t *)label2;
    533 	if (llptr1[0] == llptr2[0] && llptr1[1] == llptr2[1])
    534 		return (B_TRUE);
    535 	return (B_FALSE);
    536 }
    537