Home | History | Annotate | Download | only in ip
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/types.h>
     27 #include <sys/systm.h>
     28 #include <sys/kmem.h>
     29 #include <sys/disp.h>
     30 #include <sys/stream.h>
     31 #include <sys/strsubr.h>
     32 #include <sys/strsun.h>
     33 #include <sys/policy.h>
     34 #include <sys/tsol/label_macro.h>
     35 #include <sys/tsol/tndb.h>
     36 #include <sys/tsol/tnet.h>
     37 #include <inet/ip.h>
     38 #include <inet/ip6.h>
     39 #include <inet/tcp.h>
     40 #include <inet/ipclassifier.h>
     41 #include <inet/ip_ire.h>
     42 #include <inet/ip_ftable.h>
     43 
     44 /*
     45  * This routine takes a sensitivity label as input and creates a CIPSO
     46  * option in the specified buffer.  It returns the size of the CIPSO option.
     47  * If the sensitivity label is too large for the CIPSO option, then 0
     48  * is returned.
     49  *
     50  * tsol2cipso_tt1 returns 0 for failure and greater than 0 for success
     51  * (more accurately, success means a return value between 10 and 40).
     52  */
     53 
     54 static int
     55 tsol2cipso_tt1(const bslabel_t *sl, unsigned char *cop, uint32_t doi)
     56 {
     57 	struct cipso_tag_type_1 *tt1;
     58 	const _bslabel_impl_t *bsl;
     59 	const uchar_t *ucp;
     60 	int i;
     61 
     62 	if (doi == 0)
     63 		return (0);
     64 
     65 	/* check for Admin High sensitivity label */
     66 	if (blequal(sl, label2bslabel(l_admin_high)))
     67 		return (0);
     68 
     69 	/* check whether classification will fit in one octet */
     70 	bsl = (const _bslabel_impl_t *)sl;
     71 	if (LCLASS(bsl) & 0xFF00)
     72 		return (0);
     73 
     74 	/*
     75 	 * Check whether compartments will fit in 30 octets.
     76 	 * Compartments 241 - 256 are not allowed.
     77 	 */
     78 	if (ntohl(bsl->compartments.c8) & 0x0000FFFF)
     79 		return (0);
     80 
     81 	/*
     82 	 * Compute option length and tag length.
     83 	 * 'p' points to the last two bytes in the Sensitivity Label's
     84 	 * compartments; these cannot be mapped into CIPSO compartments.
     85 	 */
     86 	ucp = (const uchar_t *)&bsl->compartments.c8 + 2;
     87 	while (--ucp >= (const uchar_t *)&bsl->compartments.c1)
     88 		if (*ucp != 0)
     89 			break;
     90 
     91 	i =  ucp - (const uchar_t *)&bsl->compartments.c1 + 1;
     92 
     93 	if (cop == NULL)
     94 		return (10 + i);
     95 
     96 	doi = htonl(doi);
     97 	ucp = (const uchar_t *)&doi;
     98 	cop[IPOPT_OPTVAL] = IPOPT_COMSEC;
     99 	cop[IPOPT_OLEN] = 10 + i;
    100 	cop[IPOPT_OLEN+1] = ucp[0];
    101 	cop[IPOPT_OLEN+2] = ucp[1];
    102 	cop[IPOPT_OLEN+3] = ucp[2];
    103 	cop[IPOPT_OLEN+4] = ucp[3];
    104 	tt1 = (struct cipso_tag_type_1 *)&cop[IPOPT_OLEN + 5];
    105 	tt1->tag_type = 1;
    106 	tt1->tag_align = 0;
    107 	tt1->tag_sl = LCLASS(bsl);
    108 	tt1->tag_length = 4 + i;
    109 
    110 	bcopy(&bsl->compartments.c1, tt1->tag_cat, i);
    111 
    112 	return (cop[IPOPT_OLEN]);
    113 }
    114 
    115 /*
    116  * The following routine searches for a security label in an IPv4 datagram.
    117  * It returns label_type of:
    118  *    OPT_CIPSO if a CIPSO IP option is found.
    119  *    OPT_NONE if no security label is found.
    120  *
    121  * If OPT_CIPSO, a pointer to the CIPSO IP option will be returned in
    122  * the buffer parameter.
    123  *
    124  * The function will return with B_FALSE if an IP format error
    125  * is encountered.
    126  */
    127 
    128 boolean_t
    129 tsol_get_option_v4(mblk_t *mp, tsol_ip_label_t *label_type, uchar_t **buffer)
    130 {
    131 	ipha_t	*ipha;
    132 	uchar_t	*opt;
    133 	uint32_t	totallen;
    134 	uint32_t	optval;
    135 	uint32_t	optlen;
    136 
    137 	*label_type = OPT_NONE;
    138 
    139 	/*
    140 	 * Get length (in 4 byte octets) of IP header options.
    141 	 * If header doesn't contain options, then return a label_type
    142 	 * of OPT_NONE.
    143 	 */
    144 	ipha = (ipha_t *)mp->b_rptr;
    145 	totallen = ipha->ipha_version_and_hdr_length -
    146 	    (uint8_t)((IP_VERSION << 4));
    147 	totallen <<= 2;
    148 	if (totallen < IP_SIMPLE_HDR_LENGTH || totallen > MBLKL(mp))
    149 		return (B_FALSE);
    150 	totallen -= IP_SIMPLE_HDR_LENGTH;
    151 	if (totallen == 0)
    152 		return (B_TRUE);
    153 
    154 	/*
    155 	 * Search for CIPSO option.
    156 	 * If no such option is present, then return OPT_NONE.
    157 	 */
    158 	opt = (uchar_t *)&ipha[1];
    159 	while (totallen != 0) {
    160 		switch (optval = opt[IPOPT_OPTVAL]) {
    161 		case IPOPT_EOL:
    162 			return (B_TRUE);
    163 		case IPOPT_NOP:
    164 			optlen = 1;
    165 			break;
    166 		default:
    167 			if (totallen <= IPOPT_OLEN)
    168 				return (B_FALSE);
    169 			optlen = opt[IPOPT_OLEN];
    170 			if (optlen < 2)
    171 				return (B_FALSE);
    172 		}
    173 		if (optlen > totallen)
    174 			return (B_FALSE);
    175 		/*
    176 		 * Copy pointer to option into '*buffer' and
    177 		 * return the option type.
    178 		 */
    179 		switch (optval) {
    180 		case IPOPT_COMSEC:
    181 			if (TSOL_CIPSO_TAG_OFFSET < optlen &&
    182 			    opt[TSOL_CIPSO_TAG_OFFSET] == 1) {
    183 				*label_type = OPT_CIPSO;
    184 				*buffer = opt;
    185 				return (B_TRUE);
    186 			}
    187 			return (B_FALSE);
    188 		}
    189 		totallen -= optlen;
    190 		opt += optlen;
    191 	}
    192 	return (B_TRUE);
    193 }
    194 
    195 /*
    196  * The following routine searches for a security label in an IPv6 datagram.
    197  * It returns label_type of:
    198  *    OPT_CIPSO if a CIPSO IP option is found.
    199  *    OPT_NONE if no security label is found.
    200  *
    201  * If OPT_CIPSO, a pointer to the IPv4 portion of the CIPSO IP option will
    202  * be returned in the buffer parameter.
    203  *
    204  * The function will return with B_FALSE if an IP format error
    205  * or an unexpected label content error is encountered.
    206  */
    207 
    208 boolean_t
    209 tsol_get_option_v6(mblk_t *mp, tsol_ip_label_t *label_type, uchar_t **buffer)
    210 {
    211 	uchar_t		*opt_ptr = NULL;
    212 	uchar_t		*after_secopt;
    213 	boolean_t	hbh_needed;
    214 	const uchar_t	*ip6hbh;
    215 	size_t		optlen;
    216 	uint32_t	doi;
    217 	const ip6_t	*ip6h;
    218 
    219 	*label_type = OPT_NONE;
    220 	*buffer = NULL;
    221 	ip6h = (const ip6_t *)mp->b_rptr;
    222 	if (ip6h->ip6_nxt != IPPROTO_HOPOPTS)
    223 		return (B_TRUE);
    224 	ip6hbh = (const uchar_t *)&ip6h[1];
    225 	if (ip6hbh + MIN_EHDR_LEN > mp->b_wptr)
    226 		return (B_FALSE);
    227 	optlen = (ip6hbh[1] + 1) << 3;
    228 	if (ip6hbh + optlen > mp->b_wptr)
    229 		return (B_FALSE);
    230 	if (!tsol_find_secopt_v6(ip6hbh, optlen,
    231 	    &opt_ptr, &after_secopt, &hbh_needed))
    232 		return (B_FALSE);
    233 	/* tsol_find_secopt_v6 guarantees some sanity */
    234 	if (opt_ptr != NULL) {
    235 		/*
    236 		 * IPv6 Option
    237 		 *   opt_ptr[0]: Option type
    238 		 *   opt_ptr[1]: Length of option data in bytes
    239 		 *   opt_ptr[2]: First byte of option data
    240 		 */
    241 		if ((optlen = opt_ptr[1]) < 8)
    242 			return (B_FALSE);
    243 		opt_ptr += 2;
    244 		/*
    245 		 * From "Generalized Labeled Security Option for IPv6" draft
    246 		 *   opt_ptr[0] - opt_ptr[4]: DOI = IP6LS_DOI_V4
    247 		 *   opt_ptr[4]: Tag type = IP6LS_TT_V4
    248 		 *   opt_ptr[5]: Tag length in bytes starting at Tag type field
    249 		 * IPv4 CIPSO Option
    250 		 *   opt_ptr[6]: option type
    251 		 *   opt_ptr[7]: option length in bytes starting at type field
    252 		 */
    253 		bcopy(opt_ptr, &doi, sizeof (doi));
    254 		doi = ntohl(doi);
    255 		if (doi == IP6LS_DOI_V4 &&
    256 		    opt_ptr[4] == IP6LS_TT_V4 &&
    257 		    opt_ptr[5] <= optlen - 4 &&
    258 		    opt_ptr[7] <= optlen - 6 &&
    259 		    opt_ptr[7] <= opt_ptr[5] - 2) {
    260 			opt_ptr += sizeof (doi) + 2;
    261 			*label_type = OPT_CIPSO;
    262 			*buffer = opt_ptr;
    263 			return (B_TRUE);
    264 		}
    265 		return (B_FALSE);
    266 	}
    267 	return (B_TRUE);
    268 }
    269 
    270 /*
    271  * tsol_check_dest()
    272  *
    273  * This routine verifies if a destination is allowed to receive messages
    274  * based on the security label. If any adjustments to the label are needed
    275  * due to the connection's MAC mode or the destination's ability
    276  * to receive labels, an "effective label" will be returned.
    277  *
    278  * zone_is_global is set if the actual zoneid is global. That is, it is
    279  * not set for an exclusive-IP zone.
    280  *
    281  * On successful return, effective_tsl will point to the new label needed
    282  * or will be NULL if a new label isn't needed. On error, effective_tsl will
    283  * point to NULL.
    284  *
    285  * Returns:
    286  *      0		Label (was|is now) correct
    287  *	EHOSTUNREACH	The label failed the remote host accreditation
    288  *      ENOMEM		Memory allocation failure
    289  */
    290 int
    291 tsol_check_dest(const ts_label_t *tsl, const void *dst,
    292     uchar_t version, uint_t mac_mode, boolean_t zone_is_global,
    293     ts_label_t **effective_tsl)
    294 {
    295 	ts_label_t	*newtsl = NULL;
    296 	tsol_tpc_t	*dst_rhtp;
    297 
    298 	if (effective_tsl != NULL)
    299 		*effective_tsl = NULL;
    300 	ASSERT(version == IPV4_VERSION ||
    301 	    (version == IPV6_VERSION &&
    302 	    !IN6_IS_ADDR_V4MAPPED((in6_addr_t *)dst)));
    303 
    304 	/* Always pass kernel level communication (NULL label) */
    305 	if (tsl == NULL) {
    306 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__mac__allownull,
    307 		    char *, "destination ip(1) with null label was passed",
    308 		    ipaddr_t, dst);
    309 		return (0);
    310 	}
    311 
    312 	if (tsl->tsl_flags & TSLF_IMPLICIT_IN) {
    313 		DTRACE_PROBE3(tx__tnopt__log__info__labeling__unresolved__label,
    314 		    char *,
    315 		    "implicit-in packet to ip(1) reached tsol_check_dest "
    316 		    "with implied security label sl(2)",
    317 		    ipaddr_t, dst, ts_label_t *, tsl);
    318 	}
    319 
    320 	/* Always pass multicast */
    321 	if (version == IPV4_VERSION &&
    322 	    CLASSD(*(ipaddr_t *)dst)) {
    323 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__mac__allowmult,
    324 		    char *, "destination ip(1) with multicast dest was passed",
    325 		    ipaddr_t, dst);
    326 		return (0);
    327 	} else if (version == IPV6_VERSION &&
    328 	    IN6_IS_ADDR_MULTICAST((in6_addr_t *)dst)) {
    329 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__mac__allowmult_v6,
    330 		    char *, "destination ip(1) with multicast dest was passed",
    331 		    in6_addr_t *, dst);
    332 		return (0);
    333 	}
    334 
    335 	/* Never pass an undefined destination */
    336 	if ((dst_rhtp = find_tpc(dst, version, B_FALSE)) == NULL) {
    337 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__lookupdst,
    338 		    char *, "destination ip(1) not in tn database.",
    339 		    void *, dst);
    340 		return (EHOSTUNREACH);
    341 	}
    342 
    343 	switch (dst_rhtp->tpc_tp.host_type) {
    344 	case UNLABELED:
    345 		/*
    346 		 * Can talk to unlabeled hosts if
    347 		 * (1) zone's label matches the default label, or
    348 		 * (2) SO_MAC_EXEMPT is on and we
    349 		 * dominate the peer's label, or
    350 		 * (3) SO_MAC_EXEMPT is on and
    351 		 * this is the global zone
    352 		 */
    353 		if (dst_rhtp->tpc_tp.tp_doi != tsl->tsl_doi) {
    354 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__doi,
    355 			    char *, "unlabeled dest ip(1)/tpc(2) doi does "
    356 			    "not match msg label(3) doi.", void *, dst,
    357 			    tsol_tpc_t *, dst_rhtp, ts_label_t *, tsl);
    358 			TPC_RELE(dst_rhtp);
    359 			return (EHOSTUNREACH);
    360 		}
    361 		if (!blequal(&dst_rhtp->tpc_tp.tp_def_label,
    362 		    &tsl->tsl_label)) {
    363 			if (mac_mode != CONN_MAC_AWARE ||
    364 			    !(zone_is_global ||
    365 			    bldominates(&tsl->tsl_label,
    366 			    &dst_rhtp->tpc_tp.tp_def_label))) {
    367 				DTRACE_PROBE4(
    368 				    tx__tnopt__log__info__labeling__mac,
    369 				    char *, "unlabeled dest ip(1)/tpc(2) does "
    370 				    "not match msg label(3).", void *, dst,
    371 				    tsol_tpc_t *, dst_rhtp, ts_label_t *, tsl);
    372 				TPC_RELE(dst_rhtp);
    373 				return (EHOSTUNREACH);
    374 			}
    375 			/*
    376 			 * This is a downlabel MAC-exempt exchange.
    377 			 * Use the remote destination's default label
    378 			 * as the label of the message data.
    379 			 */
    380 			if ((newtsl = labelalloc(&dst_rhtp->tpc_tp.tp_def_label,
    381 			    dst_rhtp->tpc_tp.tp_doi, KM_NOSLEEP)) == NULL) {
    382 				TPC_RELE(dst_rhtp);
    383 				return (ENOMEM);
    384 			}
    385 			newtsl->tsl_flags |= TSLF_UNLABELED;
    386 
    387 		} else if (!(tsl->tsl_flags & TSLF_UNLABELED)) {
    388 			/*
    389 			 * The security labels are the same but we need
    390 			 * to flag that the remote node is unlabeled.
    391 			 */
    392 			if ((newtsl = labeldup(tsl, KM_NOSLEEP)) == NULL) {
    393 				TPC_RELE(dst_rhtp);
    394 				return (ENOMEM);
    395 			}
    396 			newtsl->tsl_flags |= TSLF_UNLABELED;
    397 		}
    398 		break;
    399 
    400 	case SUN_CIPSO:
    401 		/*
    402 		 * Can talk to labeled hosts if zone's label is within target's
    403 		 * label range or set.
    404 		 */
    405 		if (dst_rhtp->tpc_tp.tp_cipso_doi_cipso != tsl->tsl_doi ||
    406 		    (!_blinrange(&tsl->tsl_label,
    407 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
    408 		    !blinlset(&tsl->tsl_label,
    409 		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
    410 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac,
    411 			    char *, "labeled dest ip(1)/tpc(2) does not "
    412 			    "match msg label(3).", void *, dst,
    413 			    tsol_tpc_t *, dst_rhtp, ts_label_t *, tsl);
    414 			TPC_RELE(dst_rhtp);
    415 			return (EHOSTUNREACH);
    416 		}
    417 		if ((tsl->tsl_flags & TSLF_UNLABELED) ||
    418 		    (mac_mode == CONN_MAC_IMPLICIT)) {
    419 			/*
    420 			 * Copy label so we can modify the flags
    421 			 */
    422 			if ((newtsl = labeldup(tsl, KM_NOSLEEP)) == NULL) {
    423 				TPC_RELE(dst_rhtp);
    424 				return (ENOMEM);
    425 			}
    426 			/*
    427 			 * The security label is a match but we need to
    428 			 * clear the unlabeled flag for this remote node.
    429 			 */
    430 			newtsl->tsl_flags &= ~TSLF_UNLABELED;
    431 			if (mac_mode == CONN_MAC_IMPLICIT)
    432 				newtsl->tsl_flags |= TSLF_IMPLICIT_OUT;
    433 		}
    434 		break;
    435 
    436 	default:
    437 		TPC_RELE(dst_rhtp);
    438 		return (EHOSTUNREACH);
    439 	}
    440 
    441 	/*
    442 	 * Return the new label.
    443 	 */
    444 	if (newtsl != NULL) {
    445 		if (effective_tsl != NULL)
    446 			*effective_tsl = newtsl;
    447 		else
    448 			label_rele(newtsl);
    449 	}
    450 	TPC_RELE(dst_rhtp);
    451 	return (0);
    452 }
    453 
    454 /*
    455  * tsol_compute_label_v4()
    456  *
    457  * This routine computes the IP label that should be on a packet based on the
    458  * connection and destination information.
    459  *
    460  * The zoneid is the IP zoneid (i.e., GLOBAL_ZONEID for exclusive-IP zones).
    461  *
    462  * Returns:
    463  *      0		Fetched label
    464  *	EHOSTUNREACH	No route to destination
    465  *	EINVAL		Label cannot be computed
    466  */
    467 int
    468 tsol_compute_label_v4(const ts_label_t *tsl, zoneid_t zoneid, ipaddr_t dst,
    469     uchar_t *opt_storage, ip_stack_t *ipst)
    470 {
    471 	uint_t		sec_opt_len;
    472 	ire_t		*ire;
    473 	tsol_ire_gw_secattr_t *attrp = NULL;
    474 
    475 	if (opt_storage != NULL)
    476 		opt_storage[IPOPT_OLEN] = 0;
    477 
    478 	if (tsl == NULL)
    479 		return (0);
    480 
    481 	/* always pass multicast */
    482 	if (CLASSD(dst))
    483 		return (0);
    484 
    485 	if (tsl->tsl_flags & TSLF_IMPLICIT_OUT)
    486 		return (0);
    487 
    488 	if (tsl->tsl_flags & TSLF_UNLABELED) {
    489 		/*
    490 		 * The destination is unlabeled. Only add a label if the
    491 		 * destination is not a broadcast/local/loopback address,
    492 		 * the destination is not on the same subnet, and the
    493 		 * next-hop gateway is labeled.
    494 		 */
    495 		ire = ire_route_recursive_v4(dst, 0, NULL, zoneid, tsl,
    496 		    MATCH_IRE_SECATTR, IRR_ALLOCATE, 0, ipst, NULL, &attrp,
    497 		    NULL);
    498 		ASSERT(ire != NULL);
    499 		if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
    500 			/* no route to destination */
    501 			ire_refrele(ire);
    502 			DTRACE_PROBE3(
    503 			    tx__tnopt__log__info__labeling__routedst__v4,
    504 			    char *, "No route to unlabeled dest ip(1) with "
    505 			    "with label(2).", ipaddr_t, dst, ts_label_t *, tsl);
    506 			return (EHOSTUNREACH);
    507 		}
    508 		if (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK |
    509 		    IRE_INTERFACE)) {
    510 			ire_refrele(ire);
    511 			return (0);
    512 		}
    513 
    514 		/*
    515 		 * ire_route_recursive gives us the first attrp it finds
    516 		 * in the recursive lookup.
    517 		 */
    518 		/*
    519 		 * Return now if next hop gateway is unlabeled. There is
    520 		 * no need to generate a CIPSO option for this message.
    521 		 */
    522 		if (attrp == NULL || attrp->igsa_rhc == NULL ||
    523 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type == UNLABELED) {
    524 			ire_refrele(ire);
    525 			return (0);
    526 		}
    527 		ire_refrele(ire);
    528 	}
    529 
    530 	/* compute the CIPSO option */
    531 	sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
    532 	    tsl->tsl_doi);
    533 
    534 	if (sec_opt_len == 0) {
    535 		DTRACE_PROBE3(tx__tnopt__log__error__labeling__lostops__v4,
    536 		    char *, "options lack length for dest ip(1) with label(2).",
    537 		    ipaddr_t, dst, ts_label_t *, tsl);
    538 		return (EINVAL);
    539 	}
    540 
    541 	return (0);
    542 }
    543 
    544 /*
    545  * Remove any existing security option (CIPSO) from the given IP
    546  * header, move the 'buflen' bytes back to fill the gap, and return the number
    547  * of bytes removed (as zero or negative number).  Assumes that the headers are
    548  * sane.
    549  *
    550  * Note that tsol_remove_secopt does not adjust ipha_length but
    551  * tsol_remove_secopt_v6 does adjust ip6_plen.
    552  */
    553 int
    554 tsol_remove_secopt(ipha_t *ipha, int buflen)
    555 {
    556 	int remlen, olen, oval, delta;
    557 	uchar_t *fptr, *tptr;
    558 	boolean_t noop_keep;
    559 
    560 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
    561 	fptr = tptr = (uchar_t *)(ipha + 1);
    562 	noop_keep = B_TRUE;
    563 	while (remlen > 0) {
    564 		oval = fptr[IPOPT_OPTVAL];
    565 
    566 		/* terminate on end of list */
    567 		if (oval == IPOPT_EOL)
    568 			break;
    569 
    570 		/*
    571 		 * Delete any no-ops following a deleted option, at least up
    572 		 * to a 4 octet alignment; copy others.
    573 		 */
    574 		if (oval == IPOPT_NOP) {
    575 			if (((fptr - (uchar_t *)ipha) & 3) == 0)
    576 				noop_keep = B_TRUE;
    577 			if (noop_keep)
    578 				*tptr++ = oval;
    579 			fptr++;
    580 			remlen--;
    581 			continue;
    582 		}
    583 
    584 		/* stop on corrupted list; just do nothing. */
    585 		if (remlen < 2)
    586 			return (0);
    587 		olen = fptr[IPOPT_OLEN];
    588 		if (olen < 2 || olen > remlen)
    589 			return (0);
    590 
    591 		/* skip over security options to delete them */
    592 		if (oval == IPOPT_COMSEC || oval == IPOPT_SECURITY) {
    593 			noop_keep = B_FALSE;
    594 			fptr += olen;
    595 			remlen -= olen;
    596 			continue;
    597 		}
    598 
    599 		/* copy the rest */
    600 		noop_keep = B_TRUE;
    601 		if (tptr != fptr)
    602 			ovbcopy(fptr, tptr, olen);
    603 		fptr += olen;
    604 		tptr += olen;
    605 		remlen -= olen;
    606 	}
    607 
    608 	fptr += remlen;
    609 
    610 	/* figure how much padding we'll need for header alignment */
    611 	olen = (tptr - (uchar_t *)ipha) & 3;
    612 	if (olen > 0) {
    613 		olen = 4 - olen;
    614 		/* pad with end-of-list */
    615 		bzero(tptr, olen);
    616 		tptr += olen;
    617 	}
    618 
    619 	/* slide back the headers that follow and update the IP header */
    620 	delta = fptr - tptr;
    621 	if (delta != 0) {
    622 		ovbcopy(fptr, tptr, ((uchar_t *)ipha + buflen) - fptr);
    623 		ipha->ipha_version_and_hdr_length -= delta / 4;
    624 	}
    625 	return (-delta);
    626 }
    627 
    628 /*
    629  * Insert the option in 'optbuf' into the IP header pointed to by 'ipha', and
    630  * move the data following the IP header (up to buflen) to accommodate the new
    631  * option.  Assumes that up to IP_MAX_OPT_LENGTH bytes are available (in total)
    632  * for IP options.  Returns the number of bytes actually inserted, or -1 if the
    633  * option cannot be inserted.  (Note that negative return values are possible
    634  * when noops must be compressed, and that only -1 indicates error.  Successful
    635  * return value is always evenly divisible by 4, by definition.)
    636  *
    637  * Note that tsol_prepend_option does not adjust ipha_length but
    638  * tsol_prepend_option_v6 does adjust ip6_plen.
    639  */
    640 int
    641 tsol_prepend_option(uchar_t *optbuf, ipha_t *ipha, int buflen)
    642 {
    643 	int remlen, padding, lastpad, totlen;
    644 	int oval, olen;
    645 	int delta;
    646 	uchar_t *optr;
    647 	uchar_t tempopt[IP_MAX_OPT_LENGTH], *toptr;
    648 
    649 	if (optbuf[IPOPT_OPTVAL] == IPOPT_EOL ||
    650 	    optbuf[IPOPT_OPTVAL] == IPOPT_NOP ||
    651 	    optbuf[IPOPT_OLEN] == 0)
    652 		return (0);
    653 
    654 	ASSERT(optbuf[IPOPT_OLEN] >= 2 &&
    655 	    optbuf[IPOPT_OLEN] <= IP_MAX_OPT_LENGTH);
    656 
    657 	/* first find the real (unpadded) length of the existing options */
    658 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
    659 	padding = totlen = lastpad = 0;
    660 	optr = (uchar_t *)(ipha + 1);
    661 	while (remlen > 0) {
    662 		oval = optr[IPOPT_OPTVAL];
    663 
    664 		/* stop at end of list */
    665 		if (oval == IPOPT_EOL)
    666 			break;
    667 
    668 		/* skip no-ops, noting that length byte isn't present */
    669 		if (oval == IPOPT_NOP) {
    670 			optr++;
    671 			padding++;
    672 			lastpad++;
    673 			totlen++;
    674 			remlen--;
    675 			continue;
    676 		}
    677 
    678 		/* give up on a corrupted list; report failure */
    679 		if (remlen < 2)
    680 			return (-1);
    681 		olen = optr[IPOPT_OLEN];
    682 		if (olen < 2 || olen > remlen)
    683 			return (-1);
    684 
    685 		lastpad = 0;
    686 		optr += olen;
    687 		totlen += olen;
    688 		remlen -= olen;
    689 	}
    690 
    691 	/* completely ignore any trailing padding */
    692 	totlen -= lastpad;
    693 	padding -= lastpad;
    694 
    695 	/*
    696 	 * If some sort of inter-option alignment was present, try to preserve
    697 	 * that alignment.  If alignment pushes us out past the maximum, then
    698 	 * discard it and try to compress to fit.  (We just "assume" that any
    699 	 * padding added was attempting to get 32 bit alignment.  If that's
    700 	 * wrong, that's just too bad.)
    701 	 */
    702 	if (padding > 0) {
    703 		olen = (optbuf[IPOPT_OLEN] + 3) & ~3;
    704 		if (olen + totlen > IP_MAX_OPT_LENGTH) {
    705 			totlen -= padding;
    706 			if (olen + totlen > IP_MAX_OPT_LENGTH)
    707 				return (-1);
    708 			padding = 0;
    709 		}
    710 	}
    711 
    712 	/*
    713 	 * Since we may need to compress or expand the option list, we write to
    714 	 * a temporary buffer and then copy the results back to the IP header.
    715 	 */
    716 	toptr = tempopt;
    717 
    718 	/* compute actual option to insert */
    719 	olen = optbuf[IPOPT_OLEN];
    720 	bcopy(optbuf, toptr, olen);
    721 	toptr += olen;
    722 	if (padding > 0) {
    723 		while ((olen & 3) != 0) {
    724 			*toptr++ = IPOPT_NOP;
    725 			olen++;
    726 		}
    727 	}
    728 
    729 	/* copy over the existing options */
    730 	optr = (uchar_t *)(ipha + 1);
    731 	while (totlen > 0) {
    732 		oval = optr[IPOPT_OPTVAL];
    733 
    734 		/* totlen doesn't include end-of-list marker */
    735 		ASSERT(oval != IPOPT_EOL);
    736 
    737 		/* handle no-ops; copy if desired, ignore otherwise */
    738 		if (oval == IPOPT_NOP) {
    739 			if (padding > 0) {
    740 				/* note: cannot overflow due to checks above */
    741 				ASSERT(toptr < tempopt + IP_MAX_OPT_LENGTH);
    742 				*toptr++ = oval;
    743 			}
    744 			optr++;
    745 			totlen--;
    746 			continue;
    747 		}
    748 
    749 		/* list cannot be corrupt at this point */
    750 		ASSERT(totlen >= 2);
    751 		olen = optr[IPOPT_OLEN];
    752 		ASSERT(olen >= 2 && olen <= totlen);
    753 
    754 		/* cannot run out of room due to tests above */
    755 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
    756 
    757 		bcopy(optr, toptr, olen);
    758 		optr += olen;
    759 		toptr += olen;
    760 		totlen -= olen;
    761 	}
    762 
    763 	/* figure how much padding we'll need for header alignment */
    764 	olen = (toptr - tempopt) & 3;
    765 	if (olen > 0) {
    766 		olen = 4 - olen;
    767 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
    768 		/* pad with end-of-list value */
    769 		bzero(toptr, olen);
    770 		toptr += olen;
    771 	}
    772 
    773 	/* move the headers as needed and update IP header */
    774 	olen = (toptr - tempopt) + IP_SIMPLE_HDR_LENGTH;
    775 	remlen = IPH_HDR_LENGTH(ipha);
    776 	delta = olen - remlen;
    777 	if (delta != 0) {
    778 		ovbcopy((uchar_t *)ipha + remlen, (uchar_t *)ipha + olen,
    779 		    buflen - remlen);
    780 		ipha->ipha_version_and_hdr_length += delta / 4;
    781 	}
    782 
    783 	/* slap in the new options */
    784 	bcopy(tempopt, ipha + 1, olen - IP_SIMPLE_HDR_LENGTH);
    785 
    786 	return (delta);
    787 }
    788 
    789 /*
    790  * tsol_check_label_v4()
    791  *
    792  * This routine computes the IP label that should be on the packet based on the
    793  * connection and destination information.  It's called by the IP forwarding
    794  * logic and by ip_output_simple. The ULPs generate the labels before calling
    795  * conn_ip_output. If any adjustments to
    796  * the label are needed due to the connection's MAC-exempt status or
    797  * the destination's ability to receive labels, an "effective label"
    798  * will be returned.
    799  *
    800  * The packet's header is clear before entering IPsec's engine.
    801  *
    802  * The zoneid is the IP zoneid (i.e., GLOBAL_ZONEID for exclusive-IP zones).
    803  * zone_is_global is set if the actual zoneid is global.
    804  *
    805  * On successful return, effective_tslp will point to the new label needed
    806  * or will be NULL if a new label isn't needed. On error, effective_tsl will
    807  * point to NULL.
    808  *
    809  * Returns:
    810  *      0		Label (was|is now) correct
    811  *      EHOSTUNREACH	The packet failed the remote host accreditation.
    812  *      ENOMEM		Memory allocation failure.
    813  *	EINVAL		Label cannot be computed
    814  */
    815 int
    816 tsol_check_label_v4(const ts_label_t *tsl, zoneid_t zoneid, mblk_t **mpp,
    817     uint_t mac_mode, boolean_t zone_is_global, ip_stack_t *ipst,
    818     ts_label_t **effective_tslp)
    819 {
    820 	mblk_t *mp = *mpp;
    821 	ipha_t  *ipha;
    822 	ts_label_t *effective_tsl = NULL;
    823 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
    824 	uint_t hlen;
    825 	uint_t sec_opt_len;
    826 	uchar_t *optr;
    827 	int delta_remove = 0, delta_add, adjust;
    828 	int retv;
    829 
    830 	*effective_tslp = NULL;
    831 	opt_storage[IPOPT_OPTVAL] = 0;
    832 
    833 	ipha = (ipha_t *)mp->b_rptr;
    834 
    835 	/*
    836 	 * Verify the destination is allowed to receive packets at
    837 	 * the security label of the message data. tsol_check_dest()
    838 	 * may create a new effective label or label flags.
    839 	 */
    840 	retv = tsol_check_dest(tsl, &ipha->ipha_dst, IPV4_VERSION,
    841 	    mac_mode, zone_is_global, &effective_tsl);
    842 	if (retv != 0)
    843 		return (retv);
    844 
    845 	/*
    846 	 * Calculate the security label to be placed in the text
    847 	 * of the message (if any).
    848 	 */
    849 	if (effective_tsl != NULL) {
    850 		if ((retv = tsol_compute_label_v4(effective_tsl, zoneid,
    851 		    ipha->ipha_dst, opt_storage, ipst)) != 0) {
    852 			label_rele(effective_tsl);
    853 			return (retv);
    854 		}
    855 		*effective_tslp = effective_tsl;
    856 	} else {
    857 		if ((retv = tsol_compute_label_v4(tsl, zoneid,
    858 		    ipha->ipha_dst, opt_storage, ipst)) != 0) {
    859 			return (retv);
    860 		}
    861 	}
    862 
    863 	optr = (uchar_t *)(ipha + 1);
    864 	hlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
    865 	sec_opt_len = opt_storage[IPOPT_OLEN];
    866 
    867 	if (hlen >= sec_opt_len) {
    868 		/* If no option is supposed to be there, make sure it's not */
    869 		if (sec_opt_len == 0 && hlen > 0 &&
    870 		    optr[IPOPT_OPTVAL] != IPOPT_COMSEC &&
    871 		    optr[IPOPT_OPTVAL] != IPOPT_SECURITY)
    872 			return (0);
    873 		/* if the option is there, it's always first */
    874 		if (sec_opt_len != 0 &&
    875 		    bcmp(opt_storage, optr, sec_opt_len) == 0)
    876 			return (0);
    877 	}
    878 
    879 	/*
    880 	 * If there is an option there, then it must be the wrong one; delete.
    881 	 */
    882 	if (hlen > 0) {
    883 		delta_remove = tsol_remove_secopt(ipha, MBLKL(mp));
    884 		mp->b_wptr += delta_remove;
    885 	}
    886 
    887 	/* Make sure we have room for the worst-case addition */
    888 	hlen = IPH_HDR_LENGTH(ipha) + opt_storage[IPOPT_OLEN];
    889 	hlen = (hlen + 3) & ~3;
    890 	if (hlen > IP_MAX_HDR_LENGTH)
    891 		hlen = IP_MAX_HDR_LENGTH;
    892 	hlen -= IPH_HDR_LENGTH(ipha);
    893 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
    894 		int copylen;
    895 		mblk_t *new_mp;
    896 
    897 		/* allocate enough to be meaningful, but not *too* much */
    898 		copylen = MBLKL(mp);
    899 		if (copylen > 256)
    900 			copylen = 256;
    901 		new_mp = allocb_tmpl(hlen + copylen +
    902 		    (mp->b_rptr - mp->b_datap->db_base), mp);
    903 		if (new_mp == NULL) {
    904 			if (effective_tsl != NULL) {
    905 				label_rele(effective_tsl);
    906 				*effective_tslp = NULL;
    907 			}
    908 			return (ENOMEM);
    909 		}
    910 
    911 		/* keep the bias */
    912 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
    913 		new_mp->b_wptr = new_mp->b_rptr + copylen;
    914 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
    915 		new_mp->b_cont = mp;
    916 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
    917 			new_mp->b_cont = mp->b_cont;
    918 			freeb(mp);
    919 		}
    920 		*mpp = mp = new_mp;
    921 		ipha = (ipha_t *)mp->b_rptr;
    922 	}
    923 
    924 	delta_add = tsol_prepend_option(opt_storage, ipha, MBLKL(mp));
    925 	if (delta_add == -1)
    926 		goto param_prob;
    927 
    928 	ASSERT((mp->b_wptr + delta_add) <= DB_LIM(mp));
    929 	mp->b_wptr += delta_add;
    930 
    931 	adjust = delta_remove + delta_add;
    932 	adjust += ntohs(ipha->ipha_length);
    933 	ipha->ipha_length = htons(adjust);
    934 
    935 	return (0);
    936 
    937 param_prob:
    938 	if (effective_tsl != NULL) {
    939 		label_rele(effective_tsl);
    940 		*effective_tslp = NULL;
    941 	}
    942 	return (EINVAL);
    943 }
    944 
    945 /*
    946  * IPv6 HopOpt extension header for the label option layout:
    947  *	- One octet giving the type of the 'next extension header'
    948  *	- Header extension length in 8-byte words, not including the
    949  *	  1st 8 bytes, but including any pad bytes at the end.
    950  *	  Eg. A value of 2 means 16 bytes not including the 1st 8 bytes.
    951  *	- Followed by TLV encoded IPv6 label option. Option layout is
    952  *		* One octet, IP6OPT_LS
    953  *		* One octet option length in bytes of the option data following
    954  *		  the length, but not including any pad bytes at the end.
    955  *		* Four-octet DOI (IP6LS_DOI_V4)
    956  *		* One octet suboption, IP6LS_TT_V4
    957  *		* One octet suboption length in bytes of the suboption
    958  *		  following the suboption length, including the suboption
    959  *		  header length, but not including any pad bytes at the end.
    960  *	- Pad to make the extension header a multiple of 8 bytes.
    961  *
    962  * This function returns the contents of 'IPv6 option structure' in the above.
    963  * i.e starting from the IP6OPT_LS but not including the pad at the end.
    964  * The user must prepend two octets (either padding or next header / length)
    965  * and append padding out to the next 8 octet boundary.
    966  *
    967  * The zoneid is the IP zoneid (i.e., GLOBAL_ZONEID for exlusive-IP zones).
    968  */
    969 int
    970 tsol_compute_label_v6(const ts_label_t *tsl, zoneid_t zoneid,
    971     const in6_addr_t *dst, uchar_t *opt_storage, ip_stack_t *ipst)
    972 {
    973 	uint_t		sec_opt_len;
    974 	uint32_t	doi;
    975 	ire_t		*ire;
    976 	tsol_ire_gw_secattr_t *attrp = NULL;
    977 
    978 	if (ip6opt_ls == 0)
    979 		return (EINVAL);
    980 
    981 	if (opt_storage != NULL)
    982 		opt_storage[IPOPT_OLEN] = 0;
    983 
    984 	if (tsl == NULL)
    985 		return (0);
    986 
    987 	/* Always pass multicast */
    988 	if (IN6_IS_ADDR_MULTICAST(dst))
    989 		return (0);
    990 
    991 	/*
    992 	 * Fill in a V6 label.  If a new format is added here, make certain
    993 	 * that the maximum size of this label is reflected in sys/tsol/tnet.h
    994 	 * as TSOL_MAX_IPV6_OPTION.
    995 	 */
    996 	if (tsl->tsl_flags & TSLF_IMPLICIT_OUT)
    997 		return (0);
    998 
    999 	if (tsl->tsl_flags & TSLF_UNLABELED) {
   1000 		/*
   1001 		 * The destination is unlabeled. Only add a label if the
   1002 		 * destination is not a broadcast/local/loopback address,
   1003 		 * the destination is not on the same subnet, and the
   1004 		 * next-hop gateway is labeled.
   1005 		 */
   1006 		ire = ire_route_recursive_v6(dst, 0, NULL, zoneid, tsl,
   1007 		    MATCH_IRE_SECATTR, IRR_ALLOCATE, 0, ipst, NULL, &attrp,
   1008 		    NULL);
   1009 		ASSERT(ire != NULL);
   1010 		if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
   1011 			/* no route to destination */
   1012 			ire_refrele(ire);
   1013 			DTRACE_PROBE3(
   1014 			    tx__tnopt__log__info__labeling__routedst__v6,
   1015 			    char *, "No route to unlabeled dest ip6(1) with "
   1016 			    "label(2).", in6_addr_t *, dst, ts_label_t *, tsl);
   1017 			return (EHOSTUNREACH);
   1018 		}
   1019 		if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK |
   1020 		    IRE_INTERFACE)) {
   1021 			ire_refrele(ire);
   1022 			return (0);
   1023 		}
   1024 		/*
   1025 		 * ire_route_recursive gives us the first attrp it finds
   1026 		 * in the recursive lookup.
   1027 		 */
   1028 		/*
   1029 		 * Return now if next hop gateway is unlabeled. There is
   1030 		 * no need to generate a CIPSO option for this message.
   1031 		 */
   1032 		if (attrp == NULL || attrp->igsa_rhc == NULL ||
   1033 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type == UNLABELED) {
   1034 			ire_refrele(ire);
   1035 			return (0);
   1036 		}
   1037 		ire_refrele(ire);
   1038 	}
   1039 
   1040 	/* compute the CIPSO option */
   1041 	if (opt_storage != NULL)
   1042 		opt_storage += 8;
   1043 	sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
   1044 	    tsl->tsl_doi);
   1045 
   1046 	if (sec_opt_len == 0) {
   1047 		DTRACE_PROBE3(tx__tnopt__log__error__labeling__lostops__v6,
   1048 		    char *, "options lack length for dest ip6(1) with "
   1049 		    "label(2).", in6_addr_t *, dst, ts_label_t *, tsl);
   1050 		return (EINVAL);
   1051 	}
   1052 
   1053 	if (opt_storage == NULL)
   1054 		return (0);
   1055 
   1056 	if (sec_opt_len < IP_MAX_OPT_LENGTH)
   1057 		opt_storage[sec_opt_len] = IPOPT_EOL;
   1058 
   1059 	/*
   1060 	 * Just in case the option length is odd, round it up to the next even
   1061 	 * multiple.  The IPv6 option definition doesn't like odd numbers for
   1062 	 * some reason.
   1063 	 *
   1064 	 * Length in the overall option header (IP6OPT_LS) does not include the
   1065 	 * option header itself, but the length in the suboption does include
   1066 	 * the suboption header.  Thus, when there's just one suboption, the
   1067 	 * length in the option header is the suboption length plus 4 (for the
   1068 	 * DOI value).
   1069 	 */
   1070 	opt_storage[-2] = IP6LS_TT_V4;
   1071 	opt_storage[-1] = (sec_opt_len + 2 + 1) & ~1;
   1072 	opt_storage[-8] = ip6opt_ls;
   1073 	opt_storage[-7] = opt_storage[-1] + 4;
   1074 	doi = htons(IP6LS_DOI_V4);
   1075 	bcopy(&doi, opt_storage - 6, 4);
   1076 
   1077 	return (0);
   1078 }
   1079 
   1080 /*
   1081  * Locate the start of the IP6OPT_LS label option and return it.
   1082  * Also return the start of the next non-pad option in after_secoptp.
   1083  * Usually the label option is the first option at least when packets
   1084  * are generated, but for generality we don't assume that on received packets.
   1085  *
   1086  * The function will return with B_FALSE if an IP format error
   1087  * or an unexpected label content error is encountered.
   1088  */
   1089 boolean_t
   1090 tsol_find_secopt_v6(
   1091     const uchar_t *ip6hbh,	/* Start of the hop-by-hop extension header */
   1092     uint_t hbhlen,		/* Length of the hop-by-hop extension header */
   1093     uchar_t **secoptp,		/* Location of IP6OPT_LS label option */
   1094     uchar_t **after_secoptp,	/* Non-pad option following the label option */
   1095     boolean_t *hbh_needed)	/* Is hop-by-hop hdr needed w/o label */
   1096 {
   1097 	uint_t	optlen;
   1098 	uint_t	optused;
   1099 	const uchar_t *optptr;
   1100 	uchar_t	opt_type;
   1101 
   1102 	*secoptp = NULL;
   1103 	*hbh_needed = B_FALSE;
   1104 	*after_secoptp = NULL;
   1105 	optlen = hbhlen - 2;
   1106 	optptr = ip6hbh + 2;
   1107 	while (optlen != 0) {
   1108 		opt_type = *optptr;
   1109 		if (opt_type == IP6OPT_PAD1) {
   1110 			optptr++;
   1111 			optlen--;
   1112 			continue;
   1113 		}
   1114 		if (optlen == 1)
   1115 			return (B_FALSE);
   1116 		optused = 2 + optptr[1];
   1117 		if (optused > optlen)
   1118 			return (B_FALSE);
   1119 		/*
   1120 		 * if we get here, ip6opt_ls can
   1121 		 * not be 0 because it will always
   1122 		 * match the IP6OPT_PAD1 above.
   1123 		 * Therefore ip6opt_ls == 0 forces
   1124 		 * this test to always fail here.
   1125 		 */
   1126 		if (opt_type == ip6opt_ls) {
   1127 			if (*secoptp != NULL)
   1128 				/* More than one security option found */
   1129 				return (B_FALSE);
   1130 			*secoptp = (uchar_t *)optptr;
   1131 		} else switch (opt_type) {
   1132 		case IP6OPT_PADN:
   1133 			break;
   1134 		default:
   1135 			/*
   1136 			 * There is at least 1 option other than
   1137 			 * the label option. So the hop-by-hop header is needed
   1138 			 */
   1139 			*hbh_needed = B_TRUE;
   1140 			if (*secoptp != NULL) {
   1141 				*after_secoptp = (uchar_t *)optptr;
   1142 				return (B_TRUE);
   1143 			}
   1144 			break;
   1145 		}
   1146 		optlen -= optused;
   1147 		optptr += optused;
   1148 	}
   1149 	return (B_TRUE);
   1150 }
   1151 
   1152 /*
   1153  * Remove the label option from the hop-by-hop options header if it exists.
   1154  * 'buflen' is the total length of the packet typically b_wptr - b_rptr.
   1155  * Header and data following the label option that is deleted are copied
   1156  * (i.e. slid backward) to the right position, and returns the number
   1157  * of bytes removed (as zero or negative number.)
   1158  *
   1159  * Note that tsol_remove_secopt does not adjust ipha_length but
   1160  * tsol_remove_secopt_v6 does adjust ip6_plen.
   1161  */
   1162 int
   1163 tsol_remove_secopt_v6(ip6_t *ip6h, int buflen)
   1164 {
   1165 	uchar_t	*ip6hbh;	/* hop-by-hop header */
   1166 	uint_t	hbhlen;		/* hop-by-hop extension header length */
   1167 	uchar_t *secopt = NULL;
   1168 	uchar_t *after_secopt;
   1169 	uint_t	pad;
   1170 	uint_t	delta;
   1171 	boolean_t hbh_needed;
   1172 
   1173 	/*
   1174 	 * hop-by-hop extension header must appear first, if it does not
   1175 	 * exist, there is no label option.
   1176 	 */
   1177 	if (ip6h->ip6_nxt != IPPROTO_HOPOPTS)
   1178 		return (0);
   1179 
   1180 	ip6hbh = (uchar_t *)&ip6h[1];
   1181 	hbhlen = (ip6hbh[1] + 1) << 3;
   1182 	/*
   1183 	 * Locate the start of the label option if it exists and the end
   1184 	 * of the label option including pads if any.
   1185 	 */
   1186 	if (!tsol_find_secopt_v6(ip6hbh, hbhlen, &secopt, &after_secopt,
   1187 	    &hbh_needed)) {
   1188 		/*
   1189 		 * This function should not see invalid messages.
   1190 		 * If one occurs, it would indicate either an
   1191 		 * option previously verified in the forwarding
   1192 		 * path has been corrupted or an option was
   1193 		 * incorrectly generated locally.
   1194 		 */
   1195 		ASSERT(0);
   1196 		return (0);
   1197 	}
   1198 	if (secopt == NULL)
   1199 		return (0);
   1200 	if (!hbh_needed) {
   1201 		uchar_t	next_hdr;
   1202 		/*
   1203 		 * The label option was the only option in the hop-by-hop
   1204 		 * header. We don't need the hop-by-hop header itself any
   1205 		 * longer.
   1206 		 */
   1207 		next_hdr = ip6hbh[0];
   1208 		ovbcopy(ip6hbh + hbhlen, ip6hbh,
   1209 		    buflen - (IPV6_HDR_LEN + hbhlen));
   1210 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - hbhlen);
   1211 		ip6h->ip6_nxt = next_hdr;
   1212 		return (-hbhlen);
   1213 	}
   1214 
   1215 	if (after_secopt == NULL) {
   1216 		/* There is no option following the label option */
   1217 		after_secopt = ip6hbh + hbhlen;
   1218 	}
   1219 
   1220 	/*
   1221 	 * After deleting the label option, we need to slide the headers
   1222 	 * and data back, while still maintaining the same alignment (module 8)
   1223 	 * for the other options. So we slide the headers and data back only
   1224 	 * by an integral multiple of 8 bytes, and fill the remaining bytes
   1225 	 * with pads.
   1226 	 */
   1227 	delta = after_secopt - secopt;
   1228 	pad = delta % 8;
   1229 	if (pad == 1) {
   1230 		secopt[0] = IP6OPT_PAD1;
   1231 	} else if (pad > 1) {
   1232 		secopt[0] = IP6OPT_PADN;
   1233 		secopt[1] = pad - 2;
   1234 		if (pad > 2)
   1235 			bzero(&secopt[2], pad - 2);
   1236 	}
   1237 	secopt += pad;
   1238 	delta -= pad;
   1239 	ovbcopy(after_secopt, secopt,
   1240 	    (uchar_t *)ip6h + buflen - after_secopt);
   1241 	ip6hbh[1] -= delta/8;
   1242 	ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - delta);
   1243 
   1244 	return (-delta);
   1245 }
   1246 
   1247 /*
   1248  * 'optbuf' contains a CIPSO label embedded in an IPv6 hop-by-hop option,
   1249  * starting with the IP6OPT_LS option type. The format of this hop-by-hop
   1250  * option is described in the block comment above tsol_compute_label_v6.
   1251  * This function prepends this hop-by-hop option before any other hop-by-hop
   1252  * options in the hop-by-hop header if one already exists, else a new
   1253  * hop-by-hop header is created and stuffed into the packet following
   1254  * the IPv6 header. 'buflen' is the total length of the packet i.e.
   1255  * b_wptr - b_rptr. The caller ensures that there is enough space for the
   1256  * extra option being added. Header and data following the position where
   1257  * the label option is inserted are copied (i.e. slid forward) to the right
   1258  * position.
   1259  *
   1260  * Note that tsol_prepend_option does not adjust ipha_length but
   1261  * tsol_prepend_option_v6 does adjust ip6_plen.
   1262  */
   1263 int
   1264 tsol_prepend_option_v6(uchar_t *optbuf, ip6_t *ip6h, int buflen)
   1265 {
   1266 	/*
   1267 	 * rawlen is the length of the label option in bytes, not including
   1268 	 * any pads, starting from the IP6OPT_LS (option type) byte.
   1269 	 */
   1270 	uint_t	rawlen;
   1271 
   1272 	uint_t	optlen;		/* rawlen rounded to an 8 byte multiple */
   1273 	uchar_t	*ip6hbh;	/* start of the hop-by-hop extension header */
   1274 	uint_t	hbhlen;		/* Length of the hop-by-hop extension header */
   1275 	uint_t	pad_len;
   1276 	uchar_t	*pad_position;
   1277 	int	delta;		/* Actual number of bytes inserted */
   1278 
   1279 	rawlen = optbuf[1] + 2;	/* Add 2 for the option type, option length */
   1280 	ip6hbh = (uchar_t *)&ip6h[1];
   1281 	if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
   1282 		/*
   1283 		 * There is a hop-by-hop header present already. In order to
   1284 		 * preserve the alignment of the other options at the existing
   1285 		 * value (modulo 8) we need to pad the label option to a
   1286 		 * multiple of 8 bytes before prepending it to the other
   1287 		 * options. Slide the extension headers and data forward to
   1288 		 * accomodate the label option at the start of the hop-by-hop
   1289 		 * header
   1290 		 */
   1291 		delta = optlen = (rawlen + 7) & ~7;
   1292 		pad_len = optlen - rawlen;
   1293 		pad_position = ip6hbh + 2 + rawlen;
   1294 		ovbcopy(ip6hbh + 2, ip6hbh + 2 + optlen,
   1295 		    buflen - (IPV6_HDR_LEN + 2));
   1296 		/*
   1297 		 * Bump up the hop-by-hop extension header length by
   1298 		 * the number of 8-byte words added
   1299 		 */
   1300 		optlen >>= 3;
   1301 		if (ip6hbh[1] + optlen > 255)
   1302 			return (-1);
   1303 		ip6hbh[1] += optlen;
   1304 	} else {
   1305 		/*
   1306 		 * There is no hop-by-hop header in the packet. Construct a
   1307 		 * new Hop-by-hop extension header (a multiple of 8 bytes).
   1308 		 * Slide any other extension headers and data forward to
   1309 		 * accomodate this hop-by-hop header
   1310 		 */
   1311 		delta = hbhlen = (2 + rawlen + 7) & ~7; /* +2 for nxthdr, len */
   1312 		pad_len = hbhlen - (2 + rawlen);
   1313 		pad_position = ip6hbh + 2 + rawlen;
   1314 		ovbcopy(ip6hbh, ip6hbh + hbhlen, buflen - IPV6_HDR_LEN);
   1315 		ip6hbh[0] = ip6h->ip6_nxt;
   1316 		/*
   1317 		 * hop-by-hop extension header length in 8-byte words, not
   1318 		 * including the 1st 8 bytes of the hop-by-hop header.
   1319 		 */
   1320 		ip6hbh[1] = (hbhlen >> 3) - 1;
   1321 		ip6h->ip6_nxt = IPPROTO_HOPOPTS;
   1322 	}
   1323 	/*
   1324 	 * Copy the label option into the hop-by-hop header and insert any
   1325 	 * needed pads
   1326 	 */
   1327 	bcopy(optbuf, ip6hbh + 2, rawlen);
   1328 	if (pad_len == 1) {
   1329 		pad_position[0] = IP6OPT_PAD1;
   1330 	} else if (pad_len > 1) {
   1331 		pad_position[0] = IP6OPT_PADN;
   1332 		pad_position[1] = pad_len - 2;
   1333 		if (pad_len > 2)
   1334 			bzero(pad_position + 2, pad_len - 2);
   1335 	}
   1336 	ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + delta);
   1337 	return (delta);
   1338 }
   1339 
   1340 /*
   1341  * tsol_check_label_v6()
   1342  *
   1343  * This routine computes the IP label that should be on the packet based on the
   1344  * connection and destination information.  It's called by the IP forwarding
   1345  * logic and by ip_output_simple. The ULPs generate the labels before calling
   1346  * conn_ip_output. If any adjustments to
   1347  * the label are needed due to the connection's MAC-exempt status or
   1348  * the destination's ability to receive labels, an "effective label"
   1349  * will be returned.
   1350  *
   1351  * The packet's header is clear before entering IPsec's engine.
   1352  *
   1353  * The zoneid is the IP zoneid (i.e., GLOBAL_ZONEID for exlusive-IP zones).
   1354  * zone_is_global is set if the actual zoneid is global.
   1355  *
   1356  * On successful return, effective_tslp will point to the new label needed
   1357  * or will be NULL if a new label isn't needed. On error, effective_tsl will
   1358  * point to NULL.
   1359  *
   1360  * Returns:
   1361  *      0		Label (was|is now) correct
   1362  *      EACCES		The packet failed the remote host accreditation.
   1363  *      ENOMEM		Memory allocation failure.
   1364  *	EINVAL		Label cannot be computed
   1365  */
   1366 int
   1367 tsol_check_label_v6(const ts_label_t *tsl, zoneid_t zoneid, mblk_t **mpp,
   1368     uint_t mac_mode, boolean_t zone_is_global, ip_stack_t *ipst,
   1369     ts_label_t **effective_tslp)
   1370 {
   1371 	mblk_t *mp = *mpp;
   1372 	ip6_t  *ip6h;
   1373 	ts_label_t *effective_tsl = NULL;
   1374 	/*
   1375 	 * Label option length is limited to IP_MAX_OPT_LENGTH for
   1376 	 * symmetry with IPv4. Can be relaxed if needed
   1377 	 */
   1378 	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
   1379 	uint_t hlen;
   1380 	uint_t sec_opt_len; /* label option length not including type, len */
   1381 	int delta_remove = 0, delta_add;
   1382 	int retv;
   1383 	uchar_t	*after_secopt;
   1384 	uchar_t	*secopt = NULL;
   1385 	uchar_t	*ip6hbh;
   1386 	uint_t	hbhlen;
   1387 	boolean_t hbh_needed;
   1388 
   1389 	*effective_tslp = NULL;
   1390 
   1391 	/*
   1392 	 * Verify the destination is allowed to receive packets at
   1393 	 * the security label of the message data. tsol_check_dest()
   1394 	 * may create a new effective label or label flags.
   1395 	 */
   1396 	ip6h = (ip6_t *)mp->b_rptr;
   1397 	retv = tsol_check_dest(tsl, &ip6h->ip6_dst, IPV6_VERSION,
   1398 	    mac_mode, zone_is_global, &effective_tsl);
   1399 	if (retv != 0)
   1400 		return (retv);
   1401 
   1402 	/*
   1403 	 * Calculate the security label to be placed in the text
   1404 	 * of the message (if any).
   1405 	 */
   1406 	if (effective_tsl != NULL) {
   1407 		if ((retv = tsol_compute_label_v6(effective_tsl, zoneid,
   1408 		    &ip6h->ip6_dst, opt_storage, ipst)) != 0) {
   1409 			label_rele(effective_tsl);
   1410 			return (retv);
   1411 		}
   1412 		*effective_tslp = effective_tsl;
   1413 	} else {
   1414 		if ((retv = tsol_compute_label_v6(tsl, zoneid,
   1415 		    &ip6h->ip6_dst, opt_storage, ipst)) != 0)
   1416 			return (retv);
   1417 	}
   1418 
   1419 	sec_opt_len = opt_storage[1];
   1420 
   1421 	if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
   1422 		ip6hbh = (uchar_t *)&ip6h[1];
   1423 		hbhlen = (ip6hbh[1] + 1) << 3;
   1424 		if (!tsol_find_secopt_v6(ip6hbh, hbhlen, &secopt,
   1425 		    &after_secopt, &hbh_needed)) {
   1426 			/*
   1427 			 * This function should not see invalid messages.
   1428 			 * If one occurs, it would indicate either an
   1429 			 * option previously verified in the forwarding
   1430 			 * path has been corrupted or an option was
   1431 			 * incorrectly generated locally.
   1432 			 */
   1433 			ASSERT(0);
   1434 			return (EACCES);
   1435 		}
   1436 	}
   1437 
   1438 	if (sec_opt_len == 0 && secopt == NULL) {
   1439 		/*
   1440 		 * The packet is not supposed to have a label, and it
   1441 		 * does not have one currently
   1442 		 */
   1443 		return (0);
   1444 	}
   1445 
   1446 	if (secopt != NULL && sec_opt_len != 0 &&
   1447 	    (bcmp(opt_storage, secopt, sec_opt_len + 2) == 0)) {
   1448 		/* The packet has the correct label already */
   1449 		return (0);
   1450 	}
   1451 
   1452 	/*
   1453 	 * If there is an option there, then it must be the wrong one; delete.
   1454 	 */
   1455 	if (secopt != NULL) {
   1456 		delta_remove = tsol_remove_secopt_v6(ip6h, MBLKL(mp));
   1457 		mp->b_wptr += delta_remove;
   1458 	}
   1459 
   1460 	/*
   1461 	 * Make sure we have room for the worst-case addition. Add 2 bytes for
   1462 	 * the hop-by-hop ext header's next header and length fields. Add
   1463 	 * another 2 bytes for the label option type, len and then round
   1464 	 * up to the next 8-byte multiple.
   1465 	 */
   1466 	hlen = (4 + sec_opt_len + 7) & ~7;
   1467 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
   1468 		int copylen;
   1469 		mblk_t *new_mp;
   1470 		uint16_t hdr_len;
   1471 
   1472 		hdr_len = ip_hdr_length_v6(mp, ip6h);
   1473 		/*
   1474 		 * Allocate enough to be meaningful, but not *too* much.
   1475 		 * Also all the IPv6 extension headers must be in the same mblk
   1476 		 */
   1477 		copylen = MBLKL(mp);
   1478 		if (copylen > 256)
   1479 			copylen = 256;
   1480 		if (copylen < hdr_len)
   1481 			copylen = hdr_len;
   1482 		new_mp = allocb_tmpl(hlen + copylen +
   1483 		    (mp->b_rptr - mp->b_datap->db_base), mp);
   1484 		if (new_mp == NULL) {
   1485 			if (effective_tsl != NULL) {
   1486 				label_rele(effective_tsl);
   1487 				*effective_tslp = NULL;
   1488 			}
   1489 			return (ENOMEM);
   1490 		}
   1491 
   1492 		/* keep the bias */
   1493 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
   1494 		new_mp->b_wptr = new_mp->b_rptr + copylen;
   1495 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
   1496 		new_mp->b_cont = mp;
   1497 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
   1498 			new_mp->b_cont = mp->b_cont;
   1499 			freeb(mp);
   1500 		}
   1501 		*mpp = mp = new_mp;
   1502 		ip6h = (ip6_t *)mp->b_rptr;
   1503 	}
   1504 
   1505 	delta_add = tsol_prepend_option_v6(opt_storage, ip6h, MBLKL(mp));
   1506 	if (delta_add == -1)
   1507 		goto param_prob;
   1508 
   1509 	ASSERT(mp->b_wptr + delta_add <= DB_LIM(mp));
   1510 	mp->b_wptr += delta_add;
   1511 
   1512 	/* tsol_prepend_option_v6 has adjusted ip6_plen */
   1513 	return (0);
   1514 
   1515 param_prob:
   1516 	if (effective_tsl != NULL) {
   1517 		label_rele(effective_tsl);
   1518 		*effective_tslp = NULL;
   1519 	}
   1520 	return (EINVAL);
   1521 }
   1522