Home | History | Annotate | Download | only in ip
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 /*
     26  * Copyright (c) 1990 Mentat Inc.
     27  */
     28 
     29 #include <sys/types.h>
     30 #include <sys/stream.h>
     31 #include <sys/dlpi.h>
     32 #include <sys/stropts.h>
     33 #include <sys/sysmacros.h>
     34 #include <sys/strsun.h>
     35 #include <sys/strlog.h>
     36 #include <sys/strsubr.h>
     37 #define	_SUN_TPI_VERSION	2
     38 #include <sys/tihdr.h>
     39 #include <sys/ddi.h>
     40 #include <sys/sunddi.h>
     41 #include <sys/cmn_err.h>
     42 #include <sys/debug.h>
     43 #include <sys/sdt.h>
     44 #include <sys/kobj.h>
     45 #include <sys/zone.h>
     46 #include <sys/neti.h>
     47 #include <sys/hook.h>
     48 
     49 #include <sys/kmem.h>
     50 #include <sys/systm.h>
     51 #include <sys/param.h>
     52 #include <sys/socket.h>
     53 #include <sys/vtrace.h>
     54 #include <sys/isa_defs.h>
     55 #include <sys/atomic.h>
     56 #include <sys/policy.h>
     57 #include <sys/mac.h>
     58 #include <net/if.h>
     59 #include <net/if_types.h>
     60 #include <net/route.h>
     61 #include <net/if_dl.h>
     62 #include <sys/sockio.h>
     63 #include <netinet/in.h>
     64 #include <netinet/ip6.h>
     65 #include <netinet/icmp6.h>
     66 #include <netinet/sctp.h>
     67 
     68 #include <inet/common.h>
     69 #include <inet/mi.h>
     70 #include <inet/optcom.h>
     71 #include <inet/mib2.h>
     72 #include <inet/nd.h>
     73 #include <inet/arp.h>
     74 
     75 #include <inet/ip.h>
     76 #include <inet/ip_impl.h>
     77 #include <inet/ip6.h>
     78 #include <inet/ip6_asp.h>
     79 #include <inet/tcp.h>
     80 #include <inet/tcp_impl.h>
     81 #include <inet/udp_impl.h>
     82 #include <inet/ipp_common.h>
     83 
     84 #include <inet/ip_multi.h>
     85 #include <inet/ip_if.h>
     86 #include <inet/ip_ire.h>
     87 #include <inet/ip_rts.h>
     88 #include <inet/ip_ndp.h>
     89 #include <net/pfkeyv2.h>
     90 #include <inet/sadb.h>
     91 #include <inet/ipsec_impl.h>
     92 #include <inet/iptun/iptun_impl.h>
     93 #include <inet/sctp_ip.h>
     94 #include <sys/pattr.h>
     95 #include <inet/ipclassifier.h>
     96 #include <inet/ipsecah.h>
     97 #include <inet/rawip_impl.h>
     98 #include <inet/rts_impl.h>
     99 #include <sys/squeue_impl.h>
    100 #include <sys/squeue.h>
    101 
    102 #include <sys/tsol/label.h>
    103 #include <sys/tsol/tnet.h>
    104 
    105 /* Temporary; for CR 6451644 work-around */
    106 #include <sys/ethernet.h>
    107 
    108 /*
    109  * Naming conventions:
    110  *      These rules should be judiciously applied
    111  *	if there is a need to identify something as IPv6 versus IPv4
    112  *	IPv6 functions will end with _v6 in the ip module.
    113  *	IPv6 functions will end with _ipv6 in the transport modules.
    114  *	IPv6 macros:
    115  *		Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
    116  *		Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
    117  *		And then there are ..V4_PART_OF_V6.
    118  *		The intent is that macros in the ip module end with _V6.
    119  *	IPv6 global variables will start with ipv6_
    120  *	IPv6 structures will start with ipv6
    121  *	IPv6 defined constants should start with IPV6_
    122  *		(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
    123  */
    124 
    125 /*
    126  * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
    127  * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
    128  * from IANA. This mechanism will remain in effect until an official
    129  * number is obtained.
    130  */
    131 uchar_t ip6opt_ls;
    132 
    133 const in6_addr_t ipv6_all_ones =
    134 	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
    135 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };
    136 
    137 #ifdef	_BIG_ENDIAN
    138 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
    139 #else	/* _BIG_ENDIAN */
    140 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
    141 #endif	/* _BIG_ENDIAN */
    142 
    143 #ifdef	_BIG_ENDIAN
    144 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
    145 #else  /* _BIG_ENDIAN */
    146 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
    147 #endif /* _BIG_ENDIAN */
    148 
    149 #ifdef _BIG_ENDIAN
    150 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
    151 #else  /* _BIG_ENDIAN */
    152 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
    153 #endif /* _BIG_ENDIAN */
    154 
    155 #ifdef _BIG_ENDIAN
    156 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
    157 #else  /* _BIG_ENDIAN */
    158 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
    159 #endif /* _BIG_ENDIAN */
    160 
    161 #ifdef _BIG_ENDIAN
    162 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
    163 #else  /* _BIG_ENDIAN */
    164 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
    165 #endif /* _BIG_ENDIAN */
    166 
    167 #ifdef _BIG_ENDIAN
    168 const in6_addr_t ipv6_solicited_node_mcast =
    169 			{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
    170 #else  /* _BIG_ENDIAN */
    171 const in6_addr_t ipv6_solicited_node_mcast =
    172 			{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
    173 #endif /* _BIG_ENDIAN */
    174 
    175 static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *);
    176 static void	icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *);
    177 static void	icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *,
    178     ip_recv_attr_t *);
    179 static void	icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *,
    180     ip_recv_attr_t *);
    181 static void	icmp_send_redirect_v6(mblk_t *, in6_addr_t *,
    182     in6_addr_t *, ip_recv_attr_t *);
    183 static void	icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *,
    184     ip_recv_attr_t *);
    185 static boolean_t	ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
    186 
    187 /*
    188  * icmp_inbound_v6 deals with ICMP messages that are handled by IP.
    189  * If the ICMP message is consumed by IP, i.e., it should not be delivered
    190  * to any IPPROTO_ICMP raw sockets, then it returns NULL.
    191  * Likewise, if the ICMP error is misformed (too short, etc), then it
    192  * returns NULL. The caller uses this to determine whether or not to send
    193  * to raw sockets.
    194  *
    195  * All error messages are passed to the matching transport stream.
    196  *
    197  * See comment for icmp_inbound_v4() on how IPsec is handled.
    198  */
    199 mblk_t *
    200 icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira)
    201 {
    202 	icmp6_t		*icmp6;
    203 	ip6_t		*ip6h;		/* Outer header */
    204 	int		ip_hdr_length;	/* Outer header length */
    205 	boolean_t	interested;
    206 	ill_t		*ill = ira->ira_ill;
    207 	ip_stack_t	*ipst = ill->ill_ipst;
    208 	mblk_t		*mp_ret = NULL;
    209 
    210 	ip6h = (ip6_t *)mp->b_rptr;
    211 
    212 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
    213 
    214 	/* Make sure ira_l2src is set for ndp_input */
    215 	if (!(ira->ira_flags & IRAF_L2SRC_SET))
    216 		ip_setl2src(mp, ira, ira->ira_rill);
    217 
    218 	ip_hdr_length = ira->ira_ip_hdr_length;
    219 	if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
    220 		if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
    221 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
    222 			ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
    223 			freemsg(mp);
    224 			return (NULL);
    225 		}
    226 		ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
    227 		if (ip6h == NULL) {
    228 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
    229 			freemsg(mp);
    230 			return (NULL);
    231 		}
    232 	}
    233 
    234 	icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
    235 	DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6);
    236 	ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type,
    237 	    icmp6->icmp6_code));
    238 
    239 	/*
    240 	 * We will set "interested" to "true" if we should pass a copy to
    241 	 * the transport i.e., if it is an error message.
    242 	 */
    243 	interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK);
    244 
    245 	switch (icmp6->icmp6_type) {
    246 	case ICMP6_DST_UNREACH:
    247 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs);
    248 		if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
    249 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs);
    250 		break;
    251 
    252 	case ICMP6_TIME_EXCEEDED:
    253 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds);
    254 		break;
    255 
    256 	case ICMP6_PARAM_PROB:
    257 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems);
    258 		break;
    259 
    260 	case ICMP6_PACKET_TOO_BIG:
    261 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs);
    262 		break;
    263 
    264 	case ICMP6_ECHO_REQUEST:
    265 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos);
    266 		if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
    267 		    !ipst->ips_ipv6_resp_echo_mcast)
    268 			break;
    269 
    270 		/*
    271 		 * We must have exclusive use of the mblk to convert it to
    272 		 * a response.
    273 		 * If not, we copy it.
    274 		 */
    275 		if (mp->b_datap->db_ref > 1) {
    276 			mblk_t	*mp1;
    277 
    278 			mp1 = copymsg(mp);
    279 			if (mp1 == NULL) {
    280 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
    281 				ip_drop_input("ipIfStatsInDiscards - copymsg",
    282 				    mp, ill);
    283 				freemsg(mp);
    284 				return (NULL);
    285 			}
    286 			freemsg(mp);
    287 			mp = mp1;
    288 			ip6h = (ip6_t *)mp->b_rptr;
    289 			icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
    290 		}
    291 
    292 		icmp6->icmp6_type = ICMP6_ECHO_REPLY;
    293 		icmp_send_reply_v6(mp, ip6h, icmp6, ira);
    294 		return (NULL);
    295 
    296 	case ICMP6_ECHO_REPLY:
    297 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies);
    298 		break;
    299 
    300 	case ND_ROUTER_SOLICIT:
    301 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits);
    302 		break;
    303 
    304 	case ND_ROUTER_ADVERT:
    305 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements);
    306 		break;
    307 
    308 	case ND_NEIGHBOR_SOLICIT:
    309 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits);
    310 		ndp_input(mp, ira);
    311 		return (NULL);
    312 
    313 	case ND_NEIGHBOR_ADVERT:
    314 		BUMP_MIB(ill->ill_icmp6_mib,
    315 		    ipv6IfIcmpInNeighborAdvertisements);
    316 		ndp_input(mp, ira);
    317 		return (NULL);
    318 
    319 	case ND_REDIRECT:
    320 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects);
    321 
    322 		if (ipst->ips_ipv6_ignore_redirect)
    323 			break;
    324 
    325 		/* We now allow a RAW socket to receive this. */
    326 		interested = B_TRUE;
    327 		break;
    328 
    329 	/*
    330 	 * The next three icmp messages will be handled by MLD.
    331 	 * Pass all valid MLD packets up to any process(es)
    332 	 * listening on a raw ICMP socket.
    333 	 */
    334 	case MLD_LISTENER_QUERY:
    335 	case MLD_LISTENER_REPORT:
    336 	case MLD_LISTENER_REDUCTION:
    337 		mp = mld_input(mp, ira);
    338 		return (mp);
    339 	default:
    340 		break;
    341 	}
    342 	/*
    343 	 * See if there is an ICMP client to avoid an extra copymsg/freemsg
    344 	 * if there isn't one.
    345 	 */
    346 	if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) {
    347 		/* If there is an ICMP client and we want one too, copy it. */
    348 
    349 		if (!interested) {
    350 			/* Caller will deliver to RAW sockets */
    351 			return (mp);
    352 		}
    353 		mp_ret = copymsg(mp);
    354 		if (mp_ret == NULL) {
    355 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
    356 			ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
    357 		}
    358 	} else if (!interested) {
    359 		/* Neither we nor raw sockets are interested. Drop packet now */
    360 		freemsg(mp);
    361 		return (NULL);
    362 	}
    363 
    364 	/*
    365 	 * ICMP error or redirect packet. Make sure we have enough of
    366 	 * the header and that db_ref == 1 since we might end up modifying
    367 	 * the packet.
    368 	 */
    369 	if (mp->b_cont != NULL) {
    370 		if (ip_pullup(mp, -1, ira) == NULL) {
    371 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
    372 			ip_drop_input("ipIfStatsInDiscards - ip_pullup",
    373 			    mp, ill);
    374 			freemsg(mp);
    375 			return (mp_ret);
    376 		}
    377 	}
    378 
    379 	if (mp->b_datap->db_ref > 1) {
    380 		mblk_t	*mp1;
    381 
    382 		mp1 = copymsg(mp);
    383 		if (mp1 == NULL) {
    384 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
    385 			ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
    386 			freemsg(mp);
    387 			return (mp_ret);
    388 		}
    389 		freemsg(mp);
    390 		mp = mp1;
    391 	}
    392 
    393 	/*
    394 	 * In case mp has changed, verify the message before any further
    395 	 * processes.
    396 	 */
    397 	ip6h = (ip6_t *)mp->b_rptr;
    398 	icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
    399 	if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
    400 		freemsg(mp);
    401 		return (mp_ret);
    402 	}
    403 
    404 	switch (icmp6->icmp6_type) {
    405 	case ND_REDIRECT:
    406 		icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira);
    407 		break;
    408 	case ICMP6_PACKET_TOO_BIG:
    409 		/* Update DCE and adjust MTU is icmp header if needed */
    410 		icmp_inbound_too_big_v6(icmp6, ira);
    411 		/* FALLTHRU */
    412 	default:
    413 		icmp_inbound_error_fanout_v6(mp, icmp6, ira);
    414 		break;
    415 	}
    416 
    417 	return (mp_ret);
    418 }
    419 
    420 /*
    421  * Send an ICMP echo reply.
    422  * The caller has already updated the payload part of the packet.
    423  * We handle the ICMP checksum, IP source address selection and feed
    424  * the packet into ip_output_simple.
    425  */
    426 static void
    427 icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6,
    428     ip_recv_attr_t *ira)
    429 {
    430 	uint_t		ip_hdr_length = ira->ira_ip_hdr_length;
    431 	ill_t		*ill = ira->ira_ill;
    432 	ip_stack_t	*ipst = ill->ill_ipst;
    433 	ip_xmit_attr_t	ixas;
    434 	in6_addr_t	origsrc;
    435 
    436 	/*
    437 	 * Remove any extension headers (do not reverse a source route)
    438 	 * and clear the flow id (keep traffic class for now).
    439 	 */
    440 	if (ip_hdr_length != IPV6_HDR_LEN) {
    441 		int	i;
    442 
    443 		for (i = 0; i < IPV6_HDR_LEN; i++) {
    444 			mp->b_rptr[ip_hdr_length - i - 1] =
    445 			    mp->b_rptr[IPV6_HDR_LEN - i - 1];
    446 		}
    447 		mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN);
    448 		ip6h = (ip6_t *)mp->b_rptr;
    449 		ip6h->ip6_nxt = IPPROTO_ICMPV6;
    450 		i = ntohs(ip6h->ip6_plen);
    451 		i -= (ip_hdr_length - IPV6_HDR_LEN);
    452 		ip6h->ip6_plen = htons(i);
    453 		ip_hdr_length = IPV6_HDR_LEN;
    454 		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp));
    455 	}
    456 	ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL;
    457 
    458 	/* Reverse the source and destination addresses. */
    459 	origsrc = ip6h->ip6_src;
    460 	ip6h->ip6_src = ip6h->ip6_dst;
    461 	ip6h->ip6_dst = origsrc;
    462 
    463 	/* set the hop limit */
    464 	ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
    465 
    466 	/*
    467 	 * Prepare for checksum by putting icmp length in the icmp
    468 	 * checksum field. The checksum is calculated in ip_output
    469 	 */
    470 	icmp6->icmp6_cksum = ip6h->ip6_plen;
    471 
    472 	bzero(&ixas, sizeof (ixas));
    473 	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
    474 	ixas.ixa_zoneid = ira->ira_zoneid;
    475 	ixas.ixa_cred = kcred;
    476 	ixas.ixa_cpid = NOPID;
    477 	ixas.ixa_tsl = ira->ira_tsl;	/* Behave as a multi-level responder */
    478 	ixas.ixa_ifindex = 0;
    479 	ixas.ixa_ipst = ipst;
    480 	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
    481 
    482 	if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
    483 		/*
    484 		 * This packet should go out the same way as it
    485 		 * came in i.e in clear, independent of the IPsec
    486 		 * policy for transmitting packets.
    487 		 */
    488 		ixas.ixa_flags |= IXAF_NO_IPSEC;
    489 	} else {
    490 		if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
    491 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
    492 			/* Note: mp already consumed and ip_drop_packet done */
    493 			return;
    494 		}
    495 	}
    496 
    497 	/* Was the destination (now source) link-local? Send out same group */
    498 	if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
    499 		ixas.ixa_flags |= IXAF_SCOPEID_SET;
    500 		if (IS_UNDER_IPMP(ill))
    501 			ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
    502 		else
    503 			ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
    504 	}
    505 
    506 	if (ira->ira_flags & IRAF_MULTIBROADCAST) {
    507 		/*
    508 		 * Not one or our addresses (IRE_LOCALs), thus we let
    509 		 * ip_output_simple pick the source.
    510 		 */
    511 		ip6h->ip6_src = ipv6_all_zeros;
    512 		ixas.ixa_flags |= IXAF_SET_SOURCE;
    513 	}
    514 
    515 	/* Should we send using dce_pmtu? */
    516 	if (ipst->ips_ipv6_icmp_return_pmtu)
    517 		ixas.ixa_flags |= IXAF_PMTU_DISCOVERY;
    518 
    519 	(void) ip_output_simple(mp, &ixas);
    520 	ixa_cleanup(&ixas);
    521 
    522 }
    523 
    524 /*
    525  * Verify the ICMP messages for either for ICMP error or redirect packet.
    526  * The caller should have fully pulled up the message. If it's a redirect
    527  * packet, only basic checks on IP header will be done; otherwise, verify
    528  * the packet by looking at the included ULP header.
    529  *
    530  * Called before icmp_inbound_error_fanout_v6 is called.
    531  */
    532 static boolean_t
    533 icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
    534 {
    535 	ill_t		*ill = ira->ira_ill;
    536 	uint16_t	hdr_length;
    537 	uint8_t		*nexthdrp;
    538 	uint8_t		nexthdr;
    539 	ip_stack_t	*ipst = ill->ill_ipst;
    540 	conn_t		*connp;
    541 	ip6_t		*ip6h;	/* Inner header */
    542 
    543 	ip6h = (ip6_t *)&icmp6[1];
    544 	if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr)
    545 		goto truncated;
    546 
    547 	if (icmp6->icmp6_type == ND_REDIRECT) {
    548 		hdr_length = sizeof (nd_redirect_t);
    549 	} else {
    550 		if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION))
    551 			goto discard_pkt;
    552 		hdr_length = IPV6_HDR_LEN;
    553 	}
    554 
    555 	if ((uchar_t *)ip6h + hdr_length > mp->b_wptr)
    556 		goto truncated;
    557 
    558 	/*
    559 	 * Stop here for ICMP_REDIRECT.
    560 	 */
    561 	if (icmp6->icmp6_type == ND_REDIRECT)
    562 		return (B_TRUE);
    563 
    564 	/*
    565 	 * ICMP errors only.
    566 	 */
    567 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
    568 		goto discard_pkt;
    569 	nexthdr = *nexthdrp;
    570 
    571 	/* Try to pass the ICMP message to clients who need it */
    572 	switch (nexthdr) {
    573 	case IPPROTO_UDP:
    574 		/*
    575 		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
    576 		 * transport header.
    577 		 */
    578 		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
    579 		    mp->b_wptr)
    580 			goto truncated;
    581 		break;
    582 	case IPPROTO_TCP: {
    583 		tcpha_t		*tcpha;
    584 
    585 		/*
    586 		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
    587 		 * transport header.
    588 		 */
    589 		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
    590 		    mp->b_wptr)
    591 			goto truncated;
    592 
    593 		tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
    594 		/*
    595 		 * With IPMP we need to match across group, which we do
    596 		 * since we have the upper ill from ira_ill.
    597 		 */
    598 		connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN,
    599 		    ill->ill_phyint->phyint_ifindex, ipst);
    600 		if (connp == NULL)
    601 			goto discard_pkt;
    602 
    603 		if ((connp->conn_verifyicmp != NULL) &&
    604 		    !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) {
    605 			CONN_DEC_REF(connp);
    606 			goto discard_pkt;
    607 		}
    608 		CONN_DEC_REF(connp);
    609 		break;
    610 	}
    611 	case IPPROTO_SCTP:
    612 		/*
    613 		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
    614 		 * transport header.
    615 		 */
    616 		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
    617 		    mp->b_wptr)
    618 			goto truncated;
    619 		break;
    620 	case IPPROTO_ESP:
    621 	case IPPROTO_AH:
    622 		break;
    623 	case IPPROTO_ENCAP:
    624 	case IPPROTO_IPV6: {
    625 		/* Look for self-encapsulated packets that caused an error */
    626 		ip6_t *in_ip6h;
    627 
    628 		in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
    629 		if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ?
    630 		    sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr)
    631 			goto truncated;
    632 		break;
    633 	}
    634 	default:
    635 		break;
    636 	}
    637 
    638 	return (B_TRUE);
    639 
    640 discard_pkt:
    641 	/* Bogus ICMP error. */
    642 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
    643 	return (B_FALSE);
    644 
    645 truncated:
    646 	/* We pulled up everthing already. Must be truncated */
    647 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
    648 	return (B_FALSE);
    649 }
    650 
    651 /*
    652  * Process received IPv6 ICMP Packet too big.
    653  * The caller is responsible for validating the packet before passing it in
    654  * and also to fanout the ICMP error to any matching transport conns. Assumes
    655  * the message has been fully pulled up.
    656  *
    657  * Before getting here, the caller has called icmp_inbound_verify_v6()
    658  * that should have verified with ULP to prevent undoing the changes we're
    659  * going to make to DCE. For example, TCP might have verified that the packet
    660  * which generated error is in the send window.
    661  *
    662  * In some cases modified this MTU in the ICMP header packet; the caller
    663  * should pass to the matching ULP after this returns.
    664  */
    665 static void
    666 icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira)
    667 {
    668 	uint32_t	mtu;
    669 	dce_t		*dce;
    670 	ill_t		*ill = ira->ira_ill;	/* Upper ill if IPMP */
    671 	ip_stack_t	*ipst = ill->ill_ipst;
    672 	int		old_max_frag;
    673 	in6_addr_t	final_dst;
    674 	ip6_t		*ip6h;	/* Inner IP header */
    675 
    676 	/* Caller has already pulled up everything. */
    677 	ip6h = (ip6_t *)&icmp6[1];
    678 	final_dst = ip_get_dst_v6(ip6h, NULL, NULL);
    679 
    680 	/*
    681 	 * For link local destinations matching simply on address is not
    682 	 * sufficient. Same link local addresses for different ILL's is
    683 	 * possible.
    684 	 */
    685 	if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) {
    686 		dce = dce_lookup_and_add_v6(&final_dst,
    687 		    ill->ill_phyint->phyint_ifindex, ipst);
    688 	} else {
    689 		dce = dce_lookup_and_add_v6(&final_dst, 0, ipst);
    690 	}
    691 	if (dce == NULL) {
    692 		/* Couldn't add a unique one - ENOMEM */
    693 		if (ip_debug > 2) {
    694 			/* ip1dbg */
    695 			pr_addr_dbg("icmp_inbound_too_big_v6:"
    696 			    "no dce for dst %s\n", AF_INET6,
    697 			    &final_dst);
    698 		}
    699 		return;
    700 	}
    701 
    702 	mtu = ntohl(icmp6->icmp6_mtu);
    703 
    704 	mutex_enter(&dce->dce_lock);
    705 	if (dce->dce_flags & DCEF_PMTU)
    706 		old_max_frag = dce->dce_pmtu;
    707 	else
    708 		old_max_frag = ill->ill_mtu;
    709 
    710 	if (mtu < IPV6_MIN_MTU) {
    711 		ip1dbg(("Received mtu less than IPv6 "
    712 		    "min mtu %d: %d\n", IPV6_MIN_MTU, mtu));
    713 		mtu = IPV6_MIN_MTU;
    714 		/*
    715 		 * If an mtu less than IPv6 min mtu is received,
    716 		 * we must include a fragment header in
    717 		 * subsequent packets.
    718 		 */
    719 		dce->dce_flags |= DCEF_TOO_SMALL_PMTU;
    720 	} else {
    721 		dce->dce_flags &= ~DCEF_TOO_SMALL_PMTU;
    722 	}
    723 	ip1dbg(("Received mtu from router: %d\n", mtu));
    724 	dce->dce_pmtu = MIN(old_max_frag, mtu);
    725 
    726 	/* Prepare to send the new max frag size for the ULP. */
    727 	if (dce->dce_flags & DCEF_TOO_SMALL_PMTU) {
    728 		/*
    729 		 * If we need a fragment header in every packet
    730 		 * (above case or multirouting), make sure the
    731 		 * ULP takes it into account when computing the
    732 		 * payload size.
    733 		 */
    734 		icmp6->icmp6_mtu = htonl(dce->dce_pmtu - sizeof (ip6_frag_t));
    735 	} else {
    736 		icmp6->icmp6_mtu = htonl(dce->dce_pmtu);
    737 	}
    738 	/* We now have a PMTU for sure */
    739 	dce->dce_flags |= DCEF_PMTU;
    740 	dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
    741 	mutex_exit(&dce->dce_lock);
    742 	/*
    743 	 * After dropping the lock the new value is visible to everyone.
    744 	 * Then we bump the generation number so any cached values reinspect
    745 	 * the dce_t.
    746 	 */
    747 	dce_increment_generation(dce);
    748 	dce_refrele(dce);
    749 }
    750 
    751 /*
    752  * Fanout received ICMPv6 error packets to the transports.
    753  * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
    754  *
    755  * The caller must have called icmp_inbound_verify_v6.
    756  */
    757 void
    758 icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
    759 {
    760 	uint16_t	*up;	/* Pointer to ports in ULP header */
    761 	uint32_t	ports;	/* reversed ports for fanout */
    762 	ip6_t		rip6h;	/* With reversed addresses */
    763 	ip6_t		*ip6h;	/* Inner IP header */
    764 	uint16_t	hdr_length; /* Inner IP header length */
    765 	uint8_t		*nexthdrp;
    766 	uint8_t		nexthdr;
    767 	tcpha_t		*tcpha;
    768 	conn_t		*connp;
    769 	ill_t		*ill = ira->ira_ill;	/* Upper in the case of IPMP */
    770 	ip_stack_t	*ipst = ill->ill_ipst;
    771 	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
    772 
    773 	/* Caller has already pulled up everything. */
    774 	ip6h = (ip6_t *)&icmp6[1];
    775 	ASSERT(mp->b_cont == NULL);
    776 	ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);
    777 
    778 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
    779 		goto drop_pkt;
    780 	nexthdr = *nexthdrp;
    781 	ira->ira_protocol = nexthdr;
    782 
    783 	/*
    784 	 * We need a separate IP header with the source and destination
    785 	 * addresses reversed to do fanout/classification because the ip6h in
    786 	 * the ICMPv6 error is in the form we sent it out.
    787 	 */
    788 	rip6h.ip6_src = ip6h->ip6_dst;
    789 	rip6h.ip6_dst = ip6h->ip6_src;
    790 	rip6h.ip6_nxt = nexthdr;
    791 
    792 	/* Try to pass the ICMP message to clients who need it */
    793 	switch (nexthdr) {
    794 	case IPPROTO_UDP: {
    795 		/* Attempt to find a client stream based on port. */
    796 		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
    797 
    798 		/* Note that we send error to all matches. */
    799 		ira->ira_flags |= IRAF_ICMP_ERROR;
    800 		ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira);
    801 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
    802 		return;
    803 	}
    804 	case IPPROTO_TCP: {
    805 		/*
    806 		 * Attempt to find a client stream based on port.
    807 		 * Note that we do a reverse lookup since the header is
    808 		 * in the form we sent it out.
    809 		 */
    810 		tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
    811 		/*
    812 		 * With IPMP we need to match across group, which we do
    813 		 * since we have the upper ill from ira_ill.
    814 		 */
    815 		connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
    816 		    TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
    817 		if (connp == NULL) {
    818 			goto drop_pkt;
    819 		}
    820 
    821 		if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
    822 		    (ira->ira_flags & IRAF_IPSEC_SECURE)) {
    823 			mp = ipsec_check_inbound_policy(mp, connp,
    824 			    NULL, ip6h, ira);
    825 			if (mp == NULL) {
    826 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
    827 				/* Note that mp is NULL */
    828 				ip_drop_input("ipIfStatsInDiscards", mp, ill);
    829 				CONN_DEC_REF(connp);
    830 				return;
    831 			}
    832 		}
    833 
    834 		ira->ira_flags |= IRAF_ICMP_ERROR;
    835 		if (IPCL_IS_TCP(connp)) {
    836 			SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
    837 			    connp->conn_recvicmp, connp, ira, SQ_FILL,
    838 			    SQTAG_TCP6_INPUT_ICMP_ERR);
    839 		} else {
    840 			/* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
    841 			ill_t *rill = ira->ira_rill;
    842 
    843 			ira->ira_ill = ira->ira_rill = NULL;
    844 			(connp->conn_recv)(connp, mp, NULL, ira);
    845 			CONN_DEC_REF(connp);
    846 			ira->ira_ill = ill;
    847 			ira->ira_rill = rill;
    848 		}
    849 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
    850 		return;
    851 
    852 	}
    853 	case IPPROTO_SCTP:
    854 		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
    855 		/* Find a SCTP client stream for this packet. */
    856 		((uint16_t *)&ports)[0] = up[1];
    857 		((uint16_t *)&ports)[1] = up[0];
    858 
    859 		ira->ira_flags |= IRAF_ICMP_ERROR;
    860 		ip_fanout_sctp(mp, NULL, &rip6h, ports, ira);
    861 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
    862 		return;
    863 
    864 	case IPPROTO_ESP:
    865 	case IPPROTO_AH:
    866 		if (!ipsec_loaded(ipss)) {
    867 			ip_proto_not_sup(mp, ira);
    868 			return;
    869 		}
    870 
    871 		if (nexthdr == IPPROTO_ESP)
    872 			mp = ipsecesp_icmp_error(mp, ira);
    873 		else
    874 			mp = ipsecah_icmp_error(mp, ira);
    875 		if (mp == NULL)
    876 			return;
    877 
    878 		/* Just in case ipsec didn't preserve the NULL b_cont */
    879 		if (mp->b_cont != NULL) {
    880 			if (!pullupmsg(mp, -1))
    881 				goto drop_pkt;
    882 		}
    883 
    884 		/*
    885 		 * If succesful, the mp has been modified to not include
    886 		 * the ESP/AH header so we can fanout to the ULP's icmp
    887 		 * error handler.
    888 		 */
    889 		if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN)
    890 			goto drop_pkt;
    891 
    892 		ip6h = (ip6_t *)mp->b_rptr;
    893 		/* Don't call hdr_length_v6() unless you have to. */
    894 		if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
    895 			hdr_length = ip_hdr_length_v6(mp, ip6h);
    896 		else
    897 			hdr_length = IPV6_HDR_LEN;
    898 
    899 		/* Verify the modified message before any further processes. */
    900 		icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
    901 		if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
    902 			freemsg(mp);
    903 			return;
    904 		}
    905 
    906 		icmp_inbound_error_fanout_v6(mp, icmp6, ira);
    907 		return;
    908 
    909 	case IPPROTO_IPV6: {
    910 		/* Look for self-encapsulated packets that caused an error */
    911 		ip6_t *in_ip6h;
    912 
    913 		in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
    914 
    915 		if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) &&
    916 		    IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) {
    917 			/*
    918 			 * Self-encapsulated case. As in the ipv4 case,
    919 			 * we need to strip the 2nd IP header. Since mp
    920 			 * is already pulled-up, we can simply bcopy
    921 			 * the 3rd header + data over the 2nd header.
    922 			 */
    923 			uint16_t unused_len;
    924 
    925 			/*
    926 			 * Make sure we don't do recursion more than once.
    927 			 */
    928 			if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h,
    929 			    &unused_len, &nexthdrp) ||
    930 			    *nexthdrp == IPPROTO_IPV6) {
    931 				goto drop_pkt;
    932 			}
    933 
    934 			/*
    935 			 * Copy the 3rd header + remaining data on top
    936 			 * of the 2nd header.
    937 			 */
    938 			bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h);
    939 
    940 			/*
    941 			 * Subtract length of the 2nd header.
    942 			 */
    943 			mp->b_wptr -= hdr_length;
    944 
    945 			ip6h = (ip6_t *)mp->b_rptr;
    946 			/* Don't call hdr_length_v6() unless you have to. */
    947 			if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
    948 				hdr_length = ip_hdr_length_v6(mp, ip6h);
    949 			else
    950 				hdr_length = IPV6_HDR_LEN;
    951 
    952 			/*
    953 			 * Verify the modified message before any further
    954 			 * processes.
    955 			 */
    956 			icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
    957 			if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
    958 				freemsg(mp);
    959 				return;
    960 			}
    961 
    962 			/*
    963 			 * Now recurse, and see what I _really_ should be
    964 			 * doing here.
    965 			 */
    966 			icmp_inbound_error_fanout_v6(mp, icmp6, ira);
    967 			return;
    968 		}
    969 		/* FALLTHRU */
    970 	}
    971 	case IPPROTO_ENCAP:
    972 		if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src,
    973 		    &rip6h.ip6_dst, ipst)) != NULL) {
    974 			ira->ira_flags |= IRAF_ICMP_ERROR;
    975 			connp->conn_recvicmp(connp, mp, NULL, ira);
    976 			CONN_DEC_REF(connp);
    977 			ira->ira_flags &= ~IRAF_ICMP_ERROR;
    978 			return;
    979 		}
    980 		/*
    981 		 * No IP tunnel is interested, fallthrough and see
    982 		 * if a raw socket will want it.
    983 		 */
    984 		/* FALLTHRU */
    985 	default:
    986 		ira->ira_flags |= IRAF_ICMP_ERROR;
    987 		ASSERT(ira->ira_protocol == nexthdr);
    988 		ip_fanout_proto_v6(mp, &rip6h, ira);
    989 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
    990 		return;
    991 	}
    992 	/* NOTREACHED */
    993 drop_pkt:
    994 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
    995 	ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
    996 	freemsg(mp);
    997 }
    998 
    999 /*
   1000  * Process received IPv6 ICMP Redirect messages.
   1001  * Assumes the caller has verified that the headers are in the pulled up mblk.
   1002  * Consumes mp.
   1003  */
   1004 /* ARGSUSED */
   1005 static void
   1006 icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd,
   1007     ip_recv_attr_t *ira)
   1008 {
   1009 	ire_t		*ire, *nire;
   1010 	ire_t		*prev_ire = NULL;
   1011 	ire_t		*redir_ire;
   1012 	in6_addr_t	*src, *dst, *gateway;
   1013 	nd_opt_hdr_t	*opt;
   1014 	nce_t		*nce;
   1015 	int		ncec_flags = 0;
   1016 	int		err = 0;
   1017 	boolean_t	redirect_to_router = B_FALSE;
   1018 	int		len;
   1019 	int		optlen;
   1020 	ill_t		*ill = ira->ira_rill;
   1021 	ill_t		*rill = ira->ira_rill;
   1022 	ip_stack_t	*ipst = ill->ill_ipst;
   1023 
   1024 	/*
   1025 	 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
   1026 	 * and make it be the IPMP upper so avoid being confused by a packet
   1027 	 * addressed to a unicast address on a different ill.
   1028 	 */
   1029 	if (IS_UNDER_IPMP(rill)) {
   1030 		rill = ipmp_ill_hold_ipmp_ill(rill);
   1031 		if (rill == NULL) {
   1032 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
   1033 			ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill",
   1034 			    mp, ill);
   1035 			freemsg(mp);
   1036 			return;
   1037 		}
   1038 		ASSERT(rill != ira->ira_rill);
   1039 	}
   1040 
   1041 	len = mp->b_wptr - (uchar_t *)rd;
   1042 	src = &ip6h->ip6_src;
   1043 	dst = &rd->nd_rd_dst;
   1044 	gateway = &rd->nd_rd_target;
   1045 
   1046 	/* Verify if it is a valid redirect */
   1047 	if (!IN6_IS_ADDR_LINKLOCAL(src) ||
   1048 	    (ip6h->ip6_hops != IPV6_MAX_HOPS) ||
   1049 	    (rd->nd_rd_code != 0) ||
   1050 	    (len < sizeof (nd_redirect_t)) ||
   1051 	    (IN6_IS_ADDR_V4MAPPED(dst)) ||
   1052 	    (IN6_IS_ADDR_MULTICAST(dst))) {
   1053 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
   1054 		ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill);
   1055 		goto fail_redirect;
   1056 	}
   1057 
   1058 	if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
   1059 	    IN6_ARE_ADDR_EQUAL(gateway, dst))) {
   1060 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
   1061 		ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway",
   1062 		    mp, ill);
   1063 		goto fail_redirect;
   1064 	}
   1065 
   1066 	optlen = len - sizeof (nd_redirect_t);
   1067 	if (optlen != 0) {
   1068 		if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) {
   1069 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
   1070 			ip_drop_input("ipv6IfIcmpInBadRedirects - options",
   1071 			    mp, ill);
   1072 			goto fail_redirect;
   1073 		}
   1074 	}
   1075 
   1076 	if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
   1077 		redirect_to_router = B_TRUE;
   1078 		ncec_flags |= NCE_F_ISROUTER;
   1079 	} else {
   1080 		gateway = dst;	/* Add nce for dst */
   1081 	}
   1082 
   1083 
   1084 	/*
   1085 	 * Verify that the IP source address of the redirect is
   1086 	 * the same as the current first-hop router for the specified
   1087 	 * ICMP destination address.
   1088 	 * Also, Make sure we had a route for the dest in question and
   1089 	 * that route was pointing to the old gateway (the source of the
   1090 	 * redirect packet.)
   1091 	 * Note: this merely says that there is some IRE which matches that
   1092 	 * gateway; not that the longest match matches that gateway.
   1093 	 */
   1094 	prev_ire = ire_ftable_lookup_v6(dst, 0, src, 0, rill,
   1095 	    ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL, 0, ipst, NULL);
   1096 
   1097 	/*
   1098 	 * Check that
   1099 	 *	the redirect was not from ourselves
   1100 	 *	old gateway is still directly reachable
   1101 	 */
   1102 	if (prev_ire == NULL ||
   1103 	    (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) ||
   1104 	    (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
   1105 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
   1106 		ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill);
   1107 		goto fail_redirect;
   1108 	}
   1109 
   1110 	ASSERT(prev_ire->ire_ill != NULL);
   1111 	if (prev_ire->ire_ill->ill_flags & ILLF_NONUD)
   1112 		ncec_flags |= NCE_F_NONUD;
   1113 
   1114 	opt = (nd_opt_hdr_t *)&rd[1];
   1115 	opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR);
   1116 	if (opt != NULL) {
   1117 		err = nce_lookup_then_add_v6(rill,
   1118 		    (uchar_t *)&opt[1],		/* Link layer address */
   1119 		    rill->ill_phys_addr_length,
   1120 		    gateway, ncec_flags, ND_STALE, &nce);
   1121 		switch (err) {
   1122 		case 0:
   1123 			nce_refrele(nce);
   1124 			break;
   1125 		case EEXIST:
   1126 			/*
   1127 			 * Check to see if link layer address has changed and
   1128 			 * process the ncec_state accordingly.
   1129 			 */
   1130 			nce_process(nce->nce_common,
   1131 			    (uchar_t *)&opt[1], 0, B_FALSE);
   1132 			nce_refrele(nce);
   1133 			break;
   1134 		default:
   1135 			ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
   1136 			    err));
   1137 			goto fail_redirect;
   1138 		}
   1139 	}
   1140 	if (redirect_to_router) {
   1141 		ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));
   1142 
   1143 		/*
   1144 		 * Create a Route Association.  This will allow us to remember
   1145 		 * a router told us to use the particular gateway.
   1146 		 */
   1147 		ire = ire_create_v6(
   1148 		    dst,
   1149 		    &ipv6_all_ones,		/* mask */
   1150 		    gateway,			/* gateway addr */
   1151 		    IRE_HOST,
   1152 		    prev_ire->ire_ill,
   1153 		    ALL_ZONES,
   1154 		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
   1155 		    NULL,
   1156 		    ipst);
   1157 	} else {
   1158 		ipif_t *ipif;
   1159 		in6_addr_t gw;
   1160 
   1161 		/*
   1162 		 * Just create an on link entry, i.e. interface route.
   1163 		 * The gateway field is our link-local on the ill.
   1164 		 */
   1165 		mutex_enter(&rill->ill_lock);
   1166 		for (ipif = rill->ill_ipif; ipif != NULL;
   1167 		    ipif = ipif->ipif_next) {
   1168 			if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
   1169 			    IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr))
   1170 				break;
   1171 		}
   1172 		if (ipif == NULL) {
   1173 			/* We have no link-local address! */
   1174 			mutex_exit(&rill->ill_lock);
   1175 			goto fail_redirect;
   1176 		}
   1177 		gw = ipif->ipif_v6lcl_addr;
   1178 		mutex_exit(&rill->ill_lock);
   1179 
   1180 		ire = ire_create_v6(
   1181 		    dst,				/* gateway == dst */
   1182 		    &ipv6_all_ones,			/* mask */
   1183 		    &gw,				/* gateway addr */
   1184 		    rill->ill_net_type,			/* IF_[NO]RESOLVER */
   1185 		    prev_ire->ire_ill,
   1186 		    ALL_ZONES,
   1187 		    (RTF_DYNAMIC | RTF_HOST),
   1188 		    NULL,
   1189 		    ipst);
   1190 	}
   1191 
   1192 	if (ire == NULL)
   1193 		goto fail_redirect;
   1194 
   1195 	nire = ire_add(ire);
   1196 	/* Check if it was a duplicate entry */
   1197 	if (nire != NULL && nire != ire) {
   1198 		ASSERT(nire->ire_identical_ref > 1);
   1199 		ire_delete(nire);
   1200 		ire_refrele(nire);
   1201 		nire = NULL;
   1202 	}
   1203 	ire = nire;
   1204 	if (ire != NULL) {
   1205 		ire_refrele(ire);		/* Held in ire_add */
   1206 
   1207 		/* tell routing sockets that we received a redirect */
   1208 		ip_rts_change_v6(RTM_REDIRECT,
   1209 		    &rd->nd_rd_dst,
   1210 		    &rd->nd_rd_target,
   1211 		    &ipv6_all_ones, 0, src,
   1212 		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
   1213 		    (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst);
   1214 
   1215 		/*
   1216 		 * Delete any existing IRE_HOST type ires for this destination.
   1217 		 * This together with the added IRE has the effect of
   1218 		 * modifying an existing redirect.
   1219 		 */
   1220 		redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST,
   1221 		    prev_ire->ire_ill, ALL_ZONES, NULL,
   1222 		    (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst,
   1223 		    NULL);
   1224 
   1225 		if (redir_ire != NULL) {
   1226 			if (redir_ire->ire_flags & RTF_DYNAMIC)
   1227 				ire_delete(redir_ire);
   1228 			ire_refrele(redir_ire);
   1229 		}
   1230 	}
   1231 
   1232 	ire_refrele(prev_ire);
   1233 	prev_ire = NULL;
   1234 
   1235 fail_redirect:
   1236 	if (prev_ire != NULL)
   1237 		ire_refrele(prev_ire);
   1238 	freemsg(mp);
   1239 	if (rill != ira->ira_rill)
   1240 		ill_refrele(rill);
   1241 }
   1242 
   1243 /*
   1244  * Build and ship an IPv6 ICMP message using the packet data in mp,
   1245  * and the ICMP header pointed to by "stuff".  (May be called as
   1246  * writer.)
   1247  * Note: assumes that icmp_pkt_err_ok_v6 has been called to
   1248  * verify that an icmp error packet can be sent.
   1249  *
   1250  * If v6src_ptr is set use it as a source. Otherwise select a reasonable
   1251  * source address (see above function).
   1252  */
   1253 static void
   1254 icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len,
   1255     const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira)
   1256 {
   1257 	ip6_t		*ip6h;
   1258 	in6_addr_t	v6dst;
   1259 	size_t		len_needed;
   1260 	size_t		msg_len;
   1261 	mblk_t		*mp1;
   1262 	icmp6_t		*icmp6;
   1263 	in6_addr_t	v6src;
   1264 	ill_t		*ill = ira->ira_ill;
   1265 	ip_stack_t	*ipst = ill->ill_ipst;
   1266 	ip_xmit_attr_t	ixas;
   1267 
   1268 	ip6h = (ip6_t *)mp->b_rptr;
   1269 
   1270 	bzero(&ixas, sizeof (ixas));
   1271 	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
   1272 	ixas.ixa_zoneid = ira->ira_zoneid;
   1273 	ixas.ixa_ifindex = 0;
   1274 	ixas.ixa_ipst = ipst;
   1275 	ixas.ixa_cred = kcred;
   1276 	ixas.ixa_cpid = NOPID;
   1277 	ixas.ixa_tsl = ira->ira_tsl;	/* Behave as a multi-level responder */
   1278 	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
   1279 
   1280 	/*
   1281 	 * If the source of the original packet was link-local, then
   1282 	 * make sure we send on the same ill (group) as we received it on.
   1283 	 */
   1284 	if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
   1285 		ixas.ixa_flags |= IXAF_SCOPEID_SET;
   1286 		if (IS_UNDER_IPMP(ill))
   1287 			ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
   1288 		else
   1289 			ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
   1290 	}
   1291 
   1292 	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
   1293 		/*
   1294 		 * Apply IPsec based on how IPsec was applied to
   1295 		 * the packet that had the error.
   1296 		 *
   1297 		 * If it was an outbound packet that caused the ICMP
   1298 		 * error, then the caller will have setup the IRA
   1299 		 * appropriately.
   1300 		 */
   1301 		if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
   1302 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
   1303 			/* Note: mp already consumed and ip_drop_packet done */
   1304 			return;
   1305 		}
   1306 	} else {
   1307 		/*
   1308 		 * This is in clear. The icmp message we are building
   1309 		 * here should go out in clear, independent of our policy.
   1310 		 */
   1311 		ixas.ixa_flags |= IXAF_NO_IPSEC;
   1312 	}
   1313 
   1314 	/*
   1315 	 * If the caller specified the source we use that.
   1316 	 * Otherwise, if the packet was for one of our unicast addresses, make
   1317 	 * sure we respond with that as the source. Otherwise
   1318 	 * have ip_output_simple pick the source address.
   1319 	 */
   1320 	if (v6src_ptr != NULL) {
   1321 		v6src = *v6src_ptr;
   1322 	} else {
   1323 		ire_t *ire;
   1324 		uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY;
   1325 
   1326 		if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
   1327 		    IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst))
   1328 			match_flags |= MATCH_IRE_ILL;
   1329 
   1330 		ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0,
   1331 		    (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL,
   1332 		    match_flags, 0, ipst, NULL);
   1333 		if (ire != NULL) {
   1334 			v6src = ip6h->ip6_dst;
   1335 			ire_refrele(ire);
   1336 		} else {
   1337 			v6src = ipv6_all_zeros;
   1338 			ixas.ixa_flags |= IXAF_SET_SOURCE;
   1339 		}
   1340 	}
   1341 	v6dst = ip6h->ip6_src;
   1342 	len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len;
   1343 	msg_len = msgdsize(mp);
   1344 	if (msg_len > len_needed) {
   1345 		if (!adjmsg(mp, len_needed - msg_len)) {
   1346 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
   1347 			freemsg(mp);
   1348 			return;
   1349 		}
   1350 		msg_len = len_needed;
   1351 	}
   1352 	mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED);
   1353 	if (mp1 == NULL) {
   1354 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
   1355 		freemsg(mp);
   1356 		return;
   1357 	}
   1358 	mp1->b_cont = mp;
   1359 	mp = mp1;
   1360 
   1361 	/*
   1362 	 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this
   1363 	 * node generates be accepted in peace by all on-host destinations.
   1364 	 * If we do NOT assume that all on-host destinations trust
   1365 	 * self-generated ICMP messages, then rework here, ip6.c, and spd.c.
   1366 	 * (Look for IXAF_TRUSTED_ICMP).
   1367 	 */
   1368 	ixas.ixa_flags |= IXAF_TRUSTED_ICMP;
   1369 
   1370 	ip6h = (ip6_t *)mp->b_rptr;
   1371 	mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len);
   1372 
   1373 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
   1374 	ip6h->ip6_nxt = IPPROTO_ICMPV6;
   1375 	ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
   1376 	ip6h->ip6_dst = v6dst;
   1377 	ip6h->ip6_src = v6src;
   1378 	msg_len += IPV6_HDR_LEN + len;
   1379 	if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) {
   1380 		(void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len);
   1381 		msg_len = IP_MAXPACKET + IPV6_HDR_LEN;
   1382 	}
   1383 	ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN));
   1384 	icmp6 = (icmp6_t *)&ip6h[1];
   1385 	bcopy(stuff, (char *)icmp6, len);
   1386 	/*
   1387 	 * Prepare for checksum by putting icmp length in the icmp
   1388 	 * checksum field. The checksum is calculated in ip_output_wire_v6.
   1389 	 */
   1390 	icmp6->icmp6_cksum = ip6h->ip6_plen;
   1391 	if (icmp6->icmp6_type == ND_REDIRECT) {
   1392 		ip6h->ip6_hops = IPV6_MAX_HOPS;
   1393 	}
   1394 
   1395 	(void) ip_output_simple(mp, &ixas);
   1396 	ixa_cleanup(&ixas);
   1397 }
   1398 
   1399 /*
   1400  * Update the output mib when ICMPv6 packets are sent.
   1401  */
   1402 void
   1403 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6)
   1404 {
   1405 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs);
   1406 
   1407 	switch (icmp6->icmp6_type) {
   1408 	case ICMP6_DST_UNREACH:
   1409 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs);
   1410 		if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
   1411 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs);
   1412 		break;
   1413 
   1414 	case ICMP6_TIME_EXCEEDED:
   1415 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds);
   1416 		break;
   1417 
   1418 	case ICMP6_PARAM_PROB:
   1419 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems);
   1420 		break;
   1421 
   1422 	case ICMP6_PACKET_TOO_BIG:
   1423 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs);
   1424 		break;
   1425 
   1426 	case ICMP6_ECHO_REQUEST:
   1427 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos);
   1428 		break;
   1429 
   1430 	case ICMP6_ECHO_REPLY:
   1431 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies);
   1432 		break;
   1433 
   1434 	case ND_ROUTER_SOLICIT:
   1435 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits);
   1436 		break;
   1437 
   1438 	case ND_ROUTER_ADVERT:
   1439 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements);
   1440 		break;
   1441 
   1442 	case ND_NEIGHBOR_SOLICIT:
   1443 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits);
   1444 		break;
   1445 
   1446 	case ND_NEIGHBOR_ADVERT:
   1447 		BUMP_MIB(ill->ill_icmp6_mib,
   1448 		    ipv6IfIcmpOutNeighborAdvertisements);
   1449 		break;
   1450 
   1451 	case ND_REDIRECT:
   1452 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects);
   1453 		break;
   1454 
   1455 	case MLD_LISTENER_QUERY:
   1456 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries);
   1457 		break;
   1458 
   1459 	case MLD_LISTENER_REPORT:
   1460 	case MLD_V2_LISTENER_REPORT:
   1461 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses);
   1462 		break;
   1463 
   1464 	case MLD_LISTENER_REDUCTION:
   1465 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions);
   1466 		break;
   1467 	}
   1468 }
   1469 
   1470 /*
   1471  * Check if it is ok to send an ICMPv6 error packet in
   1472  * response to the IP packet in mp.
   1473  * Free the message and return null if no
   1474  * ICMP error packet should be sent.
   1475  */
   1476 static mblk_t *
   1477 icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira)
   1478 {
   1479 	ill_t		*ill = ira->ira_ill;
   1480 	ip_stack_t	*ipst = ill->ill_ipst;
   1481 	boolean_t	llbcast;
   1482 	ip6_t		*ip6h;
   1483 
   1484 	if (!mp)
   1485 		return (NULL);
   1486 
   1487 	/* We view multicast and broadcast as the same.. */
   1488 	llbcast = (ira->ira_flags &
   1489 	    (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0;
   1490 	ip6h = (ip6_t *)mp->b_rptr;
   1491 
   1492 	/* Check if source address uniquely identifies the host */
   1493 
   1494 	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) ||
   1495 	    IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) ||
   1496 	    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
   1497 		freemsg(mp);
   1498 		return (NULL);
   1499 	}
   1500 
   1501 	if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
   1502 		size_t	len_needed = IPV6_HDR_LEN + ICMP6_MINLEN;
   1503 		icmp6_t		*icmp6;
   1504 
   1505 		if (mp->b_wptr - mp->b_rptr < len_needed) {
   1506 			if (!pullupmsg(mp, len_needed)) {
   1507 				BUMP_MIB(ill->ill_icmp6_mib,
   1508 				    ipv6IfIcmpInErrors);
   1509 				freemsg(mp);
   1510 				return (NULL);
   1511 			}
   1512 			ip6h = (ip6_t *)mp->b_rptr;
   1513 		}
   1514 		icmp6 = (icmp6_t *)&ip6h[1];
   1515 		/* Explicitly do not generate errors in response to redirects */
   1516 		if (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
   1517 		    icmp6->icmp6_type == ND_REDIRECT) {
   1518 			freemsg(mp);
   1519 			return (NULL);
   1520 		}
   1521 	}
   1522 	/*
   1523 	 * Check that the destination is not multicast and that the packet
   1524 	 * was not sent on link layer broadcast or multicast.  (Exception
   1525 	 * is Packet too big message as per the draft - when mcast_ok is set.)
   1526 	 */
   1527 	if (!mcast_ok &&
   1528 	    (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) {
   1529 		freemsg(mp);
   1530 		return (NULL);
   1531 	}
   1532 	/*
   1533 	 * If this is a labeled system, then check to see if we're allowed to
   1534 	 * send a response to this particular sender.  If not, then just drop.
   1535 	 */
   1536 	if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) {
   1537 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
   1538 		freemsg(mp);
   1539 		return (NULL);
   1540 	}
   1541 
   1542 	if (icmp_err_rate_limit(ipst)) {
   1543 		/*
   1544 		 * Only send ICMP error packets every so often.
   1545 		 * This should be done on a per port/source basis,
   1546 		 * but for now this will suffice.
   1547 		 */
   1548 		freemsg(mp);
   1549 		return (NULL);
   1550 	}
   1551 	return (mp);
   1552 }
   1553 
   1554 /*
   1555  * Called when a packet was sent out the same link that it arrived on.
   1556  * Check if it is ok to send a redirect and then send it.
   1557  */
   1558 void
   1559 ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire,
   1560     ip_recv_attr_t *ira)
   1561 {
   1562 	ill_t		*ill = ira->ira_ill;
   1563 	ip_stack_t	*ipst = ill->ill_ipst;
   1564 	in6_addr_t	*v6targ;
   1565 	ire_t		*src_ire_v6 = NULL;
   1566 	mblk_t		*mp1;
   1567 	ire_t		*nhop_ire = NULL;
   1568 
   1569 	/*
   1570 	 * Don't send a redirect when forwarding a source
   1571 	 * routed packet.
   1572 	 */
   1573 	if (ip_source_routed_v6(ip6h, mp, ipst))
   1574 		return;
   1575 
   1576 	if (ire->ire_type & IRE_ONLINK) {
   1577 		/* Target is directly connected */
   1578 		v6targ = &ip6h->ip6_dst;
   1579 	} else {
   1580 		/* Determine the most specific IRE used to send the packets */
   1581 		nhop_ire = ire_nexthop(ire);
   1582 		if (nhop_ire == NULL)
   1583 			return;
   1584 
   1585 		/*
   1586 		 * We won't send redirects to a router
   1587 		 * that doesn't have a link local
   1588 		 * address, but will forward.
   1589 		 */
   1590 		if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) {
   1591 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
   1592 			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
   1593 			ire_refrele(nhop_ire);
   1594 			return;
   1595 		}
   1596 		v6targ = &nhop_ire->ire_addr_v6;
   1597 	}
   1598 	src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src,
   1599 	    NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL,
   1600 	    MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL);
   1601 
   1602 	if (src_ire_v6 == NULL) {
   1603 		if (nhop_ire != NULL)
   1604 			ire_refrele(nhop_ire);
   1605 		return;
   1606 	}
   1607 
   1608 	/*
   1609 	 * The source is directly connected.
   1610 	 */
   1611 	mp1 = copymsg(mp);
   1612 	if (mp1 != NULL)
   1613 		icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira);
   1614 
   1615 	if (nhop_ire != NULL)
   1616 		ire_refrele(nhop_ire);
   1617 	ire_refrele(src_ire_v6);
   1618 }
   1619 
   1620 /*
   1621  * Generate an ICMPv6 redirect message.
   1622  * Include target link layer address option if it exits.
   1623  * Always include redirect header.
   1624  */
   1625 static void
   1626 icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest,
   1627     ip_recv_attr_t *ira)
   1628 {
   1629 	nd_redirect_t	*rd;
   1630 	nd_opt_rd_hdr_t	*rdh;
   1631 	uchar_t		*buf;
   1632 	ncec_t		*ncec = NULL;
   1633 	nd_opt_hdr_t	*opt;
   1634 	int		len;
   1635 	int		ll_opt_len = 0;
   1636 	int		max_redir_hdr_data_len;
   1637 	int		pkt_len;
   1638 	in6_addr_t	*srcp;
   1639 	ill_t		*ill;
   1640 	boolean_t	need_refrele;
   1641 	ip_stack_t	*ipst = ira->ira_ill->ill_ipst;
   1642 
   1643 	mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira);
   1644 	if (mp == NULL)
   1645 		return;
   1646 
   1647 	if (IS_UNDER_IPMP(ira->ira_ill)) {
   1648 		ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill);
   1649 		if (ill == NULL) {
   1650 			ill = ira->ira_ill;
   1651 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
   1652 			ip_drop_output("no IPMP ill for sending redirect",
   1653 			    mp, ill);
   1654 			freemsg(mp);
   1655 			return;
   1656 		}
   1657 		need_refrele = B_TRUE;
   1658 	} else {
   1659 		ill = ira->ira_ill;
   1660 		need_refrele = B_FALSE;
   1661 	}
   1662 
   1663 	ncec = ncec_lookup_illgrp_v6(ill, targetp);
   1664 	if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE &&
   1665 	    ncec->ncec_lladdr != NULL) {
   1666 		ll_opt_len = (sizeof (nd_opt_hdr_t) +
   1667 		    ill->ill_phys_addr_length + 7)/8 * 8;
   1668 	}
   1669 	len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len;
   1670 	ASSERT(len % 4 == 0);
   1671 	buf = kmem_alloc(len, KM_NOSLEEP);
   1672 	if (buf == NULL) {
   1673 		if (ncec != NULL)
   1674 			ncec_refrele(ncec);
   1675 		if (need_refrele)
   1676 			ill_refrele(ill);
   1677 		freemsg(mp);
   1678 		return;
   1679 	}
   1680 
   1681 	rd = (nd_redirect_t *)buf;
   1682 	rd->nd_rd_type = (uint8_t)ND_REDIRECT;
   1683 	rd->nd_rd_code = 0;
   1684 	rd->nd_rd_reserved = 0;
   1685 	rd->nd_rd_target = *targetp;
   1686 	rd->nd_rd_dst = *dest;
   1687 
   1688 	opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t));
   1689 	if (ncec != NULL && ll_opt_len != 0) {
   1690 		opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
   1691 		opt->nd_opt_len = ll_opt_len/8;
   1692 		bcopy((char *)ncec->ncec_lladdr, &opt[1],
   1693 		    ill->ill_phys_addr_length);
   1694 	}
   1695 	if (ncec != NULL)
   1696 		ncec_refrele(ncec);
   1697 	rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len);
   1698 	rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER;
   1699 	/* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */
   1700 	max_redir_hdr_data_len =
   1701 	    (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8;
   1702 	pkt_len = msgdsize(mp);
   1703 	/* Make sure mp is 8 byte aligned */
   1704 	if (pkt_len > max_redir_hdr_data_len) {
   1705 		rdh->nd_opt_rh_len = (max_redir_hdr_data_len +
   1706 		    sizeof (nd_opt_rd_hdr_t))/8;
   1707 		(void) adjmsg(mp, max_redir_hdr_data_len - pkt_len);
   1708 	} else {
   1709 		rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8;
   1710 		(void) adjmsg(mp, -(pkt_len % 8));
   1711 	}
   1712 	rdh->nd_opt_rh_reserved1 = 0;
   1713 	rdh->nd_opt_rh_reserved2 = 0;
   1714 	/* ipif_v6lcl_addr contains the link-local source address */
   1715 	srcp = &ill->ill_ipif->ipif_v6lcl_addr;
   1716 
   1717 	/* Redirects sent by router, and router is global zone */
   1718 	ASSERT(ira->ira_zoneid == ALL_ZONES);
   1719 	ira->ira_zoneid = GLOBAL_ZONEID;
   1720 	icmp_pkt_v6(mp, buf, len, srcp, ira);
   1721 	kmem_free(buf, len);
   1722 	if (need_refrele)
   1723 		ill_refrele(ill);
   1724 }
   1725 
   1726 
   1727 /* Generate an ICMP time exceeded message.  (May be called as writer.) */
   1728 void
   1729 icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
   1730     ip_recv_attr_t *ira)
   1731 {
   1732 	icmp6_t	icmp6;
   1733 
   1734 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
   1735 	if (mp == NULL)
   1736 		return;
   1737 
   1738 	bzero(&icmp6, sizeof (icmp6_t));
   1739 	icmp6.icmp6_type = ICMP6_TIME_EXCEEDED;
   1740 	icmp6.icmp6_code = code;
   1741 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
   1742 }
   1743 
   1744 /*
   1745  * Generate an ICMP unreachable message.
   1746  * When called from ip_output side a minimal ip_recv_attr_t needs to be
   1747  * constructed by the caller.
   1748  */
   1749 void
   1750 icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
   1751     ip_recv_attr_t *ira)
   1752 {
   1753 	icmp6_t	icmp6;
   1754 
   1755 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
   1756 	if (mp == NULL)
   1757 		return;
   1758 
   1759 	bzero(&icmp6, sizeof (icmp6_t));
   1760 	icmp6.icmp6_type = ICMP6_DST_UNREACH;
   1761 	icmp6.icmp6_code = code;
   1762 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
   1763 }
   1764 
   1765 /*
   1766  * Generate an ICMP pkt too big message.
   1767  * When called from ip_output side a minimal ip_recv_attr_t needs to be
   1768  * constructed by the caller.
   1769  */
   1770 void
   1771 icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok,
   1772     ip_recv_attr_t *ira)
   1773 {
   1774 	icmp6_t	icmp6;
   1775 
   1776 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
   1777 	if (mp == NULL)
   1778 		return;
   1779 
   1780 	bzero(&icmp6, sizeof (icmp6_t));
   1781 	icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
   1782 	icmp6.icmp6_code = 0;
   1783 	icmp6.icmp6_mtu = htonl(mtu);
   1784 
   1785 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
   1786 }
   1787 
   1788 /*
   1789  * Generate an ICMP parameter problem message. (May be called as writer.)
   1790  * 'offset' is the offset from the beginning of the packet in error.
   1791  * When called from ip_output side a minimal ip_recv_attr_t needs to be
   1792  * constructed by the caller.
   1793  */
   1794 static void
   1795 icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset,
   1796     boolean_t mcast_ok, ip_recv_attr_t *ira)
   1797 {
   1798 	icmp6_t	icmp6;
   1799 
   1800 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
   1801 	if (mp == NULL)
   1802 		return;
   1803 
   1804 	bzero((char *)&icmp6, sizeof (icmp6_t));
   1805 	icmp6.icmp6_type = ICMP6_PARAM_PROB;
   1806 	icmp6.icmp6_code = code;
   1807 	icmp6.icmp6_pptr = htonl(offset);
   1808 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
   1809 }
   1810 
   1811 void
   1812 icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok,
   1813     ip_recv_attr_t *ira)
   1814 {
   1815 	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
   1816 	uint16_t	hdr_length;
   1817 	uint8_t		*nexthdrp;
   1818 	uint32_t	offset;
   1819 	ill_t		*ill = ira->ira_ill;
   1820 
   1821 	/* Determine the offset of the bad nexthdr value */
   1822 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h,	&hdr_length, &nexthdrp)) {
   1823 		/* Malformed packet */
   1824 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   1825 		ip_drop_input("ipIfStatsInDiscards", mp, ill);
   1826 		freemsg(mp);
   1827 		return;
   1828 	}
   1829 
   1830 	offset = nexthdrp - mp->b_rptr;
   1831 	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset,
   1832 	    mcast_ok, ira);
   1833 }
   1834 
   1835 /*
   1836  * Verify whether or not the IP address is a valid local address.
   1837  * Could be a unicast, including one for a down interface.
   1838  * If allow_mcbc then a multicast or broadcast address is also
   1839  * acceptable.
   1840  *
   1841  * In the case of a multicast address, however, the
   1842  * upper protocol is expected to reset the src address
   1843  * to zero when we return IPVL_MCAST so that
   1844  * no packets are emitted with multicast address as
   1845  * source address.
   1846  * The addresses valid for bind are:
   1847  *	(1) - in6addr_any
   1848  *	(2) - IP address of an UP interface
   1849  *	(3) - IP address of a DOWN interface
   1850  *	(4) - a multicast address. In this case
   1851  *	the conn will only receive packets destined to
   1852  *	the specified multicast address. Note: the
   1853  *	application still has to issue an
   1854  *	IPV6_JOIN_GROUP socket option.
   1855  *
   1856  * In all the above cases, the bound address must be valid in the current zone.
   1857  * When the address is loopback or multicast, there might be many matching IREs
   1858  * so bind has to look up based on the zone.
   1859  */
   1860 ip_laddr_t
   1861 ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid,
   1862     ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid)
   1863 {
   1864 	ire_t		*src_ire;
   1865 	uint_t		match_flags;
   1866 	ill_t		*ill = NULL;
   1867 
   1868 	ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src));
   1869 	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src));
   1870 
   1871 	match_flags = MATCH_IRE_ZONEONLY;
   1872 	if (scopeid != 0) {
   1873 		ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst);
   1874 		if (ill == NULL)
   1875 			return (IPVL_BAD);
   1876 		match_flags |= MATCH_IRE_ILL;
   1877 	}
   1878 
   1879 	src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0,
   1880 	    ill, zoneid, NULL, match_flags, 0, ipst, NULL);
   1881 	if (ill != NULL)
   1882 		ill_refrele(ill);
   1883 
   1884 	/*
   1885 	 * If an address other than in6addr_any is requested,
   1886 	 * we verify that it is a valid address for bind
   1887 	 * Note: Following code is in if-else-if form for
   1888 	 * readability compared to a condition check.
   1889 	 */
   1890 	if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) {
   1891 		/*
   1892 		 * (2) Bind to address of local UP interface
   1893 		 */
   1894 		ire_refrele(src_ire);
   1895 		return (IPVL_UNICAST_UP);
   1896 	} else if (IN6_IS_ADDR_MULTICAST(v6src)) {
   1897 		/* (4) bind to multicast address. */
   1898 		if (src_ire != NULL)
   1899 			ire_refrele(src_ire);
   1900 
   1901 		/*
   1902 		 * Note: caller should take IPV6_MULTICAST_IF
   1903 		 * into account when selecting a real source address.
   1904 		 */
   1905 		if (allow_mcbc)
   1906 			return (IPVL_MCAST);
   1907 		else
   1908 			return (IPVL_BAD);
   1909 	} else {
   1910 		ipif_t *ipif;
   1911 
   1912 		/*
   1913 		 * (3) Bind to address of local DOWN interface?
   1914 		 * (ipif_lookup_addr() looks up all interfaces
   1915 		 * but we do not get here for UP interfaces
   1916 		 * - case (2) above)
   1917 		 */
   1918 		if (src_ire != NULL)
   1919 			ire_refrele(src_ire);
   1920 
   1921 		ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst);
   1922 		if (ipif == NULL)
   1923 			return (IPVL_BAD);
   1924 
   1925 		/* Not a useful source? */
   1926 		if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) {
   1927 			ipif_refrele(ipif);
   1928 			return (IPVL_BAD);
   1929 		}
   1930 		ipif_refrele(ipif);
   1931 		return (IPVL_UNICAST_DOWN);
   1932 	}
   1933 }
   1934 
   1935 /*
   1936  * Verify that both the source and destination addresses are valid.  If
   1937  * IPDF_VERIFY_DST is not set, then the destination address may be unreachable,
   1938  * i.e. have no route to it.  Protocols like TCP want to verify destination
   1939  * reachability, while tunnels do not.
   1940  *
   1941  * Determine the route, the interface, and (optionally) the source address
   1942  * to use to reach a given destination.
   1943  * Note that we allow connect to broadcast and multicast addresses when
   1944  * IPDF_ALLOW_MCBC is set.
   1945  * first_hop and dst_addr are normally the same, but if source routing
   1946  * they will differ; in that case the first_hop is what we'll use for the
   1947  * routing lookup but the dce and label checks will be done on dst_addr.
   1948  *
   1949  * If uinfo is set, then we fill in the best available information
   1950  * we have for the destination. This is based on (in priority order) any
   1951  * metrics and path MTU stored in a dce_t, route metrics, and finally the
   1952  * ill_mtu.
   1953  *
   1954  * Tsol note: If we have a source route then dst_addr != firsthop. But we
   1955  * always do the label check on dst_addr.
   1956  *
   1957  * Assumes that the caller has set ixa_scopeid for link-local communication.
         *
         * The results are cached in the ixa: ixa_ire, ixa_nce, ixa_dce and their
         * generation numbers, ixa_postfragfn, ixa_fragsize and (when
         * IXAF_VERIFY_PMTU is set) ixa_pmtu. When IPDF_SELECT_SRC is set the
         * chosen source address is stored in *src_addrp.
         *
         * Returns 0 on success, otherwise an errno value:
         *  - Failures from tsol_check_dest() and ip_select_route_v6() return
         *    before ixa_ire is updated.
         *  - ENETUNREACH/EHOSTUNREACH for a reject/blackhole route with
         *    IPDF_VERIFY_DST, and ENETUNREACH for a broadcast/multicast route
         *    without IPDF_ALLOW_MCBC, are returned only after the ixa has been
         *    filled in as usual.
         *  - EADDRNOTAVAIL and ip_select_source_v6() failures return after
         *    ixa_ire (and possibly ixa_nce) have been replaced, but before
         *    *src_addrp, ixa_dce, ixa_fragsize and *uinfo are written.
   1958  */
   1959 int
   1960 ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr,
   1961     const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo,
   1962     uint32_t flags, uint_t mac_mode)
   1963 {
   1964 	ire_t		*ire;
   1965 	int		error = 0;
   1966 	in6_addr_t	setsrc;				/* RTF_SETSRC */
   1967 	zoneid_t	zoneid = ixa->ixa_zoneid;	/* Honors SO_ALLZONES */
   1968 	ip_stack_t	*ipst = ixa->ixa_ipst;
   1969 	dce_t		*dce;
   1970 	uint_t		pmtu;
   1971 	uint_t		ifindex;
   1972 	uint_t		generation;
   1973 	nce_t		*nce;
   1974 	ill_t		*ill = NULL;
   1975 	boolean_t	multirt = B_FALSE;
   1976 
   1977 	ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr));
   1978 
   1979 	ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
   1980 
   1981 	/*
   1982 	 * We never send to zero; the ULPs map it to the loopback address.
   1983 	 * We can't allow it since we use zero to mean uninitialized in some
   1984 	 * places.
   1985 	 */
   1986 	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr));
   1987 
   1988 	if (is_system_labeled()) {
   1989 		ts_label_t *tsl = NULL;
   1990 
   1991 		error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION,
   1992 		    mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl);
   1993 		if (error != 0)
   1994 			return (error);
   1995 		if (tsl != NULL) {
   1996 			/* Update the label */
   1997 			ip_xmit_attr_replace_tsl(ixa, tsl);
   1998 		}
   1999 	}
   2000 
   2001 	setsrc = ipv6_all_zeros;
   2002 	/*
   2003 	 * Select a route; For IPMP interfaces, we would only select
   2004 	 * a "hidden" route (i.e., going through a specific under_ill)
   2005 	 * if ixa_ifindex has been specified.
   2006 	 */
   2007 	ire = ip_select_route_v6(firsthop, ixa, &generation, &setsrc, &error,
   2008 	    &multirt);
   2009 	ASSERT(ire != NULL);	/* IRE_NOROUTE if none found */
   2010 	if (error != 0)
   2011 		goto bad_addr;
   2012 
   2013 	/*
   2014 	 * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set.
   2015 	 * If IPDF_VERIFY_DST is set, the destination must be reachable.
   2016 	 * Otherwise the destination needn't be reachable.
   2017 	 *
   2018 	 * If we match on a reject or black hole, then we've got a
   2019 	 * local failure.  May as well fail out the connect() attempt,
   2020 	 * since it's never going to succeed.
   2021 	 */
   2022 	if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
   2023 		/*
   2024 		 * If we're verifying destination reachability, we always want
   2025 		 * to complain here.
   2026 		 *
   2027 		 * If we're not verifying destination reachability but the
   2028 		 * destination has a route, we still want to fail on the
   2029 		 * temporary address and broadcast address tests.
   2030 		 *
   2031 		 * In both cases we let the code continue so some reasonable
   2032 		 * information is returned to the caller. That enables the
   2033 		 * caller to use (and even cache) the IRE. conn_ip_output will
   2034 		 * use the generation mismatch path to check for the unreachable
   2035 		 * case thereby avoiding any specific check in the main path.
   2036 		 */
   2037 		ASSERT(generation == IRE_GENERATION_VERIFY);
   2038 		if (flags & IPDF_VERIFY_DST) {
   2039 			/*
   2040 			 * Set errno but continue to set up ixa_ire to be
   2041 			 * the RTF_REJECT|RTF_BLACKHOLE IRE.
   2042 			 * That allows callers to use ip_output to get an
   2043 			 * ICMP error back.
   2044 			 */
   2045 			if (!(ire->ire_type & IRE_HOST))
   2046 				error = ENETUNREACH;
   2047 			else
   2048 				error = EHOSTUNREACH;
   2049 		}
   2050 	}
   2051 
   2052 	if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) &&
   2053 	    !(flags & IPDF_ALLOW_MCBC)) {
   2054 		ire_refrele(ire);
   2055 		ire = ire_reject(ipst, B_FALSE);
   2056 		generation = IRE_GENERATION_VERIFY;
   2057 		error = ENETUNREACH;
   2058 	}
   2059 
   2060 	/* Cache things */
   2061 	if (ixa->ixa_ire != NULL)
   2062 		ire_refrele_notr(ixa->ixa_ire);
        	/*
        	 * ixa_ire is released with ire_refrele_notr() (see just above), so
        	 * in DEBUG kernels trade the reference we hold for a _notr one
        	 * before storing it.
        	 * NOTE(review): presumably the two flavors of reference are
        	 * interchangeable in non-DEBUG kernels - confirm.
        	 */
   2063 #ifdef DEBUG
   2064 	ire_refhold_notr(ire);
   2065 	ire_refrele(ire);
   2066 #endif
   2067 	ixa->ixa_ire = ire;
   2068 	ixa->ixa_ire_generation = generation;
   2069 
   2070 	/*
   2071 	 * For multicast with multirt we have a flag passed back from
   2072 	 * ire_lookup_multi_ill_v6 since we don't have an IRE for each
   2073 	 * possible multicast address.
   2074 	 * We also need a flag for multicast since we can't check
   2075 	 * whether RTF_MULTIRT is set in ixa_ire for multicast.
   2076 	 */
   2077 	if (multirt) {
   2078 		ixa->ixa_postfragfn = ip_postfrag_multirt_v6;
   2079 		ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST;
   2080 	} else {
   2081 		ixa->ixa_postfragfn = ire->ire_postfragfn;
   2082 		ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST;
   2083 	}
   2084 	if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
   2085 		/* Get an nce to cache. */
   2086 		nce = ire_to_nce(ire, NULL, firsthop);
   2087 		if (nce == NULL) {
   2088 			/* Allocation failure? */
   2089 			ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
   2090 		} else {
   2091 			if (ixa->ixa_nce != NULL)
   2092 				nce_refrele(ixa->ixa_nce);
   2093 			ixa->ixa_nce = nce;
   2094 		}
   2095 	}
   2096 
   2097 	/*
   2098 	 * If the source address is a loopback address, the
   2099 	 * destination had best be local or multicast.
   2100 	 * If we are sending to an IRE_LOCAL using a loopback source then
   2101 	 * it had better be the same zoneid.
        	 *
        	 * From here on the reference on ire belongs to ixa_ire, so the
        	 * error paths clear the local pointer to keep bad_addr from
        	 * releasing it.
   2102 	 */
   2103 	if (IN6_IS_ADDR_LOOPBACK(src_addrp)) {
   2104 		if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) {
   2105 			ire = NULL;	/* Stored in ixa_ire */
   2106 			error = EADDRNOTAVAIL;
   2107 			goto bad_addr;
   2108 		}
   2109 		if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) {
   2110 			ire = NULL;	/* Stored in ixa_ire */
   2111 			error = EADDRNOTAVAIL;
   2112 			goto bad_addr;
   2113 		}
   2114 	}
   2115 
   2116 	/*
   2117 	 * Does the caller want us to pick a source address?
   2118 	 */
   2119 	if (flags & IPDF_SELECT_SRC) {
   2120 		in6_addr_t	src_addr;
   2121 
   2122 		/*
   2123 		 * We use ire_nexthop_ill to avoid the under ipmp
   2124 		 * interface for source address selection. Note that for ipmp
   2125 		 * probe packets, ixa_ifindex would have been specified, and
   2126 		 * the ip_select_route() invocation would have picked an ire
   2127 		 * with ire_ill pointing at an under interface.
   2128 		 */
   2129 		ill = ire_nexthop_ill(ire);
   2130 
   2131 		/* If unreachable we have no ill but need some source */
   2132 		if (ill == NULL) {
   2133 			src_addr = ipv6_loopback;
   2134 			/* Make sure we look for a better source address */
   2135 			generation = SRC_GENERATION_VERIFY;
   2136 		} else {
   2137 			error = ip_select_source_v6(ill, &setsrc, dst_addr,
   2138 			    zoneid, ipst, B_FALSE, ixa->ixa_src_preferences,
   2139 			    &src_addr, &generation, NULL);
   2140 			if (error != 0) {
   2141 				ire = NULL;	/* Stored in ixa_ire */
   2142 				goto bad_addr;
   2143 			}
   2144 		}
   2145 
   2146 		/*
   2147 		 * We allow the source address to be down.
   2148 		 * However, we check that we don't use the loopback address
   2149 		 * as a source when sending out on the wire.
   2150 		 */
   2151 		if (IN6_IS_ADDR_LOOPBACK(&src_addr) &&
   2152 		    !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) &&
   2153 		    !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
   2154 			ire = NULL;	/* Stored in ixa_ire */
   2155 			error = EADDRNOTAVAIL;
   2156 			goto bad_addr;
   2157 		}
   2158 
   2159 		*src_addrp = src_addr;
   2160 		ixa->ixa_src_generation = generation;
   2161 	}
   2162 
   2163 	/*
   2164 	 * Make sure we don't leave an unreachable ixa_nce in place
   2165 	 * since ip_select_route is used when we unplumb i.e., remove
   2166 	 * references on ixa_ire, ixa_nce, and ixa_dce.
   2167 	 */
   2168 	nce = ixa->ixa_nce;
   2169 	if (nce != NULL && nce->nce_is_condemned) {
   2170 		nce_refrele(nce);
   2171 		ixa->ixa_nce = NULL;
   2172 		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
   2173 	}
   2174 
   2175 
        	/*
        	 * ill is only looked up in the IPDF_SELECT_SRC block above, so
        	 * for a link-scope destination without IPDF_SELECT_SRC (or with
        	 * no nexthop ill) ifindex stays 0 and IPDF_UNIQUE_DCE is dropped.
        	 */
   2176 	ifindex = 0;
   2177 	if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) {
   2178 		/* If we are creating a DCE we'd better have an ifindex */
   2179 		if (ill != NULL)
   2180 			ifindex = ill->ill_phyint->phyint_ifindex;
   2181 		else
   2182 			flags &= ~IPDF_UNIQUE_DCE;
   2183 	}
   2184 
   2185 	if (flags & IPDF_UNIQUE_DCE) {
   2186 		/* Fallback to the default dce if allocation fails */
   2187 		dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst);
   2188 		if (dce != NULL) {
   2189 			generation = dce->dce_generation;
   2190 		} else {
   2191 			dce = dce_lookup_v6(dst_addr, ifindex, ipst,
   2192 			    &generation);
   2193 		}
   2194 	} else {
   2195 		dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation);
   2196 	}
   2197 	ASSERT(dce != NULL);
   2198 	if (ixa->ixa_dce != NULL)
   2199 		dce_refrele_notr(ixa->ixa_dce);
        	/* Same _notr reference swap as was done for ixa_ire above. */
   2200 #ifdef DEBUG
   2201 	dce_refhold_notr(dce);
   2202 	dce_refrele(dce);
   2203 #endif
   2204 	ixa->ixa_dce = dce;
   2205 	ixa->ixa_dce_generation = generation;
   2206 
   2207 	/*
   2208 	 * Note that IPv6 multicast supports PMTU discovery unlike IPv4
   2209 	 * multicast. But pmtu discovery is only enabled for connected
   2210 	 * sockets in general.
   2211 	 */
   2212 
   2213 	/*
   2214 	 * Set initial value for fragmentation limit.  Either conn_ip_output
   2215 	 * or ULP might update it when there are routing changes.
   2216 	 * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT.
   2217 	 */
   2218 	pmtu = ip_get_pmtu(ixa);
   2219 	ixa->ixa_fragsize = pmtu;
   2220 	/* Make sure ixa_fragsize and ixa_pmtu remain identical */
   2221 	if (ixa->ixa_flags & IXAF_VERIFY_PMTU)
   2222 		ixa->ixa_pmtu = pmtu;
   2223 
   2224 	/*
   2225 	 * Extract information useful for some transports.
   2226 	 * First we look for DCE metrics. Then we take what we have in
   2227 	 * the metrics in the route, where the offlink is used if we have
   2228 	 * one.
   2229 	 */
   2230 	if (uinfo != NULL) {
   2231 		bzero(uinfo, sizeof (*uinfo));
   2232 
   2233 		if (dce->dce_flags & DCEF_UINFO)
   2234 			*uinfo = dce->dce_uinfo;
   2235 
   2236 		rts_merge_metrics(uinfo, &ire->ire_metrics);
   2237 
   2238 		/* Allow ire_metrics to decrease the path MTU from above */
   2239 		if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu)
   2240 			uinfo->iulp_mtu = pmtu;
   2241 
   2242 		uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0;
   2243 		uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0;
   2244 		uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0;
   2245 	}
   2246 
   2247 	if (ill != NULL)
   2248 		ill_refrele(ill);
   2249 
   2250 	return (error);
   2251 
   2252 bad_addr:
        	/*
        	 * ire is non-NULL here only if its reference was not handed over
        	 * to ixa_ire; callers that already stored it set ire to NULL.
        	 */
   2253 	if (ire != NULL)
   2254 		ire_refrele(ire);
   2255 
   2256 	if (ill != NULL)
   2257 		ill_refrele(ill);
   2258 
   2259 	/*
   2260 	 * Make sure we don't leave an unreachable ixa_nce in place
   2261 	 * since ip_select_route is used when we unplumb i.e., remove
   2262 	 * references on ixa_ire, ixa_nce, and ixa_dce.
   2263 	 */
   2264 	nce = ixa->ixa_nce;
   2265 	if (nce != NULL && nce->nce_is_condemned) {
   2266 		nce_refrele(nce);
   2267 		ixa->ixa_nce = NULL;
   2268 		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
   2269 	}
   2270 
   2271 	return (error);
   2272 }
   2273 
   2274 /*
   2275  * Handle protocols with which IP is less intimate.  There
   2276  * can be more than one stream bound to a particular
   2277  * protocol.  When this is the case, normally each one gets a copy
   2278  * of any incoming packets.
   2279  *
   2280  * Zones notes:
   2281  * Packets will be distributed to conns in all zones. This is really only
   2282  * useful for ICMPv6 as only applications in the global zone can create raw
   2283  * sockets for other protocols.
   2284  */
   2285 void
   2286 ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
   2287 {
   2288 	mblk_t		*mp1;
   2289 	in6_addr_t	laddr = ip6h->ip6_dst;
   2290 	conn_t		*connp, *first_connp, *next_connp;
   2291 	connf_t		*connfp;
   2292 	ill_t		*ill = ira->ira_ill;
   2293 	ip_stack_t	*ipst = ill->ill_ipst;
   2294 
   2295 	connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol];
   2296 	mutex_enter(&connfp->connf_lock);
   2297 	connp = connfp->connf_head;
   2298 	for (connp = connfp->connf_head; connp != NULL;
   2299 	    connp = connp->conn_next) {
   2300 		/* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
   2301 		if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
   2302 		    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
   2303 		    tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
   2304 			break;
   2305 	}
   2306 
   2307 	if (connp == NULL) {
   2308 		/*
   2309 		 * No one bound to this port.  Is
   2310 		 * there a client that wants all
   2311 		 * unclaimed datagrams?
   2312 		 */
   2313 		mutex_exit(&connfp->connf_lock);
   2314 		ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB,
   2315 		    ICMP6_PARAMPROB_NEXTHEADER, ira);
   2316 		return;
   2317 	}
   2318 
   2319 	ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL);
   2320 
   2321 	CONN_INC_REF(connp);
   2322 	first_connp = connp;
   2323 
   2324 	/*
   2325 	 * XXX: Fix the multiple protocol listeners case. We should not
   2326 	 * be walking the conn->conn_next list here.
   2327 	 */
   2328 	connp = connp->conn_next;
   2329 	for (;;) {
   2330 		while (connp != NULL) {
   2331 			/* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
   2332 			if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
   2333 			    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
   2334 			    tsol_receive_local(mp, &laddr, IPV6_VERSION,
   2335 			    ira, connp)))
   2336 				break;
   2337 			connp = connp->conn_next;
   2338 		}
   2339 
   2340 		if (connp == NULL) {
   2341 			/* No more interested clients */
   2342 			connp = first_connp;
   2343 			break;
   2344 		}
   2345 		if (((mp1 = dupmsg(mp)) == NULL) &&
   2346 		    ((mp1 = copymsg(mp)) == NULL)) {
   2347 			/* Memory allocation failed */
   2348 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   2349 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
   2350 			connp = first_connp;
   2351 			break;
   2352 		}
   2353 
   2354 		CONN_INC_REF(connp);
   2355 		mutex_exit(&connfp->connf_lock);
   2356 
   2357 		ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr,
   2358 		    ira);
   2359 
   2360 		mutex_enter(&connfp->connf_lock);
   2361 		/* Follow the next pointer before releasing the conn. */
   2362 		next_connp = connp->conn_next;
   2363 		CONN_DEC_REF(connp);
   2364 		connp = next_connp;
   2365 	}
   2366 
   2367 	/* Last one.  Send it upstream. */
   2368 	mutex_exit(&connfp->connf_lock);
   2369 
   2370 	ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira);
   2371 
   2372 	CONN_DEC_REF(connp);
   2373 }
   2374 
   2375 /*
   2376  * Called when it is conceptually a ULP that would sent the packet
   2377  * e.g., port unreachable and nexthdr unknown. Check that the packet
   2378  * would have passed the IPsec global policy before sending the error.
   2379  *
   2380  * Send an ICMP error after patching up the packet appropriately.
   2381  * Uses ip_drop_input and bumps the appropriate MIB.
   2382  * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use.
   2383  */
   2384 void
   2385 ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code,
   2386     ip_recv_attr_t *ira)
   2387 {
   2388 	ip6_t		*ip6h;
   2389 	boolean_t	secure;
   2390 	ill_t		*ill = ira->ira_ill;
   2391 	ip_stack_t	*ipst = ill->ill_ipst;
   2392 	netstack_t	*ns = ipst->ips_netstack;
   2393 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   2394 
   2395 	secure = ira->ira_flags & IRAF_IPSEC_SECURE;
   2396 
   2397 	/*
   2398 	 * We are generating an icmp error for some inbound packet.
   2399 	 * Called from all ip_fanout_(udp, tcp, proto) functions.
   2400 	 * Before we generate an error, check with global policy
   2401 	 * to see whether this is allowed to enter the system. As
   2402 	 * there is no "conn", we are checking with global policy.
   2403 	 */
   2404 	ip6h = (ip6_t *)mp->b_rptr;
   2405 	if (secure || ipss->ipsec_inbound_v6_policy_present) {
   2406 		mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns);
   2407 		if (mp == NULL)
   2408 			return;
   2409 	}
   2410 
   2411 	/* We never send errors for protocols that we do implement */
   2412 	if (ira->ira_protocol == IPPROTO_ICMPV6) {
   2413 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   2414 		ip_drop_input("ip_fanout_send_icmp_v6", mp, ill);
   2415 		freemsg(mp);
   2416 		return;
   2417 	}
   2418 
   2419 	switch (icmp_type) {
   2420 	case ICMP6_DST_UNREACH:
   2421 		ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT);
   2422 
   2423 		BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts);
   2424 		ip_drop_input("ipIfStatsNoPorts", mp, ill);
   2425 
   2426 		icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira);
   2427 		break;
   2428 	case ICMP6_PARAM_PROB:
   2429 		ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER);
   2430 
   2431 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos);
   2432 		ip_drop_input("ipIfStatsInUnknownProtos", mp, ill);
   2433 
   2434 		/* Let the system determine the offset for this one */
   2435 		icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
   2436 		break;
   2437 	default:
   2438 #ifdef DEBUG
   2439 		panic("ip_fanout_send_icmp_v6: wrong type");
   2440 		/*NOTREACHED*/
   2441 #else
   2442 		freemsg(mp);
   2443 		break;
   2444 #endif
   2445 	}
   2446 }
   2447 
   2448 /*
   2449  * Fanout for UDP packets that are multicast or ICMP errors.
   2450  * (Unicast fanout is handled in ip_input_v6.)
   2451  *
   2452  * If SO_REUSEADDR is set all multicast packets
   2453  * will be delivered to all conns bound to the same port.
   2454  *
   2455  * Fanout for UDP packets.
   2456  * The caller puts <fport, lport> in the ports parameter.
   2457  * ire_type must be IRE_BROADCAST for multicast and broadcast packets.
   2458  *
   2459  * If SO_REUSEADDR is set all multicast and broadcast packets
   2460  * will be delivered to all conns bound to the same port.
   2461  *
   2462  * Zones notes:
   2463  * Earlier in ip_input on a system with multiple shared-IP zones we
   2464  * duplicate the multicast and broadcast packets and send them up
   2465  * with each explicit zoneid that exists on that ill.
   2466  * This means that here we can match the zoneid with SO_ALLZONES being special.
   2467  */
   2468 void
   2469 ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport,
   2470     ip_recv_attr_t *ira)
   2471 {
   2472 	in6_addr_t	laddr;
   2473 	conn_t		*connp;
   2474 	connf_t		*connfp;
   2475 	in6_addr_t	faddr;
   2476 	ill_t		*ill = ira->ira_ill;
   2477 	ip_stack_t	*ipst = ill->ill_ipst;
   2478 
   2479 	ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR));
   2480 
   2481 	laddr = ip6h->ip6_dst;
   2482 	faddr = ip6h->ip6_src;
   2483 
   2484 	/* Attempt to find a client stream based on destination port. */
   2485 	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
   2486 	mutex_enter(&connfp->connf_lock);
   2487 	connp = connfp->connf_head;
   2488 	while (connp != NULL) {
   2489 		if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) &&
   2490 		    conn_wantpacket_v6(connp, ira, ip6h) &&
   2491 		    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
   2492 		    tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
   2493 			break;
   2494 		connp = connp->conn_next;
   2495 	}
   2496 
   2497 	if (connp == NULL)
   2498 		goto notfound;
   2499 
   2500 	CONN_INC_REF(connp);
   2501 
   2502 	if (connp->conn_reuseaddr) {
   2503 		conn_t		*first_connp = connp;
   2504 		conn_t		*next_connp;
   2505 		mblk_t		*mp1;
   2506 
   2507 		connp = connp->conn_next;
   2508 		for (;;) {
   2509 			while (connp != NULL) {
   2510 				if (IPCL_UDP_MATCH_V6(connp, lport, laddr,
   2511 				    fport, faddr) &&
   2512 				    conn_wantpacket_v6(connp, ira, ip6h) &&
   2513 				    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
   2514 				    tsol_receive_local(mp, &laddr, IPV6_VERSION,
   2515 				    ira, connp)))
   2516 					break;
   2517 				connp = connp->conn_next;
   2518 			}
   2519 			if (connp == NULL) {
   2520 				/* No more interested clients */
   2521 				connp = first_connp;
   2522 				break;
   2523 			}
   2524 			if (((mp1 = dupmsg(mp)) == NULL) &&
   2525 			    ((mp1 = copymsg(mp)) == NULL)) {
   2526 				/* Memory allocation failed */
   2527 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   2528 				ip_drop_input("ipIfStatsInDiscards", mp, ill);
   2529 				connp = first_connp;
   2530 				break;
   2531 			}
   2532 
   2533 			CONN_INC_REF(connp);
   2534 			mutex_exit(&connfp->connf_lock);
   2535 
   2536 			IP6_STAT(ipst, ip6_udp_fanmb);
   2537 			ip_fanout_udp_conn(connp, mp1, NULL,
   2538 			    (ip6_t *)mp1->b_rptr, ira);
   2539 
   2540 			mutex_enter(&connfp->connf_lock);
   2541 			/* Follow the next pointer before releasing the conn. */
   2542 			next_connp = connp->conn_next;
   2543 			IP6_STAT(ipst, ip6_udp_fanmb);
   2544 			CONN_DEC_REF(connp);
   2545 			connp = next_connp;
   2546 		}
   2547 	}
   2548 
   2549 	/* Last one.  Send it upstream. */
   2550 	mutex_exit(&connfp->connf_lock);
   2551 
   2552 	IP6_STAT(ipst, ip6_udp_fanmb);
   2553 	ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira);
   2554 	CONN_DEC_REF(connp);
   2555 	return;
   2556 
   2557 notfound:
   2558 	mutex_exit(&connfp->connf_lock);
   2559 	/*
   2560 	 * No one bound to this port.  Is
   2561 	 * there a client that wants all
   2562 	 * unclaimed datagrams?
   2563 	 */
   2564 	if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
   2565 		ASSERT(ira->ira_protocol == IPPROTO_UDP);
   2566 		ip_fanout_proto_v6(mp, ip6h, ira);
   2567 	} else {
   2568 		ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
   2569 		    ICMP6_DST_UNREACH_NOPORT, ira);
   2570 	}
   2571 }
   2572 
   2573 /*
   2574  * int ip_find_hdr_v6()
   2575  *
   2576  * This routine is used by the upper layer protocols, iptun, and IPsec:
   2577  * - Set extension header pointers to appropriate locations
   2578  * - Determine IPv6 header length and return it
   2579  * - Return a pointer to the last nexthdr value
   2580  *
   2581  * The caller must initialize ipp_fields.
   2582  * The upper layer protocols normally set label_separate which makes the
   2583  * routine put the TX label in ipp_label_v6. If this is not set then
   2584  * the hop-by-hop options including the label are placed in ipp_hopopts.
   2585  *
   2586  * NOTE: If multiple extension headers of the same type are present,
   2587  * ip_find_hdr_v6() will set the respective extension header pointers
   2588  * to the first one that it encounters in the IPv6 header.  It also
   2589  * skips fragment headers.  This routine deals with malformed packets
   2590  * of various sorts in which case the returned length is up to the
   2591  * malformed part.
   2592  */
   2593 int
   2594 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp,
   2595     uint8_t *nexthdrp)
   2596 {
   2597 	uint_t	length, ehdrlen;
   2598 	uint8_t nexthdr;
   2599 	uint8_t *whereptr, *endptr;
   2600 	ip6_dest_t *tmpdstopts;
   2601 	ip6_rthdr_t *tmprthdr;
   2602 	ip6_hbh_t *tmphopopts;
   2603 	ip6_frag_t *tmpfraghdr;
   2604 
   2605 	ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR;
   2606 	ipp->ipp_hoplimit = ip6h->ip6_hops;
   2607 	ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow);
   2608 	ipp->ipp_addr = ip6h->ip6_dst;
   2609 
   2610 	length = IPV6_HDR_LEN;
   2611 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
   2612 	endptr = mp->b_wptr;
   2613 
   2614 	nexthdr = ip6h->ip6_nxt;
   2615 	while (whereptr < endptr) {
   2616 		/* Is there enough left for len + nexthdr? */
   2617 		if (whereptr + MIN_EHDR_LEN > endptr)
   2618 			goto done;
   2619 
   2620 		switch (nexthdr) {
   2621 		case IPPROTO_HOPOPTS: {
   2622 			/* We check for any CIPSO */
   2623 			uchar_t *secopt;
   2624 			boolean_t hbh_needed;
   2625 			uchar_t *after_secopt;
   2626 
   2627 			tmphopopts = (ip6_hbh_t *)whereptr;
   2628 			ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
   2629 			if ((uchar_t *)tmphopopts +  ehdrlen > endptr)
   2630 				goto done;
   2631 			nexthdr = tmphopopts->ip6h_nxt;
   2632 
   2633 			if (!label_separate) {
   2634 				secopt = NULL;
   2635 				after_secopt = whereptr;
   2636 			} else {
   2637 				/*
   2638 				 * We have dropped packets with bad options in
   2639 				 * ip6_input. No need to check return value
   2640 				 * here.
   2641 				 */
   2642 				(void) tsol_find_secopt_v6(whereptr, ehdrlen,
   2643 				    &secopt, &after_secopt, &hbh_needed);
   2644 			}
   2645 			if (secopt != NULL && after_secopt - whereptr > 0) {
   2646 				ipp->ipp_fields |= IPPF_LABEL_V6;
   2647 				ipp->ipp_label_v6 = secopt;
   2648 				ipp->ipp_label_len_v6 = after_secopt - whereptr;
   2649 			} else {
   2650 				ipp->ipp_label_len_v6 = 0;
   2651 				after_secopt = whereptr;
   2652 				hbh_needed = B_TRUE;
   2653 			}
   2654 			/* return only 1st hbh */
   2655 			if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) {
   2656 				ipp->ipp_fields |= IPPF_HOPOPTS;
   2657 				ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt;
   2658 				ipp->ipp_hopoptslen = ehdrlen -
   2659 				    ipp->ipp_label_len_v6;
   2660 			}
   2661 			break;
   2662 		}
   2663 		case IPPROTO_DSTOPTS:
   2664 			tmpdstopts = (ip6_dest_t *)whereptr;
   2665 			ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
   2666 			if ((uchar_t *)tmpdstopts +  ehdrlen > endptr)
   2667 				goto done;
   2668 			nexthdr = tmpdstopts->ip6d_nxt;
   2669 			/*
   2670 			 * ipp_dstopts is set to the destination header after a
   2671 			 * routing header.
   2672 			 * Assume it is a post-rthdr destination header
   2673 			 * and adjust when we find an rthdr.
   2674 			 */
   2675 			if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
   2676 				ipp->ipp_fields |= IPPF_DSTOPTS;
   2677 				ipp->ipp_dstopts = tmpdstopts;
   2678 				ipp->ipp_dstoptslen = ehdrlen;
   2679 			}
   2680 			break;
   2681 		case IPPROTO_ROUTING:
   2682 			tmprthdr = (ip6_rthdr_t *)whereptr;
   2683 			ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
   2684 			if ((uchar_t *)tmprthdr +  ehdrlen > endptr)
   2685 				goto done;
   2686 			nexthdr = tmprthdr->ip6r_nxt;
   2687 			/* return only 1st rthdr */
   2688 			if (!(ipp->ipp_fields & IPPF_RTHDR)) {
   2689 				ipp->ipp_fields |= IPPF_RTHDR;
   2690 				ipp->ipp_rthdr = tmprthdr;
   2691 				ipp->ipp_rthdrlen = ehdrlen;
   2692 			}
   2693 			/*
   2694 			 * Make any destination header we've seen be a
   2695 			 * pre-rthdr destination header.
   2696 			 */
   2697 			if (ipp->ipp_fields & IPPF_DSTOPTS) {
   2698 				ipp->ipp_fields &= ~IPPF_DSTOPTS;
   2699 				ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
   2700 				ipp->ipp_rthdrdstopts = ipp->ipp_dstopts;
   2701 				ipp->ipp_dstopts = NULL;
   2702 				ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen;
   2703 				ipp->ipp_dstoptslen = 0;
   2704 			}
   2705 			break;
   2706 		case IPPROTO_FRAGMENT:
   2707 			tmpfraghdr = (ip6_frag_t *)whereptr;
   2708 			ehdrlen = sizeof (ip6_frag_t);
   2709 			if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
   2710 				goto done;
   2711 			nexthdr = tmpfraghdr->ip6f_nxt;
   2712 			if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
   2713 				ipp->ipp_fields |= IPPF_FRAGHDR;
   2714 				ipp->ipp_fraghdr = tmpfraghdr;
   2715 				ipp->ipp_fraghdrlen = ehdrlen;
   2716 			}
   2717 			break;
   2718 		case IPPROTO_NONE:
   2719 		default:
   2720 			goto done;
   2721 		}
   2722 		length += ehdrlen;
   2723 		whereptr += ehdrlen;
   2724 	}
   2725 done:
   2726 	if (nexthdrp != NULL)
   2727 		*nexthdrp = nexthdr;
   2728 	return (length);
   2729 }
   2730 
   2731 /*
   2732  * Try to determine where and what are the IPv6 header length and
   2733  * pointer to nexthdr value for the upper layer protocol (or an
   2734  * unknown next hdr).
   2735  *
   2736  * Parameters returns a pointer to the nexthdr value;
   2737  * Must handle malformed packets of various sorts.
   2738  * Function returns failure for malformed cases.
   2739  */
   2740 boolean_t
   2741 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr,
   2742     uint8_t **nexthdrpp)
   2743 {
   2744 	uint16_t length;
   2745 	uint_t	ehdrlen;
   2746 	uint8_t	*nexthdrp;
   2747 	uint8_t *whereptr;
   2748 	uint8_t *endptr;
   2749 	ip6_dest_t *desthdr;
   2750 	ip6_rthdr_t *rthdr;
   2751 	ip6_frag_t *fraghdr;
   2752 
   2753 	ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
   2754 	length = IPV6_HDR_LEN;
   2755 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
   2756 	endptr = mp->b_wptr;
   2757 
   2758 	nexthdrp = &ip6h->ip6_nxt;
   2759 	while (whereptr < endptr) {
   2760 		/* Is there enough left for len + nexthdr? */
   2761 		if (whereptr + MIN_EHDR_LEN > endptr)
   2762 			break;
   2763 
   2764 		switch (*nexthdrp) {
   2765 		case IPPROTO_HOPOPTS:
   2766 		case IPPROTO_DSTOPTS:
   2767 			/* Assumes the headers are identical for hbh and dst */
   2768 			desthdr = (ip6_dest_t *)whereptr;
   2769 			ehdrlen = 8 * (desthdr->ip6d_len + 1);
   2770 			if ((uchar_t *)desthdr +  ehdrlen > endptr)
   2771 				return (B_FALSE);
   2772 			nexthdrp = &desthdr->ip6d_nxt;
   2773 			break;
   2774 		case IPPROTO_ROUTING:
   2775 			rthdr = (ip6_rthdr_t *)whereptr;
   2776 			ehdrlen =  8 * (rthdr->ip6r_len + 1);
   2777 			if ((uchar_t *)rthdr +  ehdrlen > endptr)
   2778 				return (B_FALSE);
   2779 			nexthdrp = &rthdr->ip6r_nxt;
   2780 			break;
   2781 		case IPPROTO_FRAGMENT:
   2782 			fraghdr = (ip6_frag_t *)whereptr;
   2783 			ehdrlen = sizeof (ip6_frag_t);
   2784 			if ((uchar_t *)&fraghdr[1] > endptr)
   2785 				return (B_FALSE);
   2786 			nexthdrp = &fraghdr->ip6f_nxt;
   2787 			break;
   2788 		case IPPROTO_NONE:
   2789 			/* No next header means we're finished */
   2790 		default:
   2791 			*hdr_length_ptr = length;
   2792 			*nexthdrpp = nexthdrp;
   2793 			return (B_TRUE);
   2794 		}
   2795 		length += ehdrlen;
   2796 		whereptr += ehdrlen;
   2797 		*hdr_length_ptr = length;
   2798 		*nexthdrpp = nexthdrp;
   2799 	}
   2800 	switch (*nexthdrp) {
   2801 	case IPPROTO_HOPOPTS:
   2802 	case IPPROTO_DSTOPTS:
   2803 	case IPPROTO_ROUTING:
   2804 	case IPPROTO_FRAGMENT:
   2805 		/*
   2806 		 * If any know extension headers are still to be processed,
   2807 		 * the packet's malformed (or at least all the IP header(s) are
   2808 		 * not in the same mblk - and that should never happen.
   2809 		 */
   2810 		return (B_FALSE);
   2811 
   2812 	default:
   2813 		/*
   2814 		 * If we get here, we know that all of the IP headers were in
   2815 		 * the same mblk, even if the ULP header is in the next mblk.
   2816 		 */
   2817 		*hdr_length_ptr = length;
   2818 		*nexthdrpp = nexthdrp;
   2819 		return (B_TRUE);
   2820 	}
   2821 }
   2822 
   2823 /*
   2824  * Return the length of the IPv6 related headers (including extension headers)
   2825  * Returns a length even if the packet is malformed.
   2826  */
   2827 int
   2828 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h)
   2829 {
   2830 	uint16_t hdr_len;
   2831 	uint8_t	*nexthdrp;
   2832 
   2833 	(void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp);
   2834 	return (hdr_len);
   2835 }
   2836 
   2837 /*
   2838  * Parse and process any hop-by-hop or destination options.
   2839  *
   2840  * Assumes that q is an ill read queue so that ICMP errors for link-local
   2841  * destinations are sent out the correct interface.
   2842  *
   2843  * Returns -1 if there was an error and mp has been consumed.
   2844  * Returns 0 if no special action is needed.
   2845  * Returns 1 if the packet contained a router alert option for this node
   2846  * which is verified to be "interesting/known" for our implementation.
   2847  *
   2848  * XXX Note: In future as more hbh or dest options are defined,
   2849  * it may be better to have different routines for hbh and dest
   2850  * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN
   2851  * may have same value in different namespaces. Or is it same namespace ??
   2852  * Current code checks for each opt_type (other than pads) if it is in
   2853  * the expected  nexthdr (hbh or dest)
   2854  */
   2855 int
   2856 ip_process_options_v6(mblk_t *mp, ip6_t *ip6h,
   2857     uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira)
   2858 {
   2859 	uint8_t opt_type;
   2860 	uint_t optused;
   2861 	int ret = 0;
   2862 	const char *errtype;
   2863 	ill_t		*ill = ira->ira_ill;
   2864 	ip_stack_t	*ipst = ill->ill_ipst;
   2865 
   2866 	while (optlen != 0) {
   2867 		opt_type = *optptr;
   2868 		if (opt_type == IP6OPT_PAD1) {
   2869 			optused = 1;
   2870 		} else {
   2871 			if (optlen < 2)
   2872 				goto bad_opt;
   2873 			errtype = "malformed";
   2874 			if (opt_type == ip6opt_ls) {
   2875 				optused = 2 + optptr[1];
   2876 				if (optused > optlen)
   2877 					goto bad_opt;
   2878 			} else switch (opt_type) {
   2879 			case IP6OPT_PADN:
   2880 				/*
   2881 				 * Note:We don't verify that (N-2) pad octets
   2882 				 * are zero as required by spec. Adhere to
   2883 				 * "be liberal in what you accept..." part of
   2884 				 * implementation philosophy (RFC791,RFC1122)
   2885 				 */
   2886 				optused = 2 + optptr[1];
   2887 				if (optused > optlen)
   2888 					goto bad_opt;
   2889 				break;
   2890 
   2891 			case IP6OPT_JUMBO:
   2892 				if (hdr_type != IPPROTO_HOPOPTS)
   2893 					goto opt_error;
   2894 				goto opt_error; /* XXX Not implemented! */
   2895 
   2896 			case IP6OPT_ROUTER_ALERT: {
   2897 				struct ip6_opt_router *or;
   2898 
   2899 				if (hdr_type != IPPROTO_HOPOPTS)
   2900 					goto opt_error;
   2901 				optused = 2 + optptr[1];
   2902 				if (optused > optlen)
   2903 					goto bad_opt;
   2904 				or = (struct ip6_opt_router *)optptr;
   2905 				/* Check total length and alignment */
   2906 				if (optused != sizeof (*or) ||
   2907 				    ((uintptr_t)or->ip6or_value & 0x1) != 0)
   2908 					goto opt_error;
   2909 				/* Check value */
   2910 				switch (*((uint16_t *)or->ip6or_value)) {
   2911 				case IP6_ALERT_MLD:
   2912 				case IP6_ALERT_RSVP:
   2913 					ret = 1;
   2914 				}
   2915 				break;
   2916 			}
   2917 			case IP6OPT_HOME_ADDRESS: {
   2918 				/*
   2919 				 * Minimal support for the home address option
   2920 				 * (which is required by all IPv6 nodes).
   2921 				 * Implement by just swapping the home address
   2922 				 * and source address.
   2923 				 * XXX Note: this has IPsec implications since
   2924 				 * AH needs to take this into account.
   2925 				 * Also, when IPsec is used we need to ensure
   2926 				 * that this is only processed once
   2927 				 * in the received packet (to avoid swapping
   2928 				 * back and forth).
   2929 				 * NOTE:This option processing is considered
   2930 				 * to be unsafe and prone to a denial of
   2931 				 * service attack.
   2932 				 * The current processing is not safe even with
   2933 				 * IPsec secured IP packets. Since the home
   2934 				 * address option processing requirement still
   2935 				 * is in the IETF draft and in the process of
   2936 				 * being redefined for its usage, it has been
   2937 				 * decided to turn off the option by default.
   2938 				 * If this section of code needs to be executed,
   2939 				 * ndd variable ip6_ignore_home_address_opt
   2940 				 * should be set to 0 at the user's own risk.
   2941 				 */
   2942 				struct ip6_opt_home_address *oh;
   2943 				in6_addr_t tmp;
   2944 
   2945 				if (ipst->ips_ipv6_ignore_home_address_opt)
   2946 					goto opt_error;
   2947 
   2948 				if (hdr_type != IPPROTO_DSTOPTS)
   2949 					goto opt_error;
   2950 				optused = 2 + optptr[1];
   2951 				if (optused > optlen)
   2952 					goto bad_opt;
   2953 
   2954 				/*
   2955 				 * We did this dest. opt the first time
   2956 				 * around (i.e. before AH processing).
   2957 				 * If we've done AH... stop now.
   2958 				 */
   2959 				if ((ira->ira_flags & IRAF_IPSEC_SECURE) &&
   2960 				    ira->ira_ipsec_ah_sa != NULL)
   2961 					break;
   2962 
   2963 				oh = (struct ip6_opt_home_address *)optptr;
   2964 				/* Check total length and alignment */
   2965 				if (optused < sizeof (*oh) ||
   2966 				    ((uintptr_t)oh->ip6oh_addr & 0x7) != 0)
   2967 					goto opt_error;
   2968 				/* Swap ip6_src and the home address */
   2969 				tmp = ip6h->ip6_src;
   2970 				/* XXX Note: only 8 byte alignment option */
   2971 				ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr;
   2972 				*(in6_addr_t *)oh->ip6oh_addr = tmp;
   2973 				break;
   2974 			}
   2975 
   2976 			case IP6OPT_TUNNEL_LIMIT:
   2977 				if (hdr_type != IPPROTO_DSTOPTS) {
   2978 					goto opt_error;
   2979 				}
   2980 				optused = 2 + optptr[1];
   2981 				if (optused > optlen) {
   2982 					goto bad_opt;
   2983 				}
   2984 				if (optused != 3) {
   2985 					goto opt_error;
   2986 				}
   2987 				break;
   2988 
   2989 			default:
   2990 				errtype = "unknown";
   2991 				/* FALLTHROUGH */
   2992 			opt_error:
   2993 				/* Determine which zone should send error */
   2994 				switch (IP6OPT_TYPE(opt_type)) {
   2995 				case IP6OPT_TYPE_SKIP:
   2996 					optused = 2 + optptr[1];
   2997 					if (optused > optlen)
   2998 						goto bad_opt;
   2999 					ip1dbg(("ip_process_options_v6: %s "
   3000 					    "opt 0x%x skipped\n",
   3001 					    errtype, opt_type));
   3002 					break;
   3003 				case IP6OPT_TYPE_DISCARD:
   3004 					ip1dbg(("ip_process_options_v6: %s "
   3005 					    "opt 0x%x; packet dropped\n",
   3006 					    errtype, opt_type));
   3007 					BUMP_MIB(ill->ill_ip_mib,
   3008 					    ipIfStatsInHdrErrors);
   3009 					ip_drop_input("ipIfStatsInHdrErrors",
   3010 					    mp, ill);
   3011 					freemsg(mp);
   3012 					return (-1);
   3013 				case IP6OPT_TYPE_ICMP:
   3014 					BUMP_MIB(ill->ill_ip_mib,
   3015 					    ipIfStatsInHdrErrors);
   3016 					ip_drop_input("ipIfStatsInHdrErrors",
   3017 					    mp, ill);
   3018 					icmp_param_problem_v6(mp,
   3019 					    ICMP6_PARAMPROB_OPTION,
   3020 					    (uint32_t)(optptr -
   3021 					    (uint8_t *)ip6h),
   3022 					    B_FALSE, ira);
   3023 					return (-1);
   3024 				case IP6OPT_TYPE_FORCEICMP:
   3025 					BUMP_MIB(ill->ill_ip_mib,
   3026 					    ipIfStatsInHdrErrors);
   3027 					ip_drop_input("ipIfStatsInHdrErrors",
   3028 					    mp, ill);
   3029 					icmp_param_problem_v6(mp,
   3030 					    ICMP6_PARAMPROB_OPTION,
   3031 					    (uint32_t)(optptr -
   3032 					    (uint8_t *)ip6h),
   3033 					    B_TRUE, ira);
   3034 					return (-1);
   3035 				default:
   3036 					ASSERT(0);
   3037 				}
   3038 			}
   3039 		}
   3040 		optlen -= optused;
   3041 		optptr += optused;
   3042 	}
   3043 	return (ret);
   3044 
   3045 bad_opt:
   3046 	/* Determine which zone should send error */
   3047 	ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
   3048 	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION,
   3049 	    (uint32_t)(optptr - (uint8_t *)ip6h),
   3050 	    B_FALSE, ira);
   3051 	return (-1);
   3052 }
   3053 
   3054 /*
   3055  * Process a routing header that is not yet empty.
   3056  * Because of RFC 5095, we now reject all route headers.
   3057  */
   3058 void
   3059 ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth,
   3060     ip_recv_attr_t *ira)
   3061 {
   3062 	ill_t		*ill = ira->ira_ill;
   3063 	ip_stack_t	*ipst = ill->ill_ipst;
   3064 
   3065 	ASSERT(rth->ip6r_segleft != 0);
   3066 
   3067 	if (!ipst->ips_ipv6_forward_src_routed) {
   3068 		/* XXX Check for source routed out same interface? */
   3069 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
   3070 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
   3071 		ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
   3072 		freemsg(mp);
   3073 		return;
   3074 	}
   3075 
   3076 	ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
   3077 	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
   3078 	    (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h),
   3079 	    B_FALSE, ira);
   3080 }
   3081 
   3082 /*
   3083  * Read side put procedure for IPv6 module.
   3084  */
   3085 void
   3086 ip_rput_v6(queue_t *q, mblk_t *mp)
   3087 {
   3088 	ill_t		*ill;
   3089 
   3090 	ill = (ill_t *)q->q_ptr;
   3091 	if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) {
   3092 		union DL_primitives *dl;
   3093 
   3094 		dl = (union DL_primitives *)mp->b_rptr;
   3095 		/*
   3096 		 * Things are opening or closing - only accept DLPI
   3097 		 * ack messages. If the stream is closing and ip_wsrv
   3098 		 * has completed, ip_close is out of the qwait, but has
   3099 		 * not yet completed qprocsoff. Don't proceed any further
   3100 		 * because the ill has been cleaned up and things hanging
   3101 		 * off the ill have been freed.
   3102 		 */
   3103 		if ((mp->b_datap->db_type != M_PCPROTO) ||
   3104 		    (dl->dl_primitive == DL_UNITDATA_IND)) {
   3105 			inet_freemsg(mp);
   3106 			return;
   3107 		}
   3108 	}
   3109 	if (DB_TYPE(mp) == M_DATA) {
   3110 		struct mac_header_info_s mhi;
   3111 
   3112 		ip_mdata_to_mhi(ill, mp, &mhi);
   3113 		ip_input_v6(ill, NULL, mp, &mhi);
   3114 	} else {
   3115 		ip_rput_notdata(ill, mp);
   3116 	}
   3117 }
   3118 
   3119 /*
   3120  * Walk through the IPv6 packet in mp and see if there's an AH header
   3121  * in it.  See if the AH header needs to get done before other headers in
   3122  * the packet.  (Worker function for ipsec_early_ah_v6().)
   3123  */
   3124 #define	IPSEC_HDR_DONT_PROCESS	0
   3125 #define	IPSEC_HDR_PROCESS	1
   3126 #define	IPSEC_MEMORY_ERROR	2 /* or malformed packet */
   3127 static int
   3128 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr)
   3129 {
   3130 	uint_t	length;
   3131 	uint_t	ehdrlen;
   3132 	uint8_t *whereptr;
   3133 	uint8_t *endptr;
   3134 	uint8_t *nexthdrp;
   3135 	ip6_dest_t *desthdr;
   3136 	ip6_rthdr_t *rthdr;
   3137 	ip6_t	*ip6h;
   3138 
   3139 	/*
   3140 	 * For now just pullup everything.  In general, the less pullups,
   3141 	 * the better, but there's so much squirrelling through anyway,
   3142 	 * it's just easier this way.
   3143 	 */
   3144 	if (!pullupmsg(mp, -1)) {
   3145 		return (IPSEC_MEMORY_ERROR);
   3146 	}
   3147 
   3148 	ip6h = (ip6_t *)mp->b_rptr;
   3149 	length = IPV6_HDR_LEN;
   3150 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
   3151 	endptr = mp->b_wptr;
   3152 
   3153 	/*
   3154 	 * We can't just use the argument nexthdr in the place
   3155 	 * of nexthdrp becaue we don't dereference nexthdrp
   3156 	 * till we confirm whether it is a valid address.
   3157 	 */
   3158 	nexthdrp = &ip6h->ip6_nxt;
   3159 	while (whereptr < endptr) {
   3160 		/* Is there enough left for len + nexthdr? */
   3161 		if (whereptr + MIN_EHDR_LEN > endptr)
   3162 			return (IPSEC_MEMORY_ERROR);
   3163 
   3164 		switch (*nexthdrp) {
   3165 		case IPPROTO_HOPOPTS:
   3166 		case IPPROTO_DSTOPTS:
   3167 			/* Assumes the headers are identical for hbh and dst */
   3168 			desthdr = (ip6_dest_t *)whereptr;
   3169 			ehdrlen = 8 * (desthdr->ip6d_len + 1);
   3170 			if ((uchar_t *)desthdr +  ehdrlen > endptr)
   3171 				return (IPSEC_MEMORY_ERROR);
   3172 			/*
   3173 			 * Return DONT_PROCESS because the destination
   3174 			 * options header may be for each hop in a
   3175 			 * routing-header, and we only want AH if we're
   3176 			 * finished with routing headers.
   3177 			 */
   3178 			if (*nexthdrp == IPPROTO_DSTOPTS)
   3179 				return (IPSEC_HDR_DONT_PROCESS);
   3180 			nexthdrp = &desthdr->ip6d_nxt;
   3181 			break;
   3182 		case IPPROTO_ROUTING:
   3183 			rthdr = (ip6_rthdr_t *)whereptr;
   3184 
   3185 			/*
   3186 			 * If there's more hops left on the routing header,
   3187 			 * return now with DON'T PROCESS.
   3188 			 */
   3189 			if (rthdr->ip6r_segleft > 0)
   3190 				return (IPSEC_HDR_DONT_PROCESS);
   3191 
   3192 			ehdrlen =  8 * (rthdr->ip6r_len + 1);
   3193 			if ((uchar_t *)rthdr +  ehdrlen > endptr)
   3194 				return (IPSEC_MEMORY_ERROR);
   3195 			nexthdrp = &rthdr->ip6r_nxt;
   3196 			break;
   3197 		case IPPROTO_FRAGMENT:
   3198 			/* Wait for reassembly */
   3199 			return (IPSEC_HDR_DONT_PROCESS);
   3200 		case IPPROTO_AH:
   3201 			*nexthdr = IPPROTO_AH;
   3202 			return (IPSEC_HDR_PROCESS);
   3203 		case IPPROTO_NONE:
   3204 			/* No next header means we're finished */
   3205 		default:
   3206 			return (IPSEC_HDR_DONT_PROCESS);
   3207 		}
   3208 		length += ehdrlen;
   3209 		whereptr += ehdrlen;
   3210 	}
   3211 	/*
   3212 	 * Malformed/truncated packet.
   3213 	 */
   3214 	return (IPSEC_MEMORY_ERROR);
   3215 }
   3216 
   3217 /*
   3218  * Path for AH if options are present.
   3219  * Returns NULL if the mblk was consumed.
   3220  *
   3221  * Sometimes AH needs to be done before other IPv6 headers for security
   3222  * reasons.  This function (and its ipsec_needs_processing_v6() above)
   3223  * indicates if that is so, and fans out to the appropriate IPsec protocol
   3224  * for the datagram passed in.
   3225  */
   3226 mblk_t *
   3227 ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira)
   3228 {
   3229 	uint8_t nexthdr;
   3230 	ah_t *ah;
   3231 	ill_t		*ill = ira->ira_ill;
   3232 	ip_stack_t	*ipst = ill->ill_ipst;
   3233 	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
   3234 
   3235 	switch (ipsec_needs_processing_v6(mp, &nexthdr)) {
   3236 	case IPSEC_MEMORY_ERROR:
   3237 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   3238 		ip_drop_input("ipIfStatsInDiscards", mp, ill);
   3239 		freemsg(mp);
   3240 		return (NULL);
   3241 	case IPSEC_HDR_DONT_PROCESS:
   3242 		return (mp);
   3243 	}
   3244 
   3245 	/* Default means send it to AH! */
   3246 	ASSERT(nexthdr == IPPROTO_AH);
   3247 
   3248 	if (!ipsec_loaded(ipss)) {
   3249 		ip_proto_not_sup(mp, ira);
   3250 		return (NULL);
   3251 	}
   3252 
   3253 	mp = ipsec_inbound_ah_sa(mp, ira, &ah);
   3254 	if (mp == NULL)
   3255 		return (NULL);
   3256 	ASSERT(ah != NULL);
   3257 	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
   3258 	ASSERT(ira->ira_ipsec_ah_sa != NULL);
   3259 	ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
   3260 	mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira);
   3261 
   3262 	if (mp == NULL) {
   3263 		/*
   3264 		 * Either it failed or is pending. In the former case
   3265 		 * ipIfStatsInDiscards was increased.
   3266 		 */
   3267 		return (NULL);
   3268 	}
   3269 
   3270 	/* we're done with IPsec processing, send it up */
   3271 	ip_input_post_ipsec(mp, ira);
   3272 	return (NULL);
   3273 }
   3274 
   3275 /*
   3276  * Reassemble fragment.
   3277  * When it returns a completed message the first mblk will only contain
   3278  * the headers prior to the fragment header, with the nexthdr value updated
   3279  * to be the header after the fragment header.
   3280  */
   3281 mblk_t *
   3282 ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h,
   3283     ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira)
   3284 {
   3285 	uint32_t	ident = ntohl(fraghdr->ip6f_ident);
   3286 	uint16_t	offset;
   3287 	boolean_t	more_frags;
   3288 	uint8_t		nexthdr = fraghdr->ip6f_nxt;
   3289 	in6_addr_t	*v6dst_ptr;
   3290 	in6_addr_t	*v6src_ptr;
   3291 	uint_t		end;
   3292 	uint_t		hdr_length;
   3293 	size_t		count;
   3294 	ipf_t		*ipf;
   3295 	ipf_t		**ipfp;
   3296 	ipfb_t		*ipfb;
   3297 	mblk_t		*mp1;
   3298 	uint8_t		ecn_info = 0;
   3299 	size_t		msg_len;
   3300 	mblk_t		*tail_mp;
   3301 	mblk_t		*t_mp;
   3302 	boolean_t	pruned = B_FALSE;
   3303 	uint32_t	sum_val;
   3304 	uint16_t	sum_flags;
   3305 	ill_t		*ill = ira->ira_ill;
   3306 	ip_stack_t	*ipst = ill->ill_ipst;
   3307 	uint_t		prev_nexthdr_offset;
   3308 	uint8_t		prev_nexthdr;
   3309 	uint8_t		*ptr;
   3310 	uint32_t	packet_size;
   3311 
   3312 	/*
   3313 	 * We utilize hardware computed checksum info only for UDP since
   3314 	 * IP fragmentation is a normal occurence for the protocol.  In
   3315 	 * addition, checksum offload support for IP fragments carrying
   3316 	 * UDP payload is commonly implemented across network adapters.
   3317 	 */
   3318 	ASSERT(ira->ira_rill != NULL);
   3319 	if (nexthdr == IPPROTO_UDP && dohwcksum &&
   3320 	    ILL_HCKSUM_CAPABLE(ira->ira_rill) &&
   3321 	    (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) {
   3322 		mblk_t *mp1 = mp->b_cont;
   3323 		int32_t len;
   3324 
   3325 		/* Record checksum information from the packet */
   3326 		sum_val = (uint32_t)DB_CKSUM16(mp);
   3327 		sum_flags = DB_CKSUMFLAGS(mp);
   3328 
   3329 		/* fragmented payload offset from beginning of mblk */
   3330 		offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr);
   3331 
   3332 		if ((sum_flags & HCK_PARTIALCKSUM) &&
   3333 		    (mp1 == NULL || mp1->b_cont == NULL) &&
   3334 		    offset >= DB_CKSUMSTART(mp) &&
   3335 		    ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) {
   3336 			uint32_t adj;
   3337 			/*
   3338 			 * Partial checksum has been calculated by hardware
   3339 			 * and attached to the packet; in addition, any
   3340 			 * prepended extraneous data is even byte aligned.
   3341 			 * If any such data exists, we adjust the checksum;
   3342 			 * this would also handle any postpended data.
   3343 			 */
   3344 			IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp),
   3345 			    mp, mp1, len, adj);
   3346 
   3347 			/* One's complement subtract extraneous checksum */
   3348 			if (adj >= sum_val)
   3349 				sum_val = ~(adj - sum_val) & 0xFFFF;
   3350 			else
   3351 				sum_val -= adj;
   3352 		}
   3353 	} else {
   3354 		sum_val = 0;
   3355 		sum_flags = 0;
   3356 	}
   3357 
   3358 	/* Clear hardware checksumming flag */
   3359 	DB_CKSUMFLAGS(mp) = 0;
   3360 
   3361 	/*
   3362 	 * Determine the offset (from the begining of the IP header)
   3363 	 * of the nexthdr value which has IPPROTO_FRAGMENT. We use
   3364 	 * this when removing the fragment header from the packet.
   3365 	 * This packet consists of the IPv6 header, a potential
   3366 	 * hop-by-hop options header, a potential pre-routing-header
   3367 	 * destination options header, and a potential routing header.
   3368 	 */
   3369 	prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
   3370 	prev_nexthdr = ip6h->ip6_nxt;
   3371 	ptr = (uint8_t *)&ip6h[1];
   3372 
   3373 	if (prev_nexthdr == IPPROTO_HOPOPTS) {
   3374 		ip6_hbh_t	*hbh_hdr;
   3375 		uint_t		hdr_len;
   3376 
   3377 		hbh_hdr = (ip6_hbh_t *)ptr;
   3378 		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
   3379 		prev_nexthdr = hbh_hdr->ip6h_nxt;
   3380 		prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
   3381 		    - (uint8_t *)ip6h;
   3382 		ptr += hdr_len;
   3383 	}
   3384 	if (prev_nexthdr == IPPROTO_DSTOPTS) {
   3385 		ip6_dest_t	*dest_hdr;
   3386 		uint_t		hdr_len;
   3387 
   3388 		dest_hdr = (ip6_dest_t *)ptr;
   3389 		hdr_len = 8 * (dest_hdr->ip6d_len + 1);
   3390 		prev_nexthdr = dest_hdr->ip6d_nxt;
   3391 		prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
   3392 		    - (uint8_t *)ip6h;
   3393 		ptr += hdr_len;
   3394 	}
   3395 	if (prev_nexthdr == IPPROTO_ROUTING) {
   3396 		ip6_rthdr_t	*rthdr;
   3397 		uint_t		hdr_len;
   3398 
   3399 		rthdr = (ip6_rthdr_t *)ptr;
   3400 		prev_nexthdr = rthdr->ip6r_nxt;
   3401 		prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
   3402 		    - (uint8_t *)ip6h;
   3403 		hdr_len = 8 * (rthdr->ip6r_len + 1);
   3404 		ptr += hdr_len;
   3405 	}
   3406 	if (prev_nexthdr != IPPROTO_FRAGMENT) {
   3407 		/* Can't handle other headers before the fragment header */
   3408 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
   3409 		ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
   3410 		freemsg(mp);
   3411 		return (NULL);
   3412 	}
   3413 
   3414 	/*
   3415 	 * Note: Fragment offset in header is in 8-octet units.
   3416 	 * Clearing least significant 3 bits not only extracts
   3417 	 * it but also gets it in units of octets.
   3418 	 */
   3419 	offset = ntohs(fraghdr->ip6f_offlg) & ~7;
   3420 	more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG);
   3421 
   3422 	/*
   3423 	 * Is the more frags flag on and the payload length not a multiple
   3424 	 * of eight?
   3425 	 */
   3426 	if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) {
   3427 		ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
   3428 		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
   3429 		    (uint32_t)((char *)&ip6h->ip6_plen -
   3430 		    (char *)ip6h), B_FALSE, ira);
   3431 		return (NULL);
   3432 	}
   3433 
   3434 	v6src_ptr = &ip6h->ip6_src;
   3435 	v6dst_ptr = &ip6h->ip6_dst;
   3436 	end = remlen;
   3437 
   3438 	hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h);
   3439 	end += offset;
   3440 
   3441 	/*
   3442 	 * Would fragment cause reassembled packet to have a payload length
   3443 	 * greater than IP_MAXPACKET - the max payload size?
   3444 	 */
   3445 	if (end > IP_MAXPACKET) {
   3446 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
   3447 		ip_drop_input("Reassembled packet too large", mp, ill);
   3448 		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
   3449 		    (uint32_t)((char *)&fraghdr->ip6f_offlg -
   3450 		    (char *)ip6h), B_FALSE, ira);
   3451 		return (NULL);
   3452 	}
   3453 
   3454 	/*
   3455 	 * This packet just has one fragment. Reassembly not
   3456 	 * needed.
   3457 	 */
   3458 	if (!more_frags && offset == 0) {
   3459 		goto reass_done;
   3460 	}
   3461 
   3462 	/*
   3463 	 * Drop the fragmented as early as possible, if
   3464 	 * we don't have resource(s) to re-assemble.
   3465 	 */
   3466 	if (ipst->ips_ip_reass_queue_bytes == 0) {
   3467 		freemsg(mp);
   3468 		return (NULL);
   3469 	}
   3470 
   3471 	/* Record the ECN field info. */
   3472 	ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20);
   3473 	/*
   3474 	 * If this is not the first fragment, dump the unfragmentable
   3475 	 * portion of the packet.
   3476 	 */
   3477 	if (offset)
   3478 		mp->b_rptr = (uchar_t *)&fraghdr[1];
   3479 
   3480 	/*
   3481 	 * Fragmentation reassembly.  Each ILL has a hash table for
   3482 	 * queueing packets undergoing reassembly for all IPIFs
   3483 	 * associated with the ILL.  The hash is based on the packet
   3484 	 * IP ident field.  The ILL frag hash table was allocated
   3485 	 * as a timer block at the time the ILL was created.  Whenever
   3486 	 * there is anything on the reassembly queue, the timer will
   3487 	 * be running.
   3488 	 */
   3489 	/* Handle vnic loopback of fragments */
   3490 	if (mp->b_datap->db_ref > 2)
   3491 		msg_len = 0;
   3492 	else
   3493 		msg_len = MBLKSIZE(mp);
   3494 
   3495 	tail_mp = mp;
   3496 	while (tail_mp->b_cont != NULL) {
   3497 		tail_mp = tail_mp->b_cont;
   3498 		if (tail_mp->b_datap->db_ref <= 2)
   3499 			msg_len += MBLKSIZE(tail_mp);
   3500 	}
   3501 	/*
   3502 	 * If the reassembly list for this ILL will get too big
   3503 	 * prune it.
   3504 	 */
   3505 
   3506 	if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >=
   3507 	    ipst->ips_ip_reass_queue_bytes) {
   3508 		DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len,
   3509 		    uint_t, ill->ill_frag_count,
   3510 		    uint_t, ipst->ips_ip_reass_queue_bytes);
   3511 		ill_frag_prune(ill,
   3512 		    (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 :
   3513 		    (ipst->ips_ip_reass_queue_bytes - msg_len));
   3514 		pruned = B_TRUE;
   3515 	}
   3516 
   3517 	ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)];
   3518 	mutex_enter(&ipfb->ipfb_lock);
   3519 
   3520 	ipfp = &ipfb->ipfb_ipf;
   3521 	/* Try to find an existing fragment queue for this packet. */
   3522 	for (;;) {
   3523 		ipf = ipfp[0];
   3524 		if (ipf) {
   3525 			/*
   3526 			 * It has to match on ident, source address, and
   3527 			 * dest address.
   3528 			 */
   3529 			if (ipf->ipf_ident == ident &&
   3530 			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) &&
   3531 			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) {
   3532 
   3533 				/*
   3534 				 * If we have received too many
   3535 				 * duplicate fragments for this packet
   3536 				 * free it.
   3537 				 */
   3538 				if (ipf->ipf_num_dups > ip_max_frag_dups) {
   3539 					ill_frag_free_pkts(ill, ipfb, ipf, 1);
   3540 					freemsg(mp);
   3541 					mutex_exit(&ipfb->ipfb_lock);
   3542 					return (NULL);
   3543 				}
   3544 
   3545 				break;
   3546 			}
   3547 			ipfp = &ipf->ipf_hash_next;
   3548 			continue;
   3549 		}
   3550 
   3551 
   3552 		/*
   3553 		 * If we pruned the list, do we want to store this new
   3554 		 * fragment?. We apply an optimization here based on the
   3555 		 * fact that most fragments will be received in order.
   3556 		 * So if the offset of this incoming fragment is zero,
   3557 		 * it is the first fragment of a new packet. We will
   3558 		 * keep it.  Otherwise drop the fragment, as we have
   3559 		 * probably pruned the packet already (since the
   3560 		 * packet cannot be found).
   3561 		 */
   3562 
   3563 		if (pruned && offset != 0) {
   3564 			mutex_exit(&ipfb->ipfb_lock);
   3565 			freemsg(mp);
   3566 			return (NULL);
   3567 		}
   3568 
   3569 		/* New guy.  Allocate a frag message. */
   3570 		mp1 = allocb(sizeof (*ipf), BPRI_MED);
   3571 		if (!mp1) {
   3572 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   3573 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
   3574 			freemsg(mp);
   3575 	partial_reass_done:
   3576 			mutex_exit(&ipfb->ipfb_lock);
   3577 			return (NULL);
   3578 		}
   3579 
   3580 		if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst))  {
   3581 			/*
   3582 			 * Too many fragmented packets in this hash bucket.
   3583 			 * Free the oldest.
   3584 			 */
   3585 			ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1);
   3586 		}
   3587 
   3588 		mp1->b_cont = mp;
   3589 
   3590 		/* Initialize the fragment header. */
   3591 		ipf = (ipf_t *)mp1->b_rptr;
   3592 		ipf->ipf_mp = mp1;
   3593 		ipf->ipf_ptphn = ipfp;
   3594 		ipfp[0] = ipf;
   3595 		ipf->ipf_hash_next = NULL;
   3596 		ipf->ipf_ident = ident;
   3597 		ipf->ipf_v6src = *v6src_ptr;
   3598 		ipf->ipf_v6dst = *v6dst_ptr;
   3599 		/* Record reassembly start time. */
   3600 		ipf->ipf_timestamp = gethrestime_sec();
   3601 		/* Record ipf generation and account for frag header */
   3602 		ipf->ipf_gen = ill->ill_ipf_gen++;
   3603 		ipf->ipf_count = MBLKSIZE(mp1);
   3604 		ipf->ipf_protocol = nexthdr;
   3605 		ipf->ipf_nf_hdr_len = 0;
   3606 		ipf->ipf_prev_nexthdr_offset = 0;
   3607 		ipf->ipf_last_frag_seen = B_FALSE;
   3608 		ipf->ipf_ecn = ecn_info;
   3609 		ipf->ipf_num_dups = 0;
   3610 		ipfb->ipfb_frag_pkts++;
   3611 		ipf->ipf_checksum = 0;
   3612 		ipf->ipf_checksum_flags = 0;
   3613 
   3614 		/* Store checksum value in fragment header */
   3615 		if (sum_flags != 0) {
   3616 			sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
   3617 			sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
   3618 			ipf->ipf_checksum = sum_val;
   3619 			ipf->ipf_checksum_flags = sum_flags;
   3620 		}
   3621 
   3622 		/*
   3623 		 * We handle reassembly two ways.  In the easy case,
   3624 		 * where all the fragments show up in order, we do
   3625 		 * minimal bookkeeping, and just clip new pieces on
   3626 		 * the end.  If we ever see a hole, then we go off
   3627 		 * to ip_reassemble which has to mark the pieces and
   3628 		 * keep track of the number of holes, etc.  Obviously,
   3629 		 * the point of having both mechanisms is so we can
   3630 		 * handle the easy case as efficiently as possible.
   3631 		 */
   3632 		if (offset == 0) {
   3633 			/* Easy case, in-order reassembly so far. */
   3634 			/* Update the byte count */
   3635 			ipf->ipf_count += msg_len;
   3636 			ipf->ipf_tail_mp = tail_mp;
   3637 			/*
   3638 			 * Keep track of next expected offset in
   3639 			 * ipf_end.
   3640 			 */
   3641 			ipf->ipf_end = end;
   3642 			ipf->ipf_nf_hdr_len = hdr_length;
   3643 			ipf->ipf_prev_nexthdr_offset = prev_nexthdr_offset;
   3644 		} else {
   3645 			/* Hard case, hole at the beginning. */
   3646 			ipf->ipf_tail_mp = NULL;
   3647 			/*
   3648 			 * ipf_end == 0 means that we have given up
   3649 			 * on easy reassembly.
   3650 			 */
   3651 			ipf->ipf_end = 0;
   3652 
   3653 			/* Forget checksum offload from now on */
   3654 			ipf->ipf_checksum_flags = 0;
   3655 
   3656 			/*
   3657 			 * ipf_hole_cnt is set by ip_reassemble.
   3658 			 * ipf_count is updated by ip_reassemble.
   3659 			 * No need to check for return value here
   3660 			 * as we don't expect reassembly to complete or
   3661 			 * fail for the first fragment itself.
   3662 			 */
   3663 			(void) ip_reassemble(mp, ipf, offset, more_frags, ill,
   3664 			    msg_len);
   3665 		}
   3666 		/* Update per ipfb and ill byte counts */
   3667 		ipfb->ipfb_count += ipf->ipf_count;
   3668 		ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
   3669 		atomic_add_32(&ill->ill_frag_count, ipf->ipf_count);
   3670 		/* If the frag timer wasn't already going, start it. */
   3671 		mutex_enter(&ill->ill_lock);
   3672 		ill_frag_timer_start(ill);
   3673 		mutex_exit(&ill->ill_lock);
   3674 		goto partial_reass_done;
   3675 	}
   3676 
   3677 	/*
   3678 	 * If the packet's flag has changed (it could be coming up
   3679 	 * from an interface different than the previous, therefore
   3680 	 * possibly different checksum capability), then forget about
   3681 	 * any stored checksum states.  Otherwise add the value to
   3682 	 * the existing one stored in the fragment header.
   3683 	 */
   3684 	if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) {
   3685 		sum_val += ipf->ipf_checksum;
   3686 		sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
   3687 		sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
   3688 		ipf->ipf_checksum = sum_val;
   3689 	} else if (ipf->ipf_checksum_flags != 0) {
   3690 		/* Forget checksum offload from now on */
   3691 		ipf->ipf_checksum_flags = 0;
   3692 	}
   3693 
   3694 	/*
   3695 	 * We have a new piece of a datagram which is already being
   3696 	 * reassembled.  Update the ECN info if all IP fragments
   3697 	 * are ECN capable.  If there is one which is not, clear
   3698 	 * all the info.  If there is at least one which has CE
   3699 	 * code point, IP needs to report that up to transport.
   3700 	 */
   3701 	if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) {
   3702 		if (ecn_info == IPH_ECN_CE)
   3703 			ipf->ipf_ecn = IPH_ECN_CE;
   3704 	} else {
   3705 		ipf->ipf_ecn = IPH_ECN_NECT;
   3706 	}
   3707 
   3708 	if (offset && ipf->ipf_end == offset) {
   3709 		/* The new fragment fits at the end */
   3710 		ipf->ipf_tail_mp->b_cont = mp;
   3711 		/* Update the byte count */
   3712 		ipf->ipf_count += msg_len;
   3713 		/* Update per ipfb and ill byte counts */
   3714 		ipfb->ipfb_count += msg_len;
   3715 		ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
   3716 		atomic_add_32(&ill->ill_frag_count, msg_len);
   3717 		if (more_frags) {
   3718 			/* More to come. */
   3719 			ipf->ipf_end = end;
   3720 			ipf->ipf_tail_mp = tail_mp;
   3721 			goto partial_reass_done;
   3722 		}
   3723 	} else {
   3724 		/*
   3725 		 * Go do the hard cases.
   3726 		 * Call ip_reassemble().
   3727 		 */
   3728 		int ret;
   3729 
   3730 		if (offset == 0) {
   3731 			if (ipf->ipf_prev_nexthdr_offset == 0) {
   3732 				ipf->ipf_nf_hdr_len = hdr_length;
   3733 				ipf->ipf_prev_nexthdr_offset =
   3734 				    prev_nexthdr_offset;
   3735 			}
   3736 		}
   3737 		/* Save current byte count */
   3738 		count = ipf->ipf_count;
   3739 		ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len);
   3740 
   3741 		/* Count of bytes added and subtracted (freeb()ed) */
   3742 		count = ipf->ipf_count - count;
   3743 		if (count) {
   3744 			/* Update per ipfb and ill byte counts */
   3745 			ipfb->ipfb_count += count;
   3746 			ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
   3747 			atomic_add_32(&ill->ill_frag_count, count);
   3748 		}
   3749 		if (ret == IP_REASS_PARTIAL) {
   3750 			goto partial_reass_done;
   3751 		} else if (ret == IP_REASS_FAILED) {
   3752 			/* Reassembly failed. Free up all resources */
   3753 			ill_frag_free_pkts(ill, ipfb, ipf, 1);
   3754 			for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) {
   3755 				IP_REASS_SET_START(t_mp, 0);
   3756 				IP_REASS_SET_END(t_mp, 0);
   3757 			}
   3758 			freemsg(mp);
   3759 			goto partial_reass_done;
   3760 		}
   3761 
   3762 		/* We will reach here iff 'ret' is IP_REASS_COMPLETE */
   3763 	}
   3764 	/*
   3765 	 * We have completed reassembly.  Unhook the frag header from
   3766 	 * the reassembly list.
   3767 	 *
   3768 	 * Grab the unfragmentable header length next header value out
   3769 	 * of the first fragment
   3770 	 */
   3771 	ASSERT(ipf->ipf_nf_hdr_len != 0);
   3772 	hdr_length = ipf->ipf_nf_hdr_len;
   3773 
   3774 	/*
   3775 	 * Before we free the frag header, record the ECN info
   3776 	 * to report back to the transport.
   3777 	 */
   3778 	ecn_info = ipf->ipf_ecn;
   3779 
   3780 	/*
   3781 	 * Store the nextheader field in the header preceding the fragment
   3782 	 * header
   3783 	 */
   3784 	nexthdr = ipf->ipf_protocol;
   3785 	prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset;
   3786 	ipfp = ipf->ipf_ptphn;
   3787 
   3788 	/* We need to supply these to caller */
   3789 	if ((sum_flags = ipf->ipf_checksum_flags) != 0)
   3790 		sum_val = ipf->ipf_checksum;
   3791 	else
   3792 		sum_val = 0;
   3793 
   3794 	mp1 = ipf->ipf_mp;
   3795 	count = ipf->ipf_count;
   3796 	ipf = ipf->ipf_hash_next;
   3797 	if (ipf)
   3798 		ipf->ipf_ptphn = ipfp;
   3799 	ipfp[0] = ipf;
   3800 	atomic_add_32(&ill->ill_frag_count, -count);
   3801 	ASSERT(ipfb->ipfb_count >= count);
   3802 	ipfb->ipfb_count -= count;
   3803 	ipfb->ipfb_frag_pkts--;
   3804 	mutex_exit(&ipfb->ipfb_lock);
   3805 	/* Ditch the frag header. */
   3806 	mp = mp1->b_cont;
   3807 	freeb(mp1);
   3808 
   3809 	/*
   3810 	 * Make sure the packet is good by doing some sanity
   3811 	 * check. If bad we can silently drop the packet.
   3812 	 */
   3813 reass_done:
   3814 	if (hdr_length < sizeof (ip6_frag_t)) {
   3815 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
   3816 		ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
   3817 		ip1dbg(("ip_input_fragment_v6: bad packet\n"));
   3818 		freemsg(mp);
   3819 		return (NULL);
   3820 	}
   3821 
   3822 	/*
   3823 	 * Remove the fragment header from the initial header by
   3824 	 * splitting the mblk into the non-fragmentable header and
   3825 	 * everything after the fragment extension header.  This has the
   3826 	 * side effect of putting all the headers that need destination
   3827 	 * processing into the b_cont block-- on return this fact is
   3828 	 * used in order to avoid having to look at the extensions
   3829 	 * already processed.
   3830 	 *
   3831 	 * Note that this code assumes that the unfragmentable portion
   3832 	 * of the header is in the first mblk and increments
   3833 	 * the read pointer past it.  If this assumption is broken
   3834 	 * this code fails badly.
   3835 	 */
   3836 	if (mp->b_rptr + hdr_length != mp->b_wptr) {
   3837 		mblk_t *nmp;
   3838 
   3839 		if (!(nmp = dupb(mp))) {
   3840 			ip1dbg(("ip_input_fragment_v6: dupb failed\n"));
   3841 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
   3842 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
   3843 			freemsg(mp);
   3844 			return (NULL);
   3845 		}
   3846 		nmp->b_cont = mp->b_cont;
   3847 		mp->b_cont = nmp;
   3848 		nmp->b_rptr += hdr_length;
   3849 	}
   3850 	mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t);
   3851 
   3852 	ip6h = (ip6_t *)mp->b_rptr;
   3853 	((char *)ip6h)[prev_nexthdr_offset] = nexthdr;
   3854 
   3855 	/* Restore original IP length in header. */
   3856 	packet_size = msgdsize(mp);
   3857 	ip6h->ip6_plen = htons((uint16_t)(packet_size - IPV6_HDR_LEN));
   3858 	/* Record the ECN info. */
   3859 	ip6h->ip6_vcf &= htonl(0xFFCFFFFF);
   3860 	ip6h->ip6_vcf |= htonl(ecn_info << 20);
   3861 
   3862 	/* Update the receive attributes */
   3863 	ira->ira_pktlen = packet_size;
   3864 	ira->ira_ip_hdr_length = hdr_length - sizeof (ip6_frag_t);
   3865 	ira->ira_protocol = nexthdr;
   3866 
   3867 	/* Reassembly is successful; set checksum information in packet */
   3868 	DB_CKSUM16(mp) = (uint16_t)sum_val;
   3869 	DB_CKSUMFLAGS(mp) = sum_flags;
   3870 	DB_CKSUMSTART(mp) = ira->ira_ip_hdr_length;
   3871 
   3872 	return (mp);
   3873 }
   3874 
   3875 /*
   3876  * Given an mblk and a ptr, find the destination address in an IPv6 routing
   3877  * header.
   3878  */
   3879 static in6_addr_t
   3880 pluck_out_dst(const mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv)
   3881 {
   3882 	ip6_rthdr0_t *rt0;
   3883 	int segleft, numaddr;
   3884 	in6_addr_t *ap, rv = oldrv;
   3885 
   3886 	rt0 = (ip6_rthdr0_t *)whereptr;
   3887 	if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) {
   3888 		DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp,
   3889 		    uint8_t *, whereptr);
   3890 		return (rv);
   3891 	}
   3892 	segleft = rt0->ip6r0_segleft;
   3893 	numaddr = rt0->ip6r0_len / 2;
   3894 
   3895 	if ((rt0->ip6r0_len & 0x1) ||
   3896 	    (mp != NULL && whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr) ||
   3897 	    (segleft > rt0->ip6r0_len / 2)) {
   3898 		/*
   3899 		 * Corrupt packet.  Either the routing header length is odd
   3900 		 * (can't happen) or mismatched compared to the packet, or the
   3901 		 * number of addresses is.  Return what we can.  This will
   3902 		 * only be a problem on forwarded packets that get squeezed
   3903 		 * through an outbound tunnel enforcing IPsec Tunnel Mode.
   3904 		 */
   3905 		DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *,
   3906 		    whereptr);
   3907 		return (rv);
   3908 	}
   3909 
   3910 	if (segleft != 0) {
   3911 		ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0));
   3912 		rv = ap[numaddr - 1];
   3913 	}
   3914 
   3915 	return (rv);
   3916 }
   3917 
   3918 /*
   3919  * Walk through the options to see if there is a routing header.
   3920  * If present get the destination which is the last address of
   3921  * the option.
   3922  * mp needs to be provided in cases when the extension headers might span
   3923  * b_cont; mp is never modified by this function.
   3924  */
   3925 in6_addr_t
   3926 ip_get_dst_v6(ip6_t *ip6h, const mblk_t *mp, boolean_t *is_fragment)
   3927 {
   3928 	const mblk_t *current_mp = mp;
   3929 	uint8_t nexthdr;
   3930 	uint8_t *whereptr;
   3931 	int ehdrlen;
   3932 	in6_addr_t rv;
   3933 
   3934 	whereptr = (uint8_t *)ip6h;
   3935 	ehdrlen = sizeof (ip6_t);
   3936 
   3937 	/* We assume at least the IPv6 base header is within one mblk. */
   3938 	ASSERT(mp == NULL ||
   3939 	    (mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen));
   3940 
   3941 	rv = ip6h->ip6_dst;
   3942 	nexthdr = ip6h->ip6_nxt;
   3943 	if (is_fragment != NULL)
   3944 		*is_fragment = B_FALSE;
   3945 
   3946 	/*
   3947 	 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that
   3948 	 * no extension headers will be split across mblks.
   3949 	 */
   3950 
   3951 	while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS ||
   3952 	    nexthdr == IPPROTO_ROUTING) {
   3953 		if (nexthdr == IPPROTO_ROUTING)
   3954 			rv = pluck_out_dst(current_mp, whereptr, rv);
   3955 
   3956 		/*
   3957 		 * All IPv6 extension headers have the next-header in byte
   3958 		 * 0, and the (length - 8) in 8-byte-words.
   3959 		 */
   3960 		while (current_mp != NULL &&
   3961 		    whereptr + ehdrlen >= current_mp->b_wptr) {
   3962 			ehdrlen -= (current_mp->b_wptr - whereptr);
   3963 			current_mp = current_mp->b_cont;
   3964 			if (current_mp == NULL) {
   3965 				/* Bad packet.  Return what we can. */
   3966 				DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *,
   3967 				    mp, mblk_t *, current_mp, ip6_t *, ip6h);
   3968 				goto done;
   3969 			}
   3970 			whereptr = current_mp->b_rptr;
   3971 		}
   3972 		whereptr += ehdrlen;
   3973 
   3974 		nexthdr = *whereptr;
   3975 		ASSERT(current_mp == NULL || whereptr + 1 < current_mp->b_wptr);
   3976 		ehdrlen = (*(whereptr + 1) + 1) * 8;
   3977 	}
   3978 
   3979 done:
   3980 	if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL)
   3981 		*is_fragment = B_TRUE;
   3982 	return (rv);
   3983 }
   3984 
   3985 /*
   3986  * ip_source_routed_v6:
   3987  * This function is called by redirect code (called from ip_input_v6) to
   3988  * know whether this packet is source routed through this node i.e
   3989  * whether this node (router) is part of the journey. This
   3990  * function is called under two cases :
   3991  *
   3992  * case 1 : Routing header was processed by this node and
   3993  *	    ip_process_rthdr replaced ip6_dst with the next hop
   3994  *	    and we are forwarding the packet to the next hop.
   3995  *
   3996  * case 2 : Routing header was not processed by this node and we
   3997  *	    are just forwarding the packet.
   3998  *
   3999  * For case (1) we don't want to send redirects. For case(2) we
   4000  * want to send redirects.
   4001  */
   4002 static boolean_t
   4003 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst)
   4004 {
   4005 	uint8_t		nexthdr;
   4006 	in6_addr_t	*addrptr;
   4007 	ip6_rthdr0_t	*rthdr;
   4008 	uint8_t		numaddr;
   4009 	ip6_hbh_t	*hbhhdr;
   4010 	uint_t		ehdrlen;
   4011 	uint8_t		*byteptr;
   4012 
   4013 	ip2dbg(("ip_source_routed_v6\n"));
   4014 	nexthdr = ip6h->ip6_nxt;
   4015 	ehdrlen = IPV6_HDR_LEN;
   4016 
   4017 	/* if a routing hdr is preceeded by HOPOPT or DSTOPT */
   4018 	while (nexthdr == IPPROTO_HOPOPTS ||
   4019 	    nexthdr == IPPROTO_DSTOPTS) {
   4020 		byteptr = (uint8_t *)ip6h + ehdrlen;
   4021 		/*
   4022 		 * Check if we have already processed
   4023 		 * packets or we are just a forwarding
   4024 		 * router which only pulled up msgs up
   4025 		 * to IPV6HDR and  one HBH ext header
   4026 		 */
   4027 		if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
   4028 			ip2dbg(("ip_source_routed_v6: Extension"
   4029 			    " headers not processed\n"));
   4030 			return (B_FALSE);
   4031 		}
   4032 		hbhhdr = (ip6_hbh_t *)byteptr;
   4033 		nexthdr = hbhhdr->ip6h_nxt;
   4034 		ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1);
   4035 	}
   4036 	switch (nexthdr) {
   4037 	case IPPROTO_ROUTING:
   4038 		byteptr = (uint8_t *)ip6h + ehdrlen;
   4039 		/*
   4040 		 * If for some reason, we haven't pulled up
   4041 		 * the routing hdr data mblk, then we must
   4042 		 * not have processed it at all. So for sure
   4043 		 * we are not part of the source routed journey.
   4044 		 */
   4045 		if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
   4046 			ip2dbg(("ip_source_routed_v6: Routing"
   4047 			    " header not processed\n"));
   4048 			return (B_FALSE);
   4049 		}
   4050 		rthdr = (ip6_rthdr0_t *)byteptr;
   4051 		/*
   4052 		 * Either we are an intermediate router or the
   4053 		 * last hop before destination and we have
   4054 		 * already processed the routing header.
   4055 		 * If segment_left is greater than or equal to zero,
   4056 		 * then we must be the (numaddr - segleft) entry
   4057 		 * of the routing header. Although ip6r0_segleft
   4058 		 * is a unit8_t variable, we still check for zero
   4059 		 * or greater value, if in case the data type
   4060 		 * is changed someday in future.
   4061 		 */
   4062 		if (rthdr->ip6r0_segleft > 0 ||
   4063 		    rthdr->ip6r0_segleft == 0) {
   4064 			numaddr = rthdr->ip6r0_len / 2;
   4065 			addrptr = (in6_addr_t *)((char *)rthdr +
   4066 			    sizeof (*rthdr));
   4067 			addrptr += (numaddr - (rthdr->ip6r0_segleft + 1));
   4068 			if (addrptr != NULL) {
   4069 				if (ip_type_v6(addrptr, ipst) == IRE_LOCAL)
   4070 					return (B_TRUE);
   4071 				ip1dbg(("ip_source_routed_v6: Not local\n"));
   4072 			}
   4073 		}
   4074 	/* FALLTHRU */
   4075 	default:
   4076 		ip2dbg(("ip_source_routed_v6: Not source routed here\n"));
   4077 		return (B_FALSE);
   4078 	}
   4079 }
   4080 
   4081 /*
   4082  * IPv6 fragmentation.  Essentially the same as IPv4 fragmentation.
   4083  * We have not optimized this in terms of number of mblks
   4084  * allocated. For instance, for each fragment sent we always allocate a
   4085  * mblk to hold the IPv6 header and fragment header.
   4086  *
   4087  * Assumes that all the extension headers are contained in the first mblk
   4088  * and that the fragment header has has already been added by calling
   4089  * ip_fraghdr_add_v6.
   4090  */
   4091 int
   4092 ip_fragment_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, uint_t pkt_len,
   4093     uint32_t max_frag, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid,
   4094     pfirepostfrag_t postfragfn, uintptr_t *ixa_cookie)
   4095 {
   4096 	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
   4097 	ip6_t		*fip6h;
   4098 	mblk_t		*hmp;
   4099 	mblk_t		*hmp0;
   4100 	mblk_t		*dmp;
   4101 	ip6_frag_t	*fraghdr;
   4102 	size_t		unfragmentable_len;
   4103 	size_t		mlen;
   4104 	size_t		max_chunk;
   4105 	uint16_t	off_flags;
   4106 	uint16_t	offset = 0;
   4107 	ill_t		*ill = nce->nce_ill;
   4108 	uint8_t		nexthdr;
   4109 	uint8_t		*ptr;
   4110 	ip_stack_t	*ipst = ill->ill_ipst;
   4111 	uint_t		priority = mp->b_band;
   4112 	int		error = 0;
   4113 
   4114 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds);
   4115 	if (max_frag == 0) {
   4116 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
   4117 		ip_drop_output("FragFails: zero max_frag", mp, ill);
   4118 		freemsg(mp);
   4119 		return (EINVAL);
   4120 	}
   4121 
   4122 	/*
   4123 	 * Caller should have added fraghdr_t to pkt_len, and also
   4124 	 * updated ip6_plen.
   4125 	 */
   4126 	ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == pkt_len);
   4127 	ASSERT(msgdsize(mp) == pkt_len);
   4128 
   4129 	/*
   4130 	 * Determine the length of the unfragmentable portion of this
   4131 	 * datagram.  This consists of the IPv6 header, a potential
   4132 	 * hop-by-hop options header, a potential pre-routing-header
   4133 	 * destination options header, and a potential routing header.
   4134 	 */
   4135 	nexthdr = ip6h->ip6_nxt;
   4136 	ptr = (uint8_t *)&ip6h[1];
   4137 
   4138 	if (nexthdr == IPPROTO_HOPOPTS) {
   4139 		ip6_hbh_t	*hbh_hdr;
   4140 		uint_t		hdr_len;
   4141 
   4142 		hbh_hdr = (ip6_hbh_t *)ptr;
   4143 		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
   4144 		nexthdr = hbh_hdr->ip6h_nxt;
   4145 		ptr += hdr_len;
   4146 	}
   4147 	if (nexthdr == IPPROTO_DSTOPTS) {
   4148 		ip6_dest_t	*dest_hdr;
   4149 		uint_t		hdr_len;
   4150 
   4151 		dest_hdr = (ip6_dest_t *)ptr;
   4152 		if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
   4153 			hdr_len = 8 * (dest_hdr->ip6d_len + 1);
   4154 			nexthdr = dest_hdr->ip6d_nxt;
   4155 			ptr += hdr_len;
   4156 		}
   4157 	}
   4158 	if (nexthdr == IPPROTO_ROUTING) {
   4159 		ip6_rthdr_t	*rthdr;
   4160 		uint_t		hdr_len;
   4161 
   4162 		rthdr = (ip6_rthdr_t *)ptr;
   4163 		nexthdr = rthdr->ip6r_nxt;
   4164 		hdr_len = 8 * (rthdr->ip6r_len + 1);
   4165 		ptr += hdr_len;
   4166 	}
   4167 	if (nexthdr != IPPROTO_FRAGMENT) {
   4168 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
   4169 		ip_drop_output("FragFails: bad nexthdr", mp, ill);
   4170 		freemsg(mp);
   4171 		return (EINVAL);
   4172 	}
   4173 	unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
   4174 	unfragmentable_len += sizeof (ip6_frag_t);
   4175 
   4176 	max_chunk = (max_frag - unfragmentable_len) & ~7;
   4177 
   4178 	/*
   4179 	 * Allocate an mblk with enough room for the link-layer
   4180 	 * header and the unfragmentable part of the datagram, which includes
   4181 	 * the fragment header.  This (or a copy) will be used as the
   4182 	 * first mblk for each fragment we send.
   4183 	 */
   4184 	hmp = allocb_tmpl(unfragmentable_len + ipst->ips_ip_wroff_extra, mp);
   4185 	if (hmp == NULL) {
   4186 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
   4187 		ip_drop_output("FragFails: no hmp", mp, ill);
   4188 		freemsg(mp);
   4189 		return (ENOBUFS);
   4190 	}
   4191 	hmp->b_rptr += ipst->ips_ip_wroff_extra;
   4192 	hmp->b_wptr = hmp->b_rptr + unfragmentable_len;
   4193 
   4194 	fip6h = (ip6_t *)hmp->b_rptr;
   4195 	bcopy(ip6h, fip6h, unfragmentable_len);
   4196 
   4197 	/*
   4198 	 * pkt_len is set to the total length of the fragmentable data in this
   4199 	 * datagram.  For each fragment sent, we will decrement pkt_len
   4200 	 * by the amount of fragmentable data sent in that fragment
   4201 	 * until len reaches zero.
   4202 	 */
   4203 	pkt_len -= unfragmentable_len;
   4204 
   4205 	/*
   4206 	 * Move read ptr past unfragmentable portion, we don't want this part
   4207 	 * of the data in our fragments.
   4208 	 */
   4209 	mp->b_rptr += unfragmentable_len;
   4210 	if (mp->b_rptr == mp->b_wptr) {
   4211 		mblk_t *mp1 = mp->b_cont;
   4212 		freeb(mp);
   4213 		mp = mp1;
   4214 	}
   4215 
   4216 	while (pkt_len != 0) {
   4217 		mlen = MIN(pkt_len, max_chunk);
   4218 		pkt_len -= mlen;
   4219 		if (pkt_len != 0) {
   4220 			/* Not last */
   4221 			hmp0 = copyb(hmp);
   4222 			if (hmp0 == NULL) {
   4223 				BUMP_MIB(ill->ill_ip_mib,
   4224 				    ipIfStatsOutFragFails);
   4225 				ip_drop_output("FragFails: copyb failed",
   4226 				    mp, ill);
   4227 				freeb(hmp);
   4228 				freemsg(mp);
   4229 				ip1dbg(("ip_fragment_v6: copyb failed\n"));
   4230 				return (ENOBUFS);
   4231 			}
   4232 			off_flags = IP6F_MORE_FRAG;
   4233 		} else {
   4234 			/* Last fragment */
   4235 			hmp0 = hmp;
   4236 			hmp = NULL;
   4237 			off_flags = 0;
   4238 		}
   4239 		fip6h = (ip6_t *)(hmp0->b_rptr);
   4240 		fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len -
   4241 		    sizeof (ip6_frag_t));
   4242 
   4243 		fip6h->ip6_plen = htons((uint16_t)(mlen +
   4244 		    unfragmentable_len - IPV6_HDR_LEN));
   4245 		/*
   4246 		 * Note: Optimization alert.
   4247 		 * In IPv6 (and IPv4) protocol header, Fragment Offset
   4248 		 * ("offset") is 13 bits wide and in 8-octet units.
   4249 		 * In IPv6 protocol header (unlike IPv4) in a 16 bit field,
   4250 		 * it occupies the most significant 13 bits.
   4251 		 * (least significant 13 bits in IPv4).
   4252 		 * We do not do any shifts here. Not shifting is same effect
   4253 		 * as taking offset value in octet units, dividing by 8 and
   4254 		 * then shifting 3 bits left to line it up in place in proper
   4255 		 * place protocol header.
   4256 		 */
   4257 		fraghdr->ip6f_offlg = htons(offset) | off_flags;
   4258 
   4259 		if (!(dmp = ip_carve_mp(&mp, mlen))) {
   4260 			/* mp has already been freed by ip_carve_mp() */
   4261 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
   4262 			ip_drop_output("FragFails: could not carve mp",
   4263 			    hmp0, ill);
   4264 			if (hmp != NULL)
   4265 				freeb(hmp);
   4266 			freeb(hmp0);
   4267 			ip1dbg(("ip_carve_mp: failed\n"));
   4268 			return (ENOBUFS);
   4269 		}
   4270 		hmp0->b_cont = dmp;
   4271 		/* Get the priority marking, if any */
   4272 		hmp0->b_band = priority;
   4273 
   4274 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates);
   4275 
   4276 		error = postfragfn(hmp0, nce, ixaflags,
   4277 		    mlen + unfragmentable_len, xmit_hint, szone, nolzid,
   4278 		    ixa_cookie);
   4279 		if (error != 0 && error != EWOULDBLOCK && hmp != NULL) {
   4280 			/* No point in sending the other fragments */
   4281 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
   4282 			ip_drop_output("FragFails: postfragfn failed",
   4283 			    hmp, ill);
   4284 			freeb(hmp);
   4285 			freemsg(mp);
   4286 			return (error);
   4287 		}
   4288 		/* No need to redo state machine in loop */
   4289 		ixaflags &= ~IXAF_REACH_CONF;
   4290 
   4291 		offset += mlen;
   4292 	}
   4293 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs);
   4294 	return (error);
   4295 }
   4296 
   4297 /*
   4298  * Add a fragment header to an IPv6 packet.
   4299  * Assumes that all the extension headers are contained in the first mblk.
   4300  *
   4301  * The fragment header is inserted after an hop-by-hop options header
   4302  * and after [an optional destinations header followed by] a routing header.
   4303  */
   4304 mblk_t *
   4305 ip_fraghdr_add_v6(mblk_t *mp, uint32_t ident, ip_xmit_attr_t *ixa)
   4306 {
   4307 	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
   4308 	ip6_t		*fip6h;
   4309 	mblk_t		*hmp;
   4310 	ip6_frag_t	*fraghdr;
   4311 	size_t		unfragmentable_len;
   4312 	uint8_t		nexthdr;
   4313 	uint_t		prev_nexthdr_offset;
   4314 	uint8_t		*ptr;
   4315 	uint_t		priority = mp->b_band;
   4316 	ip_stack_t	*ipst = ixa->ixa_ipst;
   4317 
   4318 	/*
   4319 	 * Determine the length of the unfragmentable portion of this
   4320 	 * datagram.  This consists of the IPv6 header, a potential
   4321 	 * hop-by-hop options header, a potential pre-routing-header
   4322 	 * destination options header, and a potential routing header.
   4323 	 */
   4324 	nexthdr = ip6h->ip6_nxt;
   4325 	prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
   4326 	ptr = (uint8_t *)&ip6h[1];
   4327 
   4328 	if (nexthdr == IPPROTO_HOPOPTS) {
   4329 		ip6_hbh_t	*hbh_hdr;
   4330 		uint_t		hdr_len;
   4331 
   4332 		hbh_hdr = (ip6_hbh_t *)ptr;
   4333 		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
   4334 		nexthdr = hbh_hdr->ip6h_nxt;
   4335 		prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
   4336 		    - (uint8_t *)ip6h;
   4337 		ptr += hdr_len;
   4338 	}
   4339 	if (nexthdr == IPPROTO_DSTOPTS) {
   4340 		ip6_dest_t	*dest_hdr;
   4341 		uint_t		hdr_len;
   4342 
   4343 		dest_hdr = (ip6_dest_t *)ptr;
   4344 		if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
   4345 			hdr_len = 8 * (dest_hdr->ip6d_len + 1);
   4346 			nexthdr = dest_hdr->ip6d_nxt;
   4347 			prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
   4348 			    - (uint8_t *)ip6h;
   4349 			ptr += hdr_len;
   4350 		}
   4351 	}
   4352 	if (nexthdr == IPPROTO_ROUTING) {
   4353 		ip6_rthdr_t	*rthdr;
   4354 		uint_t		hdr_len;
   4355 
   4356 		rthdr = (ip6_rthdr_t *)ptr;
   4357 		nexthdr = rthdr->ip6r_nxt;
   4358 		prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
   4359 		    - (uint8_t *)ip6h;
   4360 		hdr_len = 8 * (rthdr->ip6r_len + 1);
   4361 		ptr += hdr_len;
   4362 	}
   4363 	unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
   4364 
   4365 	/*
   4366 	 * Allocate an mblk with enough room for the link-layer
   4367 	 * header, the unfragmentable part of the datagram, and the
   4368 	 * fragment header.
   4369 	 */
   4370 	hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) +
   4371 	    ipst->ips_ip_wroff_extra, mp);
   4372 	if (hmp == NULL) {
   4373 		ill_t *ill = ixa->ixa_nce->nce_ill;
   4374 
   4375 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   4376 		ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp, ill);
   4377 		freemsg(mp);
   4378 		return (NULL);
   4379 	}
   4380 	hmp->b_rptr += ipst->ips_ip_wroff_extra;
   4381 	hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t);
   4382 
   4383 	fip6h = (ip6_t *)hmp->b_rptr;
   4384 	fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len);
   4385 
   4386 	bcopy(ip6h, fip6h, unfragmentable_len);
   4387 	fip6h->ip6_plen = htons(ntohs(fip6h->ip6_plen) + sizeof (ip6_frag_t));
   4388 	hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT;
   4389 
   4390 	fraghdr->ip6f_nxt = nexthdr;
   4391 	fraghdr->ip6f_reserved = 0;
   4392 	fraghdr->ip6f_offlg = 0;
   4393 	fraghdr->ip6f_ident = htonl(ident);
   4394 
   4395 	/* Get the priority marking, if any */
   4396 	hmp->b_band = priority;
   4397 
   4398 	/*
   4399 	 * Move read ptr past unfragmentable portion, we don't want this part
   4400 	 * of the data in our fragments.
   4401 	 */
   4402 	mp->b_rptr += unfragmentable_len;
   4403 	hmp->b_cont = mp;
   4404 	return (hmp);
   4405 }
   4406 
   4407 /*
   4408  * Determine if the ill and multicast aspects of that packets
   4409  * "matches" the conn.
   4410  */
   4411 boolean_t
   4412 conn_wantpacket_v6(conn_t *connp, ip_recv_attr_t *ira, ip6_t *ip6h)
   4413 {
   4414 	ill_t		*ill = ira->ira_rill;
   4415 	zoneid_t	zoneid = ira->ira_zoneid;
   4416 	uint_t		in_ifindex;
   4417 	in6_addr_t	*v6dst_ptr = &ip6h->ip6_dst;
   4418 	in6_addr_t	*v6src_ptr = &ip6h->ip6_src;
   4419 
   4420 	/*
   4421 	 * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local
   4422 	 * scopeid. This is used to limit
   4423 	 * unicast and multicast reception to conn_incoming_ifindex.
   4424 	 * conn_wantpacket_v6 is called both for unicast and
   4425 	 * multicast packets.
   4426 	 */
   4427 	in_ifindex = connp->conn_incoming_ifindex;
   4428 
   4429 	/* mpathd can bind to the under IPMP interface, which we allow */
   4430 	if (in_ifindex != 0 && in_ifindex != ill->ill_phyint->phyint_ifindex) {
   4431 		if (!IS_UNDER_IPMP(ill))
   4432 			return (B_FALSE);
   4433 
   4434 		if (in_ifindex != ipmp_ill_get_ipmp_ifindex(ill))
   4435 			return (B_FALSE);
   4436 	}
   4437 
   4438 	if (!IPCL_ZONE_MATCH(connp, zoneid))
   4439 		return (B_FALSE);
   4440 
   4441 	if (!(ira->ira_flags & IRAF_MULTICAST))
   4442 		return (B_TRUE);
   4443 
   4444 	if (connp->conn_multi_router)
   4445 		return (B_TRUE);
   4446 
   4447 	if (ira->ira_protocol == IPPROTO_RSVP)
   4448 		return (B_TRUE);
   4449 
   4450 	return (conn_hasmembers_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr,
   4451 	    ira->ira_ill));
   4452 }
   4453 
   4454 /*
   4455  * pr_addr_dbg function provides the needed buffer space to call
   4456  * inet_ntop() function's 3rd argument. This function should be
   4457  * used by any kernel routine which wants to save INET6_ADDRSTRLEN
   4458  * stack buffer space in it's own stack frame. This function uses
   4459  * a buffer from it's own stack and prints the information.
   4460  * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr)
   4461  *
   4462  * Note:    This function can call inet_ntop() once.
   4463  */
   4464 void
   4465 pr_addr_dbg(char *fmt1, int af, const void *addr)
   4466 {
   4467 	char	buf[INET6_ADDRSTRLEN];
   4468 
   4469 	if (fmt1 == NULL) {
   4470 		ip0dbg(("pr_addr_dbg: Wrong arguments\n"));
   4471 		return;
   4472 	}
   4473 
   4474 	/*
   4475 	 * This does not compare debug level and just prints
   4476 	 * out. Thus it is the responsibility of the caller
   4477 	 * to check the appropriate debug-level before calling
   4478 	 * this function.
   4479 	 */
   4480 	if (ip_debug > 0) {
   4481 		printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf)));
   4482 	}
   4483 
   4484 
   4485 }
   4486 
   4487 
   4488 /*
   4489  * Return the length in bytes of the IPv6 headers (base header
   4490  * extension headers) that will be needed based on the
   4491  * ip_pkt_t structure passed by the caller.
   4492  *
   4493  * The returned length does not include the length of the upper level
   4494  * protocol (ULP) header.
   4495  */
   4496 int
   4497 ip_total_hdrs_len_v6(const ip_pkt_t *ipp)
   4498 {
   4499 	int len;
   4500 
   4501 	len = IPV6_HDR_LEN;
   4502 
   4503 	/*
   4504 	 * If there's a security label here, then we ignore any hop-by-hop
   4505 	 * options the user may try to set.
   4506 	 */
   4507 	if (ipp->ipp_fields & IPPF_LABEL_V6) {
   4508 		uint_t hopoptslen;
   4509 		/*
   4510 		 * Note that ipp_label_len_v6 is just the option - not
   4511 		 * the hopopts extension header. It also needs to be padded
   4512 		 * to a multiple of 8 bytes.
   4513 		 */
   4514 		ASSERT(ipp->ipp_label_len_v6 != 0);
   4515 		hopoptslen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
   4516 		hopoptslen = (hopoptslen + 7)/8 * 8;
   4517 		len += hopoptslen;
   4518 	} else if (ipp->ipp_fields & IPPF_HOPOPTS) {
   4519 		ASSERT(ipp->ipp_hopoptslen != 0);
   4520 		len += ipp->ipp_hopoptslen;
   4521 	}
   4522 
   4523 	/*
   4524 	 * En-route destination options
   4525 	 * Only do them if there's a routing header as well
   4526 	 */
   4527 	if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
   4528 	    (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
   4529 		ASSERT(ipp->ipp_rthdrdstoptslen != 0);
   4530 		len += ipp->ipp_rthdrdstoptslen;
   4531 	}
   4532 	if (ipp->ipp_fields & IPPF_RTHDR) {
   4533 		ASSERT(ipp->ipp_rthdrlen != 0);
   4534 		len += ipp->ipp_rthdrlen;
   4535 	}
   4536 	if (ipp->ipp_fields & IPPF_DSTOPTS) {
   4537 		ASSERT(ipp->ipp_dstoptslen != 0);
   4538 		len += ipp->ipp_dstoptslen;
   4539 	}
   4540 	return (len);
   4541 }
   4542 
   4543 /*
   4544  * All-purpose routine to build a header chain of an IPv6 header
   4545  * followed by any required extension headers and a proto header.
   4546  *
   4547  * The caller has to set the source and destination address as well as
   4548  * ip6_plen. The caller has to massage any routing header and compensate
   4549  * for the ULP pseudo-header checksum due to the source route.
   4550  *
   4551  * The extension headers will all be fully filled in.
   4552  */
   4553 void
   4554 ip_build_hdrs_v6(uchar_t *buf, uint_t buf_len, const ip_pkt_t *ipp,
   4555     uint8_t protocol, uint32_t flowinfo)
   4556 {
   4557 	uint8_t *nxthdr_ptr;
   4558 	uint8_t *cp;
   4559 	ip6_t	*ip6h = (ip6_t *)buf;
   4560 
   4561 	/* Initialize IPv6 header */
   4562 	ip6h->ip6_vcf =
   4563 	    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
   4564 	    (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
   4565 
   4566 	if (ipp->ipp_fields & IPPF_TCLASS) {
   4567 		/* Overrides the class part of flowinfo */
   4568 		ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
   4569 		    ipp->ipp_tclass);
   4570 	}
   4571 
   4572 	if (ipp->ipp_fields & IPPF_HOPLIMIT)
   4573 		ip6h->ip6_hops = ipp->ipp_hoplimit;
   4574 	else
   4575 		ip6h->ip6_hops = ipp->ipp_unicast_hops;
   4576 
   4577 	if ((ipp->ipp_fields & IPPF_ADDR) &&
   4578 	    !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
   4579 		ip6h->ip6_src = ipp->ipp_addr;
   4580 
   4581 	nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
   4582 	cp = (uint8_t *)&ip6h[1];
   4583 	/*
   4584 	 * Here's where we have to start stringing together
   4585 	 * any extension headers in the right order:
   4586 	 * Hop-by-hop, destination, routing, and final destination opts.
   4587 	 */
   4588 	/*
   4589 	 * If there's a security label here, then we ignore any hop-by-hop
   4590 	 * options the user may try to set.
   4591 	 */
   4592 	if (ipp->ipp_fields & IPPF_LABEL_V6) {
   4593 		/*
   4594 		 * Hop-by-hop options with the label.
   4595 		 * Note that ipp_label_v6 is just the option - not
   4596 		 * the hopopts extension header. It also needs to be padded
   4597 		 * to a multiple of 8 bytes.
   4598 		 */
   4599 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
   4600 		uint_t hopoptslen;
   4601 		uint_t padlen;
   4602 
   4603 		padlen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
   4604 		hopoptslen = (padlen + 7)/8 * 8;
   4605 		padlen = hopoptslen - padlen;
   4606 
   4607 		*nxthdr_ptr = IPPROTO_HOPOPTS;
   4608 		nxthdr_ptr = &hbh->ip6h_nxt;
   4609 		hbh->ip6h_len = hopoptslen/8 - 1;
   4610 		cp += sizeof (ip6_hbh_t);
   4611 		bcopy(ipp->ipp_label_v6, cp, ipp->ipp_label_len_v6);
   4612 		cp += ipp->ipp_label_len_v6;
   4613 
   4614 		ASSERT(padlen <= 7);
   4615 		switch (padlen) {
   4616 		case 0:
   4617 			break;
   4618 		case 1:
   4619 			cp[0] = IP6OPT_PAD1;
   4620 			break;
   4621 		default:
   4622 			cp[0] = IP6OPT_PADN;
   4623 			cp[1] = padlen - 2;
   4624 			bzero(&cp[2], padlen - 2);
   4625 			break;
   4626 		}
   4627 		cp += padlen;
   4628 	} else if (ipp->ipp_fields & IPPF_HOPOPTS) {
   4629 		/* Hop-by-hop options */
   4630 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
   4631 
   4632 		*nxthdr_ptr = IPPROTO_HOPOPTS;
   4633 		nxthdr_ptr = &hbh->ip6h_nxt;
   4634 
   4635 		bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen);
   4636 		cp += ipp->ipp_hopoptslen;
   4637 	}
   4638 	/*
   4639 	 * En-route destination options
   4640 	 * Only do them if there's a routing header as well
   4641 	 */
   4642 	if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
   4643 	    (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
   4644 		ip6_dest_t *dst = (ip6_dest_t *)cp;
   4645 
   4646 		*nxthdr_ptr = IPPROTO_DSTOPTS;
   4647 		nxthdr_ptr = &dst->ip6d_nxt;
   4648 
   4649 		bcopy(ipp->ipp_rthdrdstopts, cp, ipp->ipp_rthdrdstoptslen);
   4650 		cp += ipp->ipp_rthdrdstoptslen;
   4651 	}
   4652 	/*
   4653 	 * Routing header next
   4654 	 */
   4655 	if (ipp->ipp_fields & IPPF_RTHDR) {
   4656 		ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
   4657 
   4658 		*nxthdr_ptr = IPPROTO_ROUTING;
   4659 		nxthdr_ptr = &rt->ip6r_nxt;
   4660 
   4661 		bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen);
   4662 		cp += ipp->ipp_rthdrlen;
   4663 	}
   4664 	/*
   4665 	 * Do ultimate destination options
   4666 	 */
   4667 	if (ipp->ipp_fields & IPPF_DSTOPTS) {
   4668 		ip6_dest_t *dest = (ip6_dest_t *)cp;
   4669 
   4670 		*nxthdr_ptr = IPPROTO_DSTOPTS;
   4671 		nxthdr_ptr = &dest->ip6d_nxt;
   4672 
   4673 		bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen);
   4674 		cp += ipp->ipp_dstoptslen;
   4675 	}
   4676 	/*
   4677 	 * Now set the last header pointer to the proto passed in
   4678 	 */
   4679 	*nxthdr_ptr = protocol;
   4680 	ASSERT((int)(cp - buf) == buf_len);
   4681 }
   4682 
   4683 /*
   4684  * Return a pointer to the routing header extension header
   4685  * in the IPv6 header(s) chain passed in.
   4686  * If none found, return NULL
   4687  * Assumes that all extension headers are in same mblk as the v6 header
   4688  */
   4689 ip6_rthdr_t *
   4690 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr)
   4691 {
   4692 	ip6_dest_t	*desthdr;
   4693 	ip6_frag_t	*fraghdr;
   4694 	uint_t		hdrlen;
   4695 	uint8_t		nexthdr;
   4696 	uint8_t		*ptr = (uint8_t *)&ip6h[1];
   4697 
   4698 	if (ip6h->ip6_nxt == IPPROTO_ROUTING)
   4699 		return ((ip6_rthdr_t *)ptr);
   4700 
   4701 	/*
   4702 	 * The routing header will precede all extension headers
   4703 	 * other than the hop-by-hop and destination options
   4704 	 * extension headers, so if we see anything other than those,
   4705 	 * we're done and didn't find it.
   4706 	 * We could see a destination options header alone but no
   4707 	 * routing header, in which case we'll return NULL as soon as
   4708 	 * we see anything after that.
   4709 	 * Hop-by-hop and destination option headers are identical,
   4710 	 * so we can use either one we want as a template.
   4711 	 */
   4712 	nexthdr = ip6h->ip6_nxt;
   4713 	while (ptr < endptr) {
   4714 		/* Is there enough left for len + nexthdr? */
   4715 		if (ptr + MIN_EHDR_LEN > endptr)
   4716 			return (NULL);
   4717 
   4718 		switch (nexthdr) {
   4719 		case IPPROTO_HOPOPTS:
   4720 		case IPPROTO_DSTOPTS:
   4721 			/* Assumes the headers are identical for hbh and dst */
   4722 			desthdr = (ip6_dest_t *)ptr;
   4723 			hdrlen = 8 * (desthdr->ip6d_len + 1);
   4724 			nexthdr = desthdr->ip6d_nxt;
   4725 			break;
   4726 
   4727 		case IPPROTO_ROUTING:
   4728 			return ((ip6_rthdr_t *)ptr);
   4729 
   4730 		case IPPROTO_FRAGMENT:
   4731 			fraghdr = (ip6_frag_t *)ptr;
   4732 			hdrlen = sizeof (ip6_frag_t);
   4733 			nexthdr = fraghdr->ip6f_nxt;
   4734 			break;
   4735 
   4736 		default:
   4737 			return (NULL);
   4738 		}
   4739 		ptr += hdrlen;
   4740 	}
   4741 	return (NULL);
   4742 }
   4743 
   4744 /*
   4745  * Called for source-routed packets originating on this node.
   4746  * Manipulates the original routing header by moving every entry up
   4747  * one slot, placing the first entry in the v6 header's v6_dst field,
   4748  * and placing the ultimate destination in the routing header's last
   4749  * slot.
   4750  *
   4751  * Returns the checksum diference between the ultimate destination
   4752  * (last hop in the routing header when the packet is sent) and
   4753  * the first hop (ip6_dst when the packet is sent)
   4754  */
   4755 /* ARGSUSED2 */
   4756 uint32_t
   4757 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns)
   4758 {
   4759 	uint_t		numaddr;
   4760 	uint_t		i;
   4761 	in6_addr_t	*addrptr;
   4762 	in6_addr_t	tmp;
   4763 	ip6_rthdr0_t	*rthdr = (ip6_rthdr0_t *)rth;
   4764 	uint32_t	cksm;
   4765 	uint32_t	addrsum = 0;
   4766 	uint16_t	*ptr;
   4767 
   4768 	/*
   4769 	 * Perform any processing needed for source routing.
   4770 	 * We know that all extension headers will be in the same mblk
   4771 	 * as the IPv6 header.
   4772 	 */
   4773 
   4774 	/*
   4775 	 * If no segments left in header, or the header length field is zero,
   4776 	 * don't move hop addresses around;
   4777 	 * Checksum difference is zero.
   4778 	 */
   4779 	if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0))
   4780 		return (0);
   4781 
   4782 	ptr = (uint16_t *)&ip6h->ip6_dst;
   4783 	cksm = 0;
   4784 	for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
   4785 		cksm += ptr[i];
   4786 	}
   4787 	cksm = (cksm & 0xFFFF) + (cksm >> 16);
   4788 
   4789 	/*
   4790 	 * Here's where the fun begins - we have to
   4791 	 * move all addresses up one spot, take the
   4792 	 * first hop and make it our first ip6_dst,
   4793 	 * and place the ultimate destination in the
   4794 	 * newly-opened last slot.
   4795 	 */
   4796 	addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr));
   4797 	numaddr = rthdr->ip6r0_len / 2;
   4798 	tmp = *addrptr;
   4799 	for (i = 0; i < (numaddr - 1); addrptr++, i++) {
   4800 		*addrptr = addrptr[1];
   4801 	}
   4802 	*addrptr = ip6h->ip6_dst;
   4803 	ip6h->ip6_dst = tmp;
   4804 
   4805 	/*
   4806 	 * From the checksummed ultimate destination subtract the checksummed
   4807 	 * current ip6_dst (the first hop address). Return that number.
   4808 	 * (In the v4 case, the second part of this is done in each routine
   4809 	 *  that calls ip_massage_options(). We do it all in this one place
   4810 	 *  for v6).
   4811 	 */
   4812 	ptr = (uint16_t *)&ip6h->ip6_dst;
   4813 	for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
   4814 		addrsum += ptr[i];
   4815 	}
   4816 	cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF));
   4817 	if ((int)cksm < 0)
   4818 		cksm--;
   4819 	cksm = (cksm & 0xFFFF) + (cksm >> 16);
   4820 
   4821 	return (cksm);
   4822 }
   4823 
   4824 void
   4825 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp)
   4826 {
   4827 	kstat_t *ksp;
   4828 
   4829 	ip6_stat_t template = {
   4830 		{ "ip6_udp_fannorm", 	KSTAT_DATA_UINT64 },
   4831 		{ "ip6_udp_fanmb", 	KSTAT_DATA_UINT64 },
   4832 		{ "ip6_recv_pullup", 		KSTAT_DATA_UINT64 },
   4833 		{ "ip6_db_ref",			KSTAT_DATA_UINT64 },
   4834 		{ "ip6_notaligned",		KSTAT_DATA_UINT64 },
   4835 		{ "ip6_multimblk",		KSTAT_DATA_UINT64 },
   4836 		{ "ipsec_proto_ahesp",		KSTAT_DATA_UINT64 },
   4837 		{ "ip6_out_sw_cksum",			KSTAT_DATA_UINT64 },
   4838 		{ "ip6_out_sw_cksum_bytes",		KSTAT_DATA_UINT64 },
   4839 		{ "ip6_in_sw_cksum",			KSTAT_DATA_UINT64 },
   4840 		{ "ip6_tcp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
   4841 		{ "ip6_tcp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
   4842 		{ "ip6_tcp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
   4843 		{ "ip6_udp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
   4844 		{ "ip6_udp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
   4845 		{ "ip6_udp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
   4846 	};
   4847 	ksp = kstat_create_netstack("ip", 0, "ip6stat", "net",
   4848 	    KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
   4849 	    KSTAT_FLAG_VIRTUAL, stackid);
   4850 
   4851 	if (ksp == NULL)
   4852 		return (NULL);
   4853 
   4854 	bcopy(&template, ip6_statisticsp, sizeof (template));
   4855 	ksp->ks_data = (void *)ip6_statisticsp;
   4856 	ksp->ks_private = (void *)(uintptr_t)stackid;
   4857 
   4858 	kstat_install(ksp);
   4859 	return (ksp);
   4860 }
   4861 
   4862 void
   4863 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp)
   4864 {
   4865 	if (ksp != NULL) {
   4866 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
   4867 		kstat_delete_netstack(ksp, stackid);
   4868 	}
   4869 }
   4870 
   4871 /*
   4872  * The following two functions set and get the value for the
   4873  * IPV6_SRC_PREFERENCES socket option.
   4874  */
   4875 int
   4876 ip6_set_src_preferences(ip_xmit_attr_t *ixa, uint32_t prefs)
   4877 {
   4878 	/*
   4879 	 * We only support preferences that are covered by
   4880 	 * IPV6_PREFER_SRC_MASK.
   4881 	 */
   4882 	if (prefs & ~IPV6_PREFER_SRC_MASK)
   4883 		return (EINVAL);
   4884 
   4885 	/*
   4886 	 * Look for conflicting preferences or default preferences.  If
   4887 	 * both bits of a related pair are clear, the application wants the
   4888 	 * system's default value for that pair.  Both bits in a pair can't
   4889 	 * be set.
   4890 	 */
   4891 	if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) {
   4892 		prefs |= IPV6_PREFER_SRC_MIPDEFAULT;
   4893 	} else if ((prefs & IPV6_PREFER_SRC_MIPMASK) ==
   4894 	    IPV6_PREFER_SRC_MIPMASK) {
   4895 		return (EINVAL);
   4896 	}
   4897 	if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) {
   4898 		prefs |= IPV6_PREFER_SRC_TMPDEFAULT;
   4899 	} else if ((prefs & IPV6_PREFER_SRC_TMPMASK) ==
   4900 	    IPV6_PREFER_SRC_TMPMASK) {
   4901 		return (EINVAL);
   4902 	}
   4903 	if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) {
   4904 		prefs |= IPV6_PREFER_SRC_CGADEFAULT;
   4905 	} else if ((prefs & IPV6_PREFER_SRC_CGAMASK) ==
   4906 	    IPV6_PREFER_SRC_CGAMASK) {
   4907 		return (EINVAL);
   4908 	}
   4909 
   4910 	ixa->ixa_src_preferences = prefs;
   4911 	return (0);
   4912 }
   4913 
   4914 size_t
   4915 ip6_get_src_preferences(ip_xmit_attr_t *ixa, uint32_t *val)
   4916 {
   4917 	*val = ixa->ixa_src_preferences;
   4918 	return (sizeof (ixa->ixa_src_preferences));
   4919 }
   4920 
   4921 /*
   4922  * Get the size of the IP options (including the IP headers size)
   4923  * without including the AH header's size. If till_ah is B_FALSE,
   4924  * and if AH header is present, dest options beyond AH header will
   4925  * also be included in the returned size.
   4926  */
   4927 int
   4928 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah)
   4929 {
   4930 	ip6_t *ip6h;
   4931 	uint8_t nexthdr;
   4932 	uint8_t *whereptr;
   4933 	ip6_hbh_t *hbhhdr;
   4934 	ip6_dest_t *dsthdr;
   4935 	ip6_rthdr_t *rthdr;
   4936 	int ehdrlen;
   4937 	int size;
   4938 	ah_t *ah;
   4939 
   4940 	ip6h = (ip6_t *)mp->b_rptr;
   4941 	size = IPV6_HDR_LEN;
   4942 	nexthdr = ip6h->ip6_nxt;
   4943 	whereptr = (uint8_t *)&ip6h[1];
   4944 	for (;;) {
   4945 		/* Assume IP has already stripped it */
   4946 		ASSERT(nexthdr != IPPROTO_FRAGMENT);
   4947 		switch (nexthdr) {
   4948 		case IPPROTO_HOPOPTS:
   4949 			hbhhdr = (ip6_hbh_t *)whereptr;
   4950 			nexthdr = hbhhdr->ip6h_nxt;
   4951 			ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
   4952 			break;
   4953 		case IPPROTO_DSTOPTS:
   4954 			dsthdr = (ip6_dest_t *)whereptr;
   4955 			nexthdr = dsthdr->ip6d_nxt;
   4956 			ehdrlen = 8 * (dsthdr->ip6d_len + 1);
   4957 			break;
   4958 		case IPPROTO_ROUTING:
   4959 			rthdr = (ip6_rthdr_t *)whereptr;
   4960 			nexthdr = rthdr->ip6r_nxt;
   4961 			ehdrlen = 8 * (rthdr->ip6r_len + 1);
   4962 			break;
   4963 		default :
   4964 			if (till_ah) {
   4965 				ASSERT(nexthdr == IPPROTO_AH);
   4966 				return (size);
   4967 			}
   4968 			/*
   4969 			 * If we don't have a AH header to traverse,
   4970 			 * return now. This happens normally for
   4971 			 * outbound datagrams where we have not inserted
   4972 			 * the AH header.
   4973 			 */
   4974 			if (nexthdr != IPPROTO_AH) {
   4975 				return (size);
   4976 			}
   4977 
   4978 			/*
   4979 			 * We don't include the AH header's size
   4980 			 * to be symmetrical with other cases where
   4981 			 * we either don't have a AH header (outbound)
   4982 			 * or peek into the AH header yet (inbound and
   4983 			 * not pulled up yet).
   4984 			 */
   4985 			ah = (ah_t *)whereptr;
   4986 			nexthdr = ah->ah_nexthdr;
   4987 			ehdrlen = (ah->ah_length << 2) + 8;
   4988 
   4989 			if (nexthdr == IPPROTO_DSTOPTS) {
   4990 				if (whereptr + ehdrlen >= mp->b_wptr) {
   4991 					/*
   4992 					 * The destination options header
   4993 					 * is not part of the first mblk.
   4994 					 */
   4995 					whereptr = mp->b_cont->b_rptr;
   4996 				} else {
   4997 					whereptr += ehdrlen;
   4998 				}
   4999 
   5000 				dsthdr = (ip6_dest_t *)whereptr;
   5001 				ehdrlen = 8 * (dsthdr->ip6d_len + 1);
   5002 				size += ehdrlen;
   5003 			}
   5004 			return (size);
   5005 		}
   5006 		whereptr += ehdrlen;
   5007 		size += ehdrlen;
   5008 	}
   5009 }
   5010 
   5011 /*
   5012  * Utility routine that checks if `v6srcp' is a valid address on underlying
   5013  * interface `ill'.  If `ipifp' is non-NULL, it's set to a held ipif
   5014  * associated with `v6srcp' on success.  NOTE: if this is not called from
   5015  * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the
   5016  * group during or after this lookup.
   5017  */
   5018 boolean_t
   5019 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp)
   5020 {
   5021 	ipif_t *ipif;
   5022 
   5023 
   5024 	ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst);
   5025 	if (ipif != NULL) {
   5026 		if (ipifp != NULL)
   5027 			*ipifp = ipif;
   5028 		else
   5029 			ipif_refrele(ipif);
   5030 		return (B_TRUE);
   5031 	}
   5032 
   5033 	if (ip_debug > 2) {
   5034 		pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for "
   5035 		    "src %s\n", AF_INET6, v6srcp);
   5036 	}
   5037 	return (B_FALSE);
   5038 }
   5039