Home | History | Annotate | Download | only in ip
      1 /*
      2  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
      3  * Use is subject to license terms.
      4  */
      5 
      6 /*
      7  * Copyright (c) 1988, 1991, 1993
      8  *	The Regents of the University of California.  All rights reserved.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the University of
     21  *	California, Berkeley and its contributors.
     22  * 4. Neither the name of the University nor the names of its contributors
     23  *    may be used to endorse or promote products derived from this software
     24  *    without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     36  * SUCH DAMAGE.
     37  *
     38  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
     39  */
     40 
     41 /*
     42  * This file contains routines that process routing socket requests.
     43  */
     44 
     45 #include <sys/types.h>
     46 #include <sys/stream.h>
     47 #include <sys/stropts.h>
     48 #include <sys/ddi.h>
     49 #include <sys/strsubr.h>
     50 #include <sys/cmn_err.h>
     51 #include <sys/debug.h>
     52 #include <sys/policy.h>
     53 #include <sys/zone.h>
     54 
     55 #include <sys/systm.h>
     56 #include <sys/param.h>
     57 #include <sys/socket.h>
     58 #include <sys/strsun.h>
     59 #include <net/if.h>
     60 #include <net/route.h>
     61 #include <netinet/in.h>
     62 #include <net/if_dl.h>
     63 #include <netinet/ip6.h>
     64 
     65 #include <inet/common.h>
     66 #include <inet/ip.h>
     67 #include <inet/ip6.h>
     68 #include <inet/ip_if.h>
     69 #include <inet/ip_ire.h>
     70 #include <inet/ip_ftable.h>
     71 #include <inet/ip_rts.h>
     72 
     73 #include <inet/ipclassifier.h>
     74 
     75 #include <sys/tsol/tndb.h>
     76 #include <sys/tsol/tnet.h>
     77 
     78 #define	RTS_MSG_SIZE(type, rtm_addrs, af, sacnt) \
     79 	(rts_data_msg_size(rtm_addrs, af, sacnt) + rts_header_msg_size(type))
     80 
     81 static size_t	rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp);
     82 static void	rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst,
     83     ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr,
     84     ipaddr_t author, ipaddr_t ifaddr, const ill_t *ill, mblk_t *mp,
     85     const tsol_gc_t *);
     86 static int	rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp,
     87     in6_addr_t *gw_addrp, in6_addr_t *net_maskp, in6_addr_t *authorp,
     88     in6_addr_t *if_addrp, in6_addr_t *src_addrp, ushort_t *indexp,
     89     sa_family_t *afp, tsol_rtsecattr_t *rtsecattr, int *error);
     90 static void	rts_getifdata(if_data_t *if_data, const ipif_t *ipif);
     91 static int	rts_getmetrics(ire_t *ire, rt_metrics_t *metrics);
     92 static mblk_t	*rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire,
     93     const in6_addr_t *setsrc, tsol_ire_gw_secattr_t *attrp, sa_family_t af);
     94 static void	rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics);
     95 static ire_t	*ire_lookup_v4(ipaddr_t dst_addr, ipaddr_t net_mask,
     96     ipaddr_t gw_addr, const ill_t *ill, zoneid_t zoneid,
     97     const ts_label_t *tsl, int match_flags, ip_stack_t *ipst, ire_t **pifire,
     98     ipaddr_t *v4setsrcp, tsol_ire_gw_secattr_t **gwattrp);
     99 static ire_t	*ire_lookup_v6(const in6_addr_t *dst_addr_v6,
    100     const in6_addr_t *net_mask_v6, const in6_addr_t *gw_addr_v6,
    101     const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags,
    102     ip_stack_t *ipst, ire_t **pifire,
    103     in6_addr_t *v6setsrcp, tsol_ire_gw_secattr_t **gwattrp);
    104 
    105 /*
    106  * Send `mp' to all eligible routing queues.  A queue is ineligible if:
    107  *
    108  *  1. SO_USELOOPBACK is off and it is not the originating queue.
    109  *  2. RTA_UNDER_IPMP is on and RTSQ_UNDER_IPMP is not set in `flags'.
    110  *  3. RTA_UNDER_IPMP is off and RTSQ_NORMAL is not set in `flags'.
    111  *  4. It is not the same address family as `af', and `af' isn't AF_UNSPEC.
    112  */
    113 void
    114 rts_queue_input(mblk_t *mp, conn_t *o_connp, sa_family_t af, uint_t flags,
    115     ip_stack_t *ipst)
    116 {
    117 	mblk_t	*mp1;
    118 	conn_t 	*connp, *next_connp;
    119 
    120 	/*
    121 	 * Since we don't have an ill_t here, RTSQ_DEFAULT must already be
    122 	 * resolved to one or more of RTSQ_NORMAL|RTSQ_UNDER_IPMP at this point.
    123 	 */
    124 	ASSERT(!(flags & RTSQ_DEFAULT));
    125 
    126 	mutex_enter(&ipst->ips_rts_clients->connf_lock);
    127 	connp = ipst->ips_rts_clients->connf_head;
    128 
    129 	for (; connp != NULL; connp = next_connp) {
    130 		next_connp = connp->conn_next;
    131 		/*
    132 		 * If there was a family specified when this routing socket was
    133 		 * created and it doesn't match the family of the message to
    134 		 * copy, then continue.
    135 		 */
    136 		if ((connp->conn_proto != AF_UNSPEC) &&
    137 		    (connp->conn_proto != af))
    138 			continue;
    139 
    140 		/*
    141 		 * Queue the message only if the conn_t and flags match.
    142 		 */
    143 		if (connp->conn_rtaware & RTAW_UNDER_IPMP) {
    144 			if (!(flags & RTSQ_UNDER_IPMP))
    145 				continue;
    146 		} else {
    147 			if (!(flags & RTSQ_NORMAL))
    148 				continue;
    149 		}
    150 		/*
    151 		 * For the originating queue, we only copy the message upstream
    152 		 * if loopback is set.  For others reading on the routing
    153 		 * socket, we check if there is room upstream for a copy of the
    154 		 * message.
    155 		 */
    156 		if ((o_connp == connp) && connp->conn_useloopback == 0) {
    157 			connp = connp->conn_next;
    158 			continue;
    159 		}
    160 		CONN_INC_REF(connp);
    161 		mutex_exit(&ipst->ips_rts_clients->connf_lock);
    162 		/* Pass to rts_input */
    163 		if (IPCL_IS_NONSTR(connp) ? !connp->conn_flow_cntrld :
    164 		    canputnext(connp->conn_rq)) {
    165 			mp1 = dupmsg(mp);
    166 			if (mp1 == NULL)
    167 				mp1 = copymsg(mp);
    168 			/* Note that we pass a NULL ira to rts_input */
    169 			if (mp1 != NULL)
    170 				(connp->conn_recv)(connp, mp1, NULL, NULL);
    171 		}
    172 
    173 		mutex_enter(&ipst->ips_rts_clients->connf_lock);
    174 		/* reload next_connp since conn_next may have changed */
    175 		next_connp = connp->conn_next;
    176 		CONN_DEC_REF(connp);
    177 	}
    178 	mutex_exit(&ipst->ips_rts_clients->connf_lock);
    179 	freemsg(mp);
    180 }
    181 
    182 /*
    183  * Takes an ire and sends an ack to all the routing sockets. This
    184  * routine is used
    185  * - when a route is created/deleted through the ioctl interface.
    186  * - when a stale redirect is deleted
    187  */
    188 void
    189 ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst)
    190 {
    191 	mblk_t		*mp;
    192 	rt_msghdr_t	*rtm;
    193 	int		rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY);
    194 	sa_family_t	af;
    195 	in6_addr_t	gw_addr_v6;
    196 
    197 	if (ire == NULL)
    198 		return;
    199 	ASSERT(ire->ire_ipversion == IPV4_VERSION ||
    200 	    ire->ire_ipversion == IPV6_VERSION);
    201 
    202 	ASSERT(!(ire->ire_type & IRE_IF_CLONE));
    203 
    204 	if (ire->ire_flags & RTF_SETSRC)
    205 		rtm_addrs |= RTA_SRC;
    206 
    207 	switch (ire->ire_ipversion) {
    208 	case IPV4_VERSION:
    209 		af = AF_INET;
    210 		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
    211 		if (mp == NULL)
    212 			return;
    213 		rts_fill_msg(type, rtm_addrs, ire->ire_addr, ire->ire_mask,
    214 		    ire->ire_gateway_addr, ire->ire_setsrc_addr, 0, 0, 0, NULL,
    215 		    mp, NULL);
    216 		break;
    217 	case IPV6_VERSION:
    218 		af = AF_INET6;
    219 		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
    220 		if (mp == NULL)
    221 			return;
    222 		mutex_enter(&ire->ire_lock);
    223 		gw_addr_v6 = ire->ire_gateway_addr_v6;
    224 		mutex_exit(&ire->ire_lock);
    225 		rts_fill_msg_v6(type, rtm_addrs, &ire->ire_addr_v6,
    226 		    &ire->ire_mask_v6, &gw_addr_v6,
    227 		    &ire->ire_setsrc_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros,
    228 		    &ipv6_all_zeros, NULL, mp, NULL);
    229 		break;
    230 	}
    231 	rtm = (rt_msghdr_t *)mp->b_rptr;
    232 	mp->b_wptr = (uchar_t *)&mp->b_rptr[rtm->rtm_msglen];
    233 	rtm->rtm_addrs = rtm_addrs;
    234 	rtm->rtm_flags = ire->ire_flags;
    235 	if (error != 0)
    236 		rtm->rtm_errno = error;
    237 	else
    238 		rtm->rtm_flags |= RTF_DONE;
    239 	rts_queue_input(mp, NULL, af, RTSQ_ALL, ipst);
    240 }
    241 
    242 /*
    243  * This is a call from the RTS module
    244  * indicating that this is a Routing Socket
    245  * Stream. Insert this conn_t in routing
    246  * socket client list.
    247  */
    248 void
    249 ip_rts_register(conn_t *connp)
    250 {
    251 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
    252 
    253 	connp->conn_useloopback = 1;
    254 	ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp);
    255 }
    256 
    257 /*
    258  * This is a call from the RTS module indicating that it is closing.
    259  */
    260 void
    261 ip_rts_unregister(conn_t *connp)
    262 {
    263 	ipcl_hash_remove(connp);
    264 }
    265 
    266 /*
    267  * Processes requests received on a routing socket. It extracts all the
    268  * arguments and calls the appropriate function to process the request.
    269  *
    270  * RTA_SRC bit flag requests are sent by 'route -setsrc'.
    271  *
    272  * In general, this function does not consume the message supplied but rather
    273  * sends the message upstream with an appropriate UNIX errno.
    274  */
    275 int
    276 ip_rts_request_common(mblk_t *mp, conn_t *connp, cred_t *ioc_cr)
    277 {
    278 	rt_msghdr_t	*rtm = NULL;
    279 	in6_addr_t	dst_addr_v6;
    280 	in6_addr_t	src_addr_v6;
    281 	in6_addr_t	gw_addr_v6;
    282 	in6_addr_t	net_mask_v6;
    283 	in6_addr_t	author_v6;
    284 	in6_addr_t	if_addr_v6;
    285 	mblk_t		*mp1;
    286 	ire_t		*ire = NULL;
    287 	ire_t		*ifire = NULL;
    288 	ipaddr_t	v4setsrc;
    289 	in6_addr_t	v6setsrc = ipv6_all_zeros;
    290 	tsol_ire_gw_secattr_t *gwattr = NULL;
    291 	int		error = 0;
    292 	int		match_flags = MATCH_IRE_DSTONLY;
    293 	int		match_flags_local = MATCH_IRE_TYPE | MATCH_IRE_GW;
    294 	int		found_addrs;
    295 	sa_family_t	af;
    296 	ipaddr_t	dst_addr;
    297 	ipaddr_t	gw_addr;
    298 	ipaddr_t	src_addr;
    299 	ipaddr_t	net_mask;
    300 	ushort_t	index;
    301 	boolean_t	gcgrp_xtraref = B_FALSE;
    302 	tsol_gcgrp_addr_t ga;
    303 	tsol_rtsecattr_t rtsecattr;
    304 	struct rtsa_s	*rtsap = NULL;
    305 	tsol_gcgrp_t	*gcgrp = NULL;
    306 	tsol_gc_t	*gc = NULL;
    307 	ts_label_t	*tsl = NULL;
    308 	zoneid_t	zoneid;
    309 	ip_stack_t	*ipst;
    310 	ill_t   	*ill = NULL;
    311 
    312 	zoneid = connp->conn_zoneid;
    313 	ipst = connp->conn_netstack->netstack_ip;
    314 
    315 	if (mp->b_cont != NULL && !pullupmsg(mp, -1)) {
    316 		freemsg(mp);
    317 		error =  EINVAL;
    318 		goto done;
    319 	}
    320 	if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) {
    321 		freemsg(mp);
    322 		error = EINVAL;
    323 		goto done;
    324 	}
    325 
    326 	/*
    327 	 * Check the routing message for basic consistency including the
    328 	 * version number and that the number of octets written is the same
    329 	 * as specified by the rtm_msglen field.
    330 	 *
    331 	 * At this point, an error can be delivered back via rtm_errno.
    332 	 */
    333 	rtm = (rt_msghdr_t *)mp->b_rptr;
    334 	if ((mp->b_wptr - mp->b_rptr) != rtm->rtm_msglen) {
    335 		error = EINVAL;
    336 		goto done;
    337 	}
    338 	if (rtm->rtm_version != RTM_VERSION) {
    339 		error = EPROTONOSUPPORT;
    340 		goto done;
    341 	}
    342 
    343 	/* Only allow RTM_GET or RTM_RESOLVE for unprivileged process */
    344 	if (rtm->rtm_type != RTM_GET &&
    345 	    rtm->rtm_type != RTM_RESOLVE &&
    346 	    (ioc_cr == NULL ||
    347 	    secpolicy_ip_config(ioc_cr, B_FALSE) != 0)) {
    348 		error = EPERM;
    349 		goto done;
    350 	}
    351 
    352 	found_addrs = rts_getaddrs(rtm, &dst_addr_v6, &gw_addr_v6, &net_mask_v6,
    353 	    &author_v6, &if_addr_v6, &src_addr_v6, &index, &af, &rtsecattr,
    354 	    &error);
    355 
    356 	if (error != 0)
    357 		goto done;
    358 
    359 	if ((found_addrs & RTA_DST) == 0) {
    360 		error = EINVAL;
    361 		goto done;
    362 	}
    363 
    364 	/*
    365 	 * Based on the address family of the destination address, determine
    366 	 * the destination, gateway and netmask and return the appropriate error
    367 	 * if an unknown address family was specified (following the errno
    368 	 * values that 4.4BSD-Lite2 returns.)
    369 	 */
    370 	switch (af) {
    371 	case AF_INET:
    372 		IN6_V4MAPPED_TO_IPADDR(&dst_addr_v6, dst_addr);
    373 		IN6_V4MAPPED_TO_IPADDR(&src_addr_v6, src_addr);
    374 		IN6_V4MAPPED_TO_IPADDR(&gw_addr_v6, gw_addr);
    375 		if (((found_addrs & RTA_NETMASK) == 0) ||
    376 		    (rtm->rtm_flags & RTF_HOST))
    377 			net_mask = IP_HOST_MASK;
    378 		else
    379 			IN6_V4MAPPED_TO_IPADDR(&net_mask_v6, net_mask);
    380 		break;
    381 	case AF_INET6:
    382 		if (((found_addrs & RTA_NETMASK) == 0) ||
    383 		    (rtm->rtm_flags & RTF_HOST))
    384 			net_mask_v6 = ipv6_all_ones;
    385 		break;
    386 	default:
    387 		/*
    388 		 * These errno values are meant to be compatible with
    389 		 * 4.4BSD-Lite2 for the given message types.
    390 		 */
    391 		switch (rtm->rtm_type) {
    392 		case RTM_ADD:
    393 		case RTM_DELETE:
    394 			error = ESRCH;
    395 			goto done;
    396 		case RTM_GET:
    397 		case RTM_CHANGE:
    398 			error = EAFNOSUPPORT;
    399 			goto done;
    400 		default:
    401 			error = EOPNOTSUPP;
    402 			goto done;
    403 		}
    404 	}
    405 
    406 	/*
    407 	 * At this point, the address family must be something known.
    408 	 */
    409 	ASSERT(af == AF_INET || af == AF_INET6);
    410 
    411 	/* Handle RTA_IFP */
    412 	if (index != 0) {
    413 		ipif_t		*ipif;
    414 lookup:
    415 		ill = ill_lookup_on_ifindex(index, af == AF_INET6, ipst);
    416 		if (ill == NULL) {
    417 			error = EINVAL;
    418 			goto done;
    419 		}
    420 
    421 		/*
    422 		 * Since all interfaces in an IPMP group must be equivalent,
    423 		 * we prevent changes to a specific underlying interface's
    424 		 * routing configuration.  However, for backward compatibility,
    425 		 * we interpret a request to add a route on an underlying
    426 		 * interface as a request to add a route on its IPMP interface.
    427 		 */
    428 		if (IS_UNDER_IPMP(ill)) {
    429 			switch (rtm->rtm_type) {
    430 			case RTM_CHANGE:
    431 			case RTM_DELETE:
    432 				error = EINVAL;
    433 				goto done;
    434 			case RTM_ADD:
    435 				index = ipmp_ill_get_ipmp_ifindex(ill);
    436 				ill_refrele(ill);
    437 				if (index == 0) {
    438 					ill = NULL; /* already refrele'd */
    439 					error = EINVAL;
    440 					goto done;
    441 				}
    442 				goto lookup;
    443 			}
    444 		}
    445 
    446 		match_flags |= MATCH_IRE_ILL;
    447 		/*
    448 		 * This provides the same zoneid as in Solaris 10
    449 		 * that -ifp picks the zoneid from the first ipif on the ill.
    450 		 * But it might not be useful since the first ipif will always
    451 		 * have the same zoneid as the ill.
    452 		 */
    453 		ipif = ipif_get_next_ipif(NULL, ill);
    454 		if (ipif != NULL) {
    455 			zoneid = ipif->ipif_zoneid;
    456 			ipif_refrele(ipif);
    457 		}
    458 	}
    459 
    460 	/*
    461 	 * If a netmask was supplied in the message, then subsequent route
    462 	 * lookups will attempt to match on the netmask as well.
    463 	 */
    464 	if ((found_addrs & RTA_NETMASK) != 0)
    465 		match_flags |= MATCH_IRE_MASK;
    466 
    467 	/*
    468 	 * We only process any passed-in route security attributes for
    469 	 * either RTM_ADD or RTM_CHANGE message; We overload them
    470 	 * to do an RTM_GET as a different label; ignore otherwise.
    471 	 */
    472 	if (rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE ||
    473 	    rtm->rtm_type == RTM_GET) {
    474 		ASSERT(rtsecattr.rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
    475 		if (rtsecattr.rtsa_cnt > 0)
    476 			rtsap = &rtsecattr.rtsa_attr[0];
    477 	}
    478 
    479 	switch (rtm->rtm_type) {
    480 	case RTM_ADD:
    481 		/* if we are adding a route, gateway is a must */
    482 		if ((found_addrs & RTA_GATEWAY) == 0) {
    483 			error = EINVAL;
    484 			goto done;
    485 		}
    486 
    487 		/* Multirouting does not support net routes. */
    488 		if ((rtm->rtm_flags & (RTF_MULTIRT | RTF_HOST)) ==
    489 		    RTF_MULTIRT) {
    490 			error = EADDRNOTAVAIL;
    491 			goto done;
    492 		}
    493 
    494 		/*
    495 		 * Multirouting and user-specified source addresses
    496 		 * do not support interface based routing.
    497 		 * Assigning a source address to an interface based
    498 		 * route is achievable by plumbing a new ipif and
    499 		 * setting up the interface route via this ipif,
    500 		 * though.
    501 		 */
    502 		if (rtm->rtm_flags & (RTF_MULTIRT | RTF_SETSRC)) {
    503 			if ((rtm->rtm_flags & RTF_GATEWAY) == 0) {
    504 				error = EADDRNOTAVAIL;
    505 				goto done;
    506 			}
    507 		}
    508 
    509 		switch (af) {
    510 		case AF_INET:
    511 			if (src_addr != INADDR_ANY) {
    512 				uint_t type;
    513 
    514 				/*
    515 				 * The RTF_SETSRC flag is present, check that
    516 				 * the supplied src address is not the loopback
    517 				 * address. This would produce martian packets.
    518 				 */
    519 				if (src_addr == htonl(INADDR_LOOPBACK)) {
    520 					error = EINVAL;
    521 					goto done;
    522 				}
    523 				/*
    524 				 * Also check that the supplied address is a
    525 				 * valid, local one. Only allow IFF_UP ones
    526 				 */
    527 				type = ip_type_v4(src_addr, ipst);
    528 				if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) {
    529 					error = EADDRNOTAVAIL;
    530 					goto done;
    531 				}
    532 			} else {
    533 				/*
    534 				 * The RTF_SETSRC modifier must be associated
    535 				 * to a non-null source address.
    536 				 */
    537 				if (rtm->rtm_flags & RTF_SETSRC) {
    538 					error = EINVAL;
    539 					goto done;
    540 				}
    541 			}
    542 
    543 			error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr,
    544 			    rtm->rtm_flags, ill, &ire, B_FALSE,
    545 			    rtsap, ipst, zoneid);
    546 			if (ill != NULL)
    547 				ASSERT(!MUTEX_HELD(&ill->ill_lock));
    548 			break;
    549 		case AF_INET6:
    550 			if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6)) {
    551 				uint_t type;
    552 
    553 				/*
    554 				 * The RTF_SETSRC flag is present, check that
    555 				 * the supplied src address is not the loopback
    556 				 * address. This would produce martian packets.
    557 				 */
    558 				if (IN6_IS_ADDR_LOOPBACK(&src_addr_v6)) {
    559 					error = EINVAL;
    560 					goto done;
    561 				}
    562 				/*
    563 				 * Also check that the supplied address is a
    564 				 * valid, local one. Only allow UP ones.
    565 				 */
    566 				type = ip_type_v6(&src_addr_v6, ipst);
    567 				if (!(type & (IRE_LOCAL|IRE_LOOPBACK))) {
    568 					error = EADDRNOTAVAIL;
    569 					goto done;
    570 				}
    571 
    572 				error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
    573 				    &gw_addr_v6, &src_addr_v6, rtm->rtm_flags,
    574 				    ill, &ire, rtsap, ipst, zoneid);
    575 				break;
    576 			}
    577 			/*
    578 			 * The RTF_SETSRC modifier must be associated
    579 			 * to a non-null source address.
    580 			 */
    581 			if (rtm->rtm_flags & RTF_SETSRC) {
    582 				error = EINVAL;
    583 				goto done;
    584 			}
    585 			error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
    586 			    &gw_addr_v6, NULL, rtm->rtm_flags,
    587 			    ill, &ire, rtsap, ipst, zoneid);
    588 			if (ill != NULL)
    589 				ASSERT(!MUTEX_HELD(&ill->ill_lock));
    590 			break;
    591 		}
    592 		if (error != 0)
    593 			goto done;
    594 		ASSERT(ire != NULL);
    595 		rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
    596 		break;
    597 	case RTM_DELETE:
    598 		/* if we are deleting a route, gateway is a must */
    599 		if ((found_addrs & RTA_GATEWAY) == 0) {
    600 			error = EINVAL;
    601 			goto done;
    602 		}
    603 		/*
    604 		 * The RTF_SETSRC modifier does not make sense
    605 		 * when deleting a route.
    606 		 */
    607 		if (rtm->rtm_flags & RTF_SETSRC) {
    608 			error = EINVAL;
    609 			goto done;
    610 		}
    611 
    612 		switch (af) {
    613 		case AF_INET:
    614 			error = ip_rt_delete(dst_addr, net_mask, gw_addr,
    615 			    found_addrs, rtm->rtm_flags, ill, B_FALSE,
    616 			    ipst, zoneid);
    617 			break;
    618 		case AF_INET6:
    619 			error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6,
    620 			    &gw_addr_v6, found_addrs, rtm->rtm_flags, ill,
    621 			    ipst, zoneid);
    622 			break;
    623 		}
    624 		break;
    625 	case RTM_GET:
    626 	case RTM_CHANGE:
    627 		/*
    628 		 * In the case of RTM_GET, the forwarding table should be
    629 		 * searched recursively.  Also, if a gateway was
    630 		 * specified then the gateway address must also be matched.
    631 		 *
    632 		 * In the case of RTM_CHANGE, the gateway address (if supplied)
    633 		 * is the new gateway address so matching on the gateway address
    634 		 * is not done.  This can lead to ambiguity when looking up the
    635 		 * route to change as usually only the destination (and netmask,
    636 		 * if supplied) is used for the lookup.  However if a RTA_IFP
    637 		 * sockaddr is also supplied, it can disambiguate which route to
    638 		 * change provided the ambiguous routes are tied to distinct
    639 		 * ill's (or interface indices).  If the routes are not tied to
    640 		 * any particular interfaces (for example, with traditional
    641 		 * gateway routes), then a RTA_IFP sockaddr will be of no use as
    642 		 * it won't match any such routes.
    643 		 * RTA_SRC is not supported for RTM_GET and RTM_CHANGE,
    644 		 * except when RTM_CHANGE is combined to RTF_SETSRC.
    645 		 */
    646 		if (((found_addrs & RTA_SRC) != 0) &&
    647 		    ((rtm->rtm_type == RTM_GET) ||
    648 		    !(rtm->rtm_flags & RTF_SETSRC))) {
    649 			error = EOPNOTSUPP;
    650 			goto done;
    651 		}
    652 
    653 		if (rtm->rtm_type == RTM_GET) {
    654 			match_flags |= MATCH_IRE_SECATTR;
    655 			match_flags_local |= MATCH_IRE_SECATTR;
    656 			if ((found_addrs & RTA_GATEWAY) != 0)
    657 				match_flags |= MATCH_IRE_GW;
    658 			if (ioc_cr)
    659 				tsl = crgetlabel(ioc_cr);
    660 			if (rtsap != NULL) {
    661 				if (rtsa_validate(rtsap) != 0) {
    662 					error = EINVAL;
    663 					goto done;
    664 				}
    665 				if (tsl != NULL &&
    666 				    crgetzoneid(ioc_cr) != GLOBAL_ZONEID &&
    667 				    (tsl->tsl_doi != rtsap->rtsa_doi ||
    668 				    !bldominates(&tsl->tsl_label,
    669 				    &rtsap->rtsa_slrange.lower_bound))) {
    670 					error = EPERM;
    671 					goto done;
    672 				}
    673 				tsl = labelalloc(
    674 				    &rtsap->rtsa_slrange.lower_bound,
    675 				    rtsap->rtsa_doi, KM_NOSLEEP);
    676 			}
    677 		}
    678 		if (rtm->rtm_type == RTM_CHANGE) {
    679 			if ((found_addrs & RTA_GATEWAY) &&
    680 			    (rtm->rtm_flags & RTF_SETSRC)) {
    681 				/*
    682 				 * Do not want to change the gateway,
    683 				 * but rather the source address.
    684 				 */
    685 				match_flags |= MATCH_IRE_GW;
    686 			}
    687 		}
    688 
    689 		/*
    690 		 * If the netmask is all ones (either as supplied or as derived
    691 		 * above), then first check for an IRE_LOOPBACK or
    692 		 * IRE_LOCAL entry.
    693 		 *
    694 		 * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL
    695 		 * entry, then look for any other type of IRE.
    696 		 */
    697 		switch (af) {
    698 		case AF_INET:
    699 			if (net_mask == IP_HOST_MASK) {
    700 				ire = ire_ftable_lookup_v4(dst_addr, 0, gw_addr,
    701 				    IRE_LOCAL | IRE_LOOPBACK, NULL, zoneid,
    702 				    tsl, match_flags_local, 0, ipst, NULL);
    703 			}
    704 			if (ire == NULL) {
    705 				ire = ire_lookup_v4(dst_addr, net_mask,
    706 				    gw_addr, ill, zoneid, tsl, match_flags,
    707 				    ipst, &ifire, &v4setsrc, &gwattr);
    708 				IN6_IPADDR_TO_V4MAPPED(v4setsrc, &v6setsrc);
    709 			}
    710 			break;
    711 		case AF_INET6:
    712 			if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, &ipv6_all_ones)) {
    713 				ire = ire_ftable_lookup_v6(&dst_addr_v6, NULL,
    714 				    &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL,
    715 				    zoneid, tsl, match_flags_local, 0, ipst,
    716 				    NULL);
    717 			}
    718 			if (ire == NULL) {
    719 				ire = ire_lookup_v6(&dst_addr_v6,
    720 				    &net_mask_v6, &gw_addr_v6, ill, zoneid,
    721 				    tsl, match_flags, ipst, &ifire, &v6setsrc,
    722 				    &gwattr);
    723 			}
    724 			break;
    725 		}
    726 		if (tsl != NULL && tsl != crgetlabel(ioc_cr))
    727 			label_rele(tsl);
    728 
    729 		if (ire == NULL) {
    730 			error = ESRCH;
    731 			goto done;
    732 		}
    733 		/*
    734 		 * Want to return failure if we get an IRE_NOROUTE from
    735 		 * ire_route_recursive
    736 		 */
    737 		if (ire->ire_type & IRE_NOROUTE) {
    738 			ire_refrele(ire);
    739 			ire = NULL;
    740 			error = ESRCH;
    741 			goto done;
    742 		}
    743 
    744 		/* we know the IRE before we come here */
    745 		switch (rtm->rtm_type) {
    746 		case RTM_GET:
    747 			mp1 = rts_rtmget(mp, ire, ifire, &v6setsrc, gwattr, af);
    748 			if (mp1 == NULL) {
    749 				error = ENOBUFS;
    750 				goto done;
    751 			}
    752 			freemsg(mp);
    753 			mp = mp1;
    754 			rtm = (rt_msghdr_t *)mp->b_rptr;
    755 			break;
    756 		case RTM_CHANGE:
    757 			/*
    758 			 * Do not allow the multirouting state of a route
    759 			 * to be changed. This aims to prevent undesirable
    760 			 * stages where both multirt and non-multirt routes
    761 			 * for the same destination are declared.
    762 			 */
    763 			if ((ire->ire_flags & RTF_MULTIRT) !=
    764 			    (rtm->rtm_flags & RTF_MULTIRT)) {
    765 				error = EINVAL;
    766 				goto done;
    767 			}
    768 			/*
    769 			 * Note that we do not need to do
    770 			 * ire_flush_cache_*(IRE_FLUSH_ADD) as a change
    771 			 * in metrics or gateway will not affect existing
    772 			 * routes since it does not create a more specific
    773 			 * route.
    774 			 */
    775 			switch (af) {
    776 			case AF_INET:
    777 				if ((found_addrs & RTA_GATEWAY) != 0 &&
    778 				    (ire->ire_gateway_addr != gw_addr)) {
    779 					ire->ire_gateway_addr = gw_addr;
    780 				}
    781 
    782 				if (rtsap != NULL) {
    783 					ga.ga_af = AF_INET;
    784 					IN6_IPADDR_TO_V4MAPPED(
    785 					    ire->ire_gateway_addr, &ga.ga_addr);
    786 
    787 					gcgrp = gcgrp_lookup(&ga, B_TRUE);
    788 					if (gcgrp == NULL) {
    789 						error = ENOMEM;
    790 						goto done;
    791 					}
    792 				}
    793 
    794 				if ((found_addrs & RTA_SRC) != 0 &&
    795 				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
    796 				    (ire->ire_setsrc_addr != src_addr)) {
    797 					if (src_addr != INADDR_ANY) {
    798 						uint_t type;
    799 
    800 						/*
    801 						 * The RTF_SETSRC flag is
    802 						 * present, check that the
    803 						 * supplied src address is not
    804 						 * the loopback address. This
    805 						 * would produce martian
    806 						 * packets.
    807 						 */
    808 						if (src_addr ==
    809 						    htonl(INADDR_LOOPBACK)) {
    810 							error = EINVAL;
    811 							goto done;
    812 						}
    813 						/*
    814 						 * Also check that the
    815 						 * supplied addr is a valid
    816 						 * local address.
    817 						 */
    818 						type = ip_type_v4(src_addr,
    819 						    ipst);
    820 						if (!(type &
    821 						    (IRE_LOCAL|IRE_LOOPBACK))) {
    822 							error = EADDRNOTAVAIL;
    823 							goto done;
    824 						}
    825 						ire->ire_flags |= RTF_SETSRC;
    826 						ire->ire_setsrc_addr =
    827 						    src_addr;
    828 					} else {
    829 						ire->ire_flags &= ~RTF_SETSRC;
    830 						ire->ire_setsrc_addr =
    831 						    INADDR_ANY;
    832 					}
    833 					/*
    834 					 * Let conn_ixa caching know that
    835 					 * source address selection changed
    836 					 */
    837 					ip_update_source_selection(ipst);
    838 				}
    839 				ire_flush_cache_v4(ire, IRE_FLUSH_GWCHANGE);
    840 				break;
    841 			case AF_INET6:
    842 				mutex_enter(&ire->ire_lock);
    843 				if ((found_addrs & RTA_GATEWAY) != 0 &&
    844 				    !IN6_ARE_ADDR_EQUAL(
    845 				    &ire->ire_gateway_addr_v6, &gw_addr_v6)) {
    846 					ire->ire_gateway_addr_v6 = gw_addr_v6;
    847 				}
    848 				mutex_exit(&ire->ire_lock);
    849 
    850 				if (rtsap != NULL) {
    851 					ga.ga_af = AF_INET6;
    852 					mutex_enter(&ire->ire_lock);
    853 					ga.ga_addr = ire->ire_gateway_addr_v6;
    854 					mutex_exit(&ire->ire_lock);
    855 
    856 					gcgrp = gcgrp_lookup(&ga, B_TRUE);
    857 					if (gcgrp == NULL) {
    858 						error = ENOMEM;
    859 						goto done;
    860 					}
    861 				}
    862 
    863 				if ((found_addrs & RTA_SRC) != 0 &&
    864 				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
    865 				    !IN6_ARE_ADDR_EQUAL(
    866 				    &ire->ire_setsrc_addr_v6, &src_addr_v6)) {
    867 					if (!IN6_IS_ADDR_UNSPECIFIED(
    868 					    &src_addr_v6)) {
    869 						uint_t type;
    870 
    871 						/*
    872 						 * The RTF_SETSRC flag is
    873 						 * present, check that the
    874 						 * supplied src address is not
    875 						 * the loopback address. This
    876 						 * would produce martian
    877 						 * packets.
    878 						 */
    879 						if (IN6_IS_ADDR_LOOPBACK(
    880 						    &src_addr_v6)) {
    881 							error = EINVAL;
    882 							goto done;
    883 						}
    884 						/*
    885 						 * Also check that the
    886 						 * supplied addr is a valid
    887 						 * local address.
    888 						 */
    889 						type = ip_type_v6(&src_addr_v6,
    890 						    ipst);
    891 						if (!(type &
    892 						    (IRE_LOCAL|IRE_LOOPBACK))) {
    893 							error = EADDRNOTAVAIL;
    894 							goto done;
    895 						}
    896 						mutex_enter(&ire->ire_lock);
    897 						ire->ire_flags |= RTF_SETSRC;
    898 						ire->ire_setsrc_addr_v6 =
    899 						    src_addr_v6;
    900 						mutex_exit(&ire->ire_lock);
    901 					} else {
    902 						mutex_enter(&ire->ire_lock);
    903 						ire->ire_flags &= ~RTF_SETSRC;
    904 						ire->ire_setsrc_addr_v6 =
    905 						    ipv6_all_zeros;
    906 						mutex_exit(&ire->ire_lock);
    907 					}
    908 					/*
    909 					 * Let conn_ixa caching know that
    910 					 * source address selection changed
    911 					 */
    912 					ip_update_source_selection(ipst);
    913 				}
    914 				ire_flush_cache_v6(ire, IRE_FLUSH_GWCHANGE);
    915 				break;
    916 			}
    917 
    918 			if (rtsap != NULL) {
    919 				ASSERT(gcgrp != NULL);
    920 
    921 				/*
    922 				 * Create and add the security attribute to
    923 				 * prefix IRE; it will add a reference to the
    924 				 * group upon allocating a new entry.  If it
    925 				 * finds an already-existing entry for the
    926 				 * security attribute, it simply returns it
    927 				 * and no new group reference is made.
    928 				 */
    929 				gc = gc_create(rtsap, gcgrp, &gcgrp_xtraref);
    930 				if (gc == NULL ||
    931 				    (error = tsol_ire_init_gwattr(ire,
    932 				    ire->ire_ipversion, gc)) != 0) {
    933 					if (gc != NULL) {
    934 						GC_REFRELE(gc);
    935 					} else {
    936 						/* gc_create failed */
    937 						error = ENOMEM;
    938 					}
    939 					goto done;
    940 				}
    941 			}
    942 			rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
    943 			break;
    944 		}
    945 		break;
    946 	default:
    947 		error = EOPNOTSUPP;
    948 		break;
    949 	}
    950 done:
    951 	if (ire != NULL)
    952 		ire_refrele(ire);
    953 	if (ifire != NULL)
    954 		ire_refrele(ifire);
    955 	if (ill != NULL)
    956 		ill_refrele(ill);
    957 
    958 	if (gcgrp_xtraref)
    959 		GCGRP_REFRELE(gcgrp);
    960 
    961 	if (rtm != NULL) {
    962 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
    963 		if (error != 0) {
    964 			rtm->rtm_errno = error;
    965 			/* Send error ACK */
    966 			ip1dbg(("ip_rts_request: error %d\n", error));
    967 		} else {
    968 			rtm->rtm_flags |= RTF_DONE;
    969 			/* OK ACK already set up by caller except this */
    970 			ip2dbg(("ip_rts_request: OK ACK\n"));
    971 		}
    972 		rts_queue_input(mp, connp, af, RTSQ_ALL, ipst);
    973 	}
    974 	return (error);
    975 }
    976 
    977 /*
    978  * Helper function that can do recursive lookups including when
    979  * MATCH_IRE_GW and/or MATCH_IRE_MASK is set.
    980  */
    981 static ire_t *
    982 ire_lookup_v4(ipaddr_t dst_addr, ipaddr_t net_mask, ipaddr_t gw_addr,
    983     const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl,
    984     int match_flags, ip_stack_t *ipst, ire_t **pifire, ipaddr_t *v4setsrcp,
    985     tsol_ire_gw_secattr_t **gwattrp)
    986 {
    987 	ire_t		*ire;
    988 	ire_t		*ifire = NULL;
    989 	uint_t		ire_type;
    990 
    991 	*pifire = NULL;
    992 	*v4setsrcp = INADDR_ANY;
    993 	*gwattrp = NULL;
    994 
    995 	/* Skip IRE_IF_CLONE */
    996 	match_flags |= MATCH_IRE_TYPE;
    997 	ire_type = (IRE_ONLINK|IRE_OFFLINK) & ~IRE_IF_CLONE;
    998 
    999 	/*
   1000 	 * ire_route_recursive can't match gateway or mask thus if they are
   1001 	 * set we have to do two steps of lookups
   1002 	 */
   1003 	if (match_flags & (MATCH_IRE_GW|MATCH_IRE_MASK)) {
   1004 		ire = ire_ftable_lookup_v4(dst_addr, net_mask, gw_addr,
   1005 		    ire_type, ill, zoneid, tsl, match_flags, 0, ipst, NULL);
   1006 
   1007 		if (ire == NULL ||(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)))
   1008 			return (ire);
   1009 
   1010 		if (ire->ire_type & IRE_ONLINK)
   1011 			return (ire);
   1012 
   1013 		if (ire->ire_flags & RTF_SETSRC) {
   1014 			ASSERT(ire->ire_setsrc_addr != INADDR_ANY);
   1015 			*v4setsrcp = ire->ire_setsrc_addr;
   1016 			v4setsrcp = NULL;
   1017 		}
   1018 
   1019 		/* The first ire_gw_secattr is passed back */
   1020 		if (ire->ire_gw_secattr != NULL) {
   1021 			*gwattrp = ire->ire_gw_secattr;
   1022 			gwattrp = NULL;
   1023 		}
   1024 
   1025 		/* Look for an interface ire recursively based on the gateway */
   1026 		dst_addr = ire->ire_gateway_addr;
   1027 		match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_MASK);
   1028 		ifire = ire_route_recursive_v4(dst_addr, ire_type, ill, zoneid,
   1029 		    tsl, match_flags, B_FALSE, 0, ipst, v4setsrcp, gwattrp,
   1030 		    NULL);
   1031 	} else {
   1032 		ire = ire_route_recursive_v4(dst_addr, ire_type, ill, zoneid,
   1033 		    tsl, match_flags, B_FALSE, 0, ipst, v4setsrcp, gwattrp,
   1034 		    NULL);
   1035 	}
   1036 	*pifire = ifire;
   1037 	return (ire);
   1038 }
   1039 
   1040 static ire_t *
   1041 ire_lookup_v6(const in6_addr_t *dst_addr_v6,
   1042     const in6_addr_t *net_mask_v6, const in6_addr_t *gw_addr_v6,
   1043     const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, int match_flags,
   1044     ip_stack_t *ipst, ire_t **pifire,
   1045     in6_addr_t *v6setsrcp, tsol_ire_gw_secattr_t **gwattrp)
   1046 {
   1047 	ire_t		*ire;
   1048 	ire_t		*ifire = NULL;
   1049 	uint_t		ire_type;
   1050 
   1051 	*pifire = NULL;
   1052 	*v6setsrcp = ipv6_all_zeros;
   1053 	*gwattrp = NULL;
   1054 
   1055 	/* Skip IRE_IF_CLONE */
   1056 	match_flags |= MATCH_IRE_TYPE;
   1057 	ire_type = (IRE_ONLINK|IRE_OFFLINK) & ~IRE_IF_CLONE;
   1058 
   1059 	/*
   1060 	 * ire_route_recursive can't match gateway or mask thus if they are
   1061 	 * set we have to do two steps of lookups
   1062 	 */
   1063 	if (match_flags & (MATCH_IRE_GW|MATCH_IRE_MASK)) {
   1064 		in6_addr_t dst;
   1065 
   1066 		ire = ire_ftable_lookup_v6(dst_addr_v6, net_mask_v6,
   1067 		    gw_addr_v6, ire_type, ill, zoneid, tsl, match_flags, 0,
   1068 		    ipst, NULL);
   1069 
   1070 		if (ire == NULL ||(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)))
   1071 			return (ire);
   1072 
   1073 		if (ire->ire_type & IRE_ONLINK)
   1074 			return (ire);
   1075 
   1076 		if (ire->ire_flags & RTF_SETSRC) {
   1077 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(
   1078 			    &ire->ire_setsrc_addr_v6));
   1079 			*v6setsrcp = ire->ire_setsrc_addr_v6;
   1080 			v6setsrcp = NULL;
   1081 		}
   1082 
   1083 		/* The first ire_gw_secattr is passed back */
   1084 		if (ire->ire_gw_secattr != NULL) {
   1085 			*gwattrp = ire->ire_gw_secattr;
   1086 			gwattrp = NULL;
   1087 		}
   1088 
   1089 		mutex_enter(&ire->ire_lock);
   1090 		dst = ire->ire_gateway_addr_v6;
   1091 		mutex_exit(&ire->ire_lock);
   1092 		match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_MASK);
   1093 		ifire = ire_route_recursive_v6(&dst, ire_type, ill, zoneid, tsl,
   1094 		    match_flags, B_FALSE, 0, ipst, v6setsrcp, gwattrp, NULL);
   1095 	} else {
   1096 		ire = ire_route_recursive_v6(dst_addr_v6, ire_type, ill, zoneid,
   1097 		    tsl, match_flags, B_FALSE, 0, ipst, v6setsrcp, gwattrp,
   1098 		    NULL);
   1099 	}
   1100 	*pifire = ifire;
   1101 	return (ire);
   1102 }
   1103 
   1104 
   1105 /*
   1106  * Handle IP_IOC_RTS_REQUEST ioctls
   1107  */
   1108 int
   1109 ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
   1110 {
   1111 	conn_t	*connp = Q_TO_CONN(q);
   1112 	IOCP	iocp = (IOCP)mp->b_rptr;
   1113 	mblk_t	*mp1, *ioc_mp = mp;
   1114 	int	error = 0;
   1115 	ip_stack_t	*ipst;
   1116 
   1117 	ipst = connp->conn_netstack->netstack_ip;
   1118 
   1119 	ASSERT(mp->b_cont != NULL);
   1120 	/* ioc_mp holds mp */
   1121 	mp = mp->b_cont;
   1122 
   1123 	/*
   1124 	 * The Routing Socket data starts on
   1125 	 * next block. If there is no next block
   1126 	 * this is an indication from routing module
   1127 	 * that it is a routing socket stream queue.
   1128 	 * We need to support that for compatibility with SDP since
   1129 	 * it has a contract private interface to use IP_IOC_RTS_REQUEST.
   1130 	 * Note: SDP no longer uses IP_IOC_RTS_REQUEST - we can remove this.
   1131 	 */
   1132 	if (mp->b_cont == NULL) {
   1133 		/*
   1134 		 * This is a message from SDP
   1135 		 * indicating that this is a Routing Socket
   1136 		 * Stream. Insert this conn_t in routing
   1137 		 * socket client list.
   1138 		 */
   1139 		connp->conn_useloopback = 1;
   1140 		ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp);
   1141 		goto done;
   1142 	}
   1143 	mp1 = dupmsg(mp->b_cont);
   1144 	if (mp1 == NULL) {
   1145 		error  = ENOBUFS;
   1146 		goto done;
   1147 	}
   1148 	mp = mp1;
   1149 
   1150 	error = ip_rts_request_common(mp, connp, ioc_cr);
   1151 done:
   1152 	iocp->ioc_error = error;
   1153 	ioc_mp->b_datap->db_type = M_IOCACK;
   1154 	if (iocp->ioc_error != 0)
   1155 		iocp->ioc_count = 0;
   1156 	/* Note that we pass a NULL ira to rts_input */
   1157 	(connp->conn_recv)(connp, ioc_mp, NULL, NULL);
   1158 
   1159 	/* conn was refheld in ip_wput_ioctl. */
   1160 	CONN_OPER_PENDING_DONE(connp);
   1161 
   1162 	return (error);
   1163 }
   1164 
   1165 /*
   1166  * Build a reply to the RTM_GET request contained in the given message block
   1167  * using the retrieved IRE of the destination address, the parent IRE (if it
   1168  * exists) and the address family.
   1169  *
   1170  * Returns a pointer to a message block containing the reply if successful,
   1171  * otherwise NULL is returned.
   1172  */
   1173 static mblk_t *
   1174 rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *ifire, const in6_addr_t *setsrc,
   1175     tsol_ire_gw_secattr_t *attrp, sa_family_t af)
   1176 {
   1177 	rt_msghdr_t	*rtm;
   1178 	rt_msghdr_t	*new_rtm;
   1179 	mblk_t		*new_mp;
   1180 	int		rtm_addrs;
   1181 	int		rtm_flags;
   1182 	tsol_gc_t	*gc = NULL;
   1183 	tsol_gcgrp_t	*gcgrp = NULL;
   1184 	ill_t		*ill;
   1185 	ipif_t		*ipif = NULL;
   1186 	ipaddr_t	brdaddr;	/* IFF_POINTOPOINT destination */
   1187 	ipaddr_t	ifaddr;
   1188 	in6_addr_t	brdaddr6;	/* IFF_POINTOPOINT destination */
   1189 	in6_addr_t	ifaddr6;
   1190 	ipaddr_t	v4setsrc;
   1191 
   1192 	rtm = (rt_msghdr_t *)mp->b_rptr;
   1193 
   1194 	/*
   1195 	 * Find the ill used to send packets. This will be NULL in case
   1196 	 * of a reject or blackhole.
   1197 	 */
   1198 	if (ifire != NULL)
   1199 		ill = ire_nexthop_ill(ifire);
   1200 	else
   1201 		ill = ire_nexthop_ill(ire);
   1202 
   1203 	if (attrp != NULL) {
   1204 		mutex_enter(&attrp->igsa_lock);
   1205 		if ((gc = attrp->igsa_gc) != NULL) {
   1206 			gcgrp = gc->gc_grp;
   1207 			ASSERT(gcgrp != NULL);
   1208 			rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
   1209 		}
   1210 		mutex_exit(&attrp->igsa_lock);
   1211 	}
   1212 
   1213 	/*
   1214 	 * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK.
   1215 	 *
   1216 	 * The 4.4BSD-Lite2 code (net/rtsock.c) returns both
   1217 	 * RTA_IFP and RTA_IFA if either is defined, and also
   1218 	 * returns RTA_BRD if the appropriate interface is
   1219 	 * point-to-point.
   1220 	 */
   1221 	rtm_addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK);
   1222 	if ((rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) && ill != NULL) {
   1223 		rtm_addrs |= (RTA_IFP | RTA_IFA);
   1224 		/*
   1225 		 * We associate an IRE with an ILL, hence we don't exactly
   1226 		 * know what might make sense for RTA_IFA and RTA_BRD. We
   1227 		 * pick the first ipif on the ill.
   1228 		 */
   1229 		ipif = ipif_get_next_ipif(NULL, ill);
   1230 		if (ipif != NULL) {
   1231 			if (ipif->ipif_isv6)
   1232 				ifaddr6 = ipif->ipif_v6lcl_addr;
   1233 			else
   1234 				ifaddr = ipif->ipif_lcl_addr;
   1235 			if (ipif->ipif_flags & IPIF_POINTOPOINT) {
   1236 				rtm_addrs |= RTA_BRD;
   1237 				if (ipif->ipif_isv6)
   1238 					brdaddr6 = ipif->ipif_v6pp_dst_addr;
   1239 				else
   1240 					brdaddr = ipif->ipif_pp_dst_addr;
   1241 			}
   1242 			ipif_refrele(ipif);
   1243 		}
   1244 	}
   1245 
   1246 	new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af, gc != NULL ? 1 : 0);
   1247 	if (new_mp == NULL) {
   1248 		if (gcgrp != NULL)
   1249 			rw_exit(&gcgrp->gcgrp_rwlock);
   1250 		if (ill != NULL)
   1251 			ill_refrele(ill);
   1252 		return (NULL);
   1253 	}
   1254 
   1255 	/*
   1256 	 * We set the destination address, gateway address,
   1257 	 * netmask and flags in the RTM_GET response depending
   1258 	 * on whether we found a parent IRE or not.
   1259 	 * In particular, if we did find a parent IRE during the
   1260 	 * recursive search, use that IRE's gateway address.
   1261 	 * Otherwise, we use the IRE's source address for the
   1262 	 * gateway address.
   1263 	 */
   1264 	ASSERT(af == AF_INET || af == AF_INET6);
   1265 	switch (af) {
   1266 	case AF_INET:
   1267 		IN6_V4MAPPED_TO_IPADDR(setsrc, v4setsrc);
   1268 		if (v4setsrc != INADDR_ANY)
   1269 			rtm_addrs |= RTA_SRC;
   1270 
   1271 		rtm_flags = ire->ire_flags;
   1272 		rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr,
   1273 		    ire->ire_mask, ire->ire_gateway_addr, v4setsrc,
   1274 		    brdaddr, 0, ifaddr, ill, new_mp, gc);
   1275 		break;
   1276 	case AF_INET6:
   1277 		if (!IN6_IS_ADDR_UNSPECIFIED(setsrc))
   1278 			rtm_addrs |= RTA_SRC;
   1279 
   1280 		rtm_flags = ire->ire_flags;
   1281 		rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6,
   1282 		    &ire->ire_mask_v6, &ire->ire_gateway_addr_v6,
   1283 		    setsrc, &brdaddr6, &ipv6_all_zeros,
   1284 		    &ifaddr6, ill, new_mp, gc);
   1285 		break;
   1286 	}
   1287 
   1288 	if (gcgrp != NULL)
   1289 		rw_exit(&gcgrp->gcgrp_rwlock);
   1290 
   1291 	new_rtm = (rt_msghdr_t *)new_mp->b_rptr;
   1292 
   1293 	/*
   1294 	 * The rtm_msglen, rtm_version and rtm_type fields in
   1295 	 * RTM_GET response are filled in by rts_fill_msg.
   1296 	 *
   1297 	 * rtm_addrs and rtm_flags are filled in based on what
   1298 	 * was requested and the state of the IREs looked up
   1299 	 * above.
   1300 	 *
   1301 	 * rtm_inits and rtm_rmx are filled in with metrics
   1302 	 * based on whether a parent IRE was found or not.
   1303 	 *
   1304 	 * TODO: rtm_index and rtm_use should probably be
   1305 	 * filled in with something resonable here and not just
   1306 	 * copied from the request.
   1307 	 */
   1308 	new_rtm->rtm_index = rtm->rtm_index;
   1309 	new_rtm->rtm_pid = rtm->rtm_pid;
   1310 	new_rtm->rtm_seq = rtm->rtm_seq;
   1311 	new_rtm->rtm_use = rtm->rtm_use;
   1312 	new_rtm->rtm_addrs = rtm_addrs;
   1313 	new_rtm->rtm_flags = rtm_flags;
   1314 	new_rtm->rtm_inits = rts_getmetrics(ire, &new_rtm->rtm_rmx);
   1315 	if (ill != NULL)
   1316 		ill_refrele(ill);
   1317 	return (new_mp);
   1318 }
   1319 
   1320 /*
   1321  * Fill the given if_data_t with interface statistics.
   1322  */
   1323 static void
   1324 rts_getifdata(if_data_t *if_data, const ipif_t *ipif)
   1325 {
   1326 	if_data->ifi_type = ipif->ipif_ill->ill_type;
   1327 						/* ethernet, tokenring, etc */
   1328 	if_data->ifi_addrlen = 0;		/* media address length */
   1329 	if_data->ifi_hdrlen = 0;		/* media header length */
   1330 	if_data->ifi_mtu = ipif->ipif_ill->ill_mtu;	/* mtu */
   1331 	if_data->ifi_metric = ipif->ipif_metric; /* metric (external only) */
   1332 	if_data->ifi_baudrate = 0;		/* linespeed */
   1333 
   1334 	if_data->ifi_ipackets = 0;		/* packets received on if */
   1335 	if_data->ifi_ierrors = 0;		/* input errors on interface */
   1336 	if_data->ifi_opackets = 0;		/* packets sent on interface */
   1337 	if_data->ifi_oerrors = 0;		/* output errors on if */
   1338 	if_data->ifi_collisions = 0;		/* collisions on csma if */
   1339 	if_data->ifi_ibytes = 0;		/* total number received */
   1340 	if_data->ifi_obytes = 0;		/* total number sent */
   1341 	if_data->ifi_imcasts = 0;		/* multicast packets received */
   1342 	if_data->ifi_omcasts = 0;		/* multicast packets sent */
   1343 	if_data->ifi_iqdrops = 0;		/* dropped on input */
   1344 	if_data->ifi_noproto = 0;		/* destined for unsupported */
   1345 						/* protocol. */
   1346 }
   1347 
   1348 /*
   1349  * Set the metrics on a forwarding table route.
   1350  */
   1351 static void
   1352 rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics)
   1353 {
   1354 	clock_t		rtt;
   1355 	clock_t		rtt_sd;
   1356 	ill_t		*ill;
   1357 	ifrt_t		*ifrt;
   1358 	mblk_t		*mp;
   1359 	in6_addr_t	gw_addr_v6;
   1360 
   1361 	/* Need to add back some metrics to the IRE? */
   1362 	/*
   1363 	 * Bypass obtaining the lock and searching ill_saved_ire_mp in the
   1364 	 * common case of no metrics.
   1365 	 */
   1366 	if (which == 0)
   1367 		return;
   1368 	ire->ire_metrics.iulp_set = B_TRUE;
   1369 
   1370 	/*
   1371 	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
   1372 	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
   1373 	 * microseconds.
   1374 	 */
   1375 	if (which & RTV_RTT)
   1376 		rtt = metrics->rmx_rtt / 1000;
   1377 	if (which & RTV_RTTVAR)
   1378 		rtt_sd = metrics->rmx_rttvar / 1000;
   1379 
   1380 	/*
   1381 	 * Update the metrics in the IRE itself.
   1382 	 */
   1383 	mutex_enter(&ire->ire_lock);
   1384 	if (which & RTV_MTU)
   1385 		ire->ire_metrics.iulp_mtu = metrics->rmx_mtu;
   1386 	if (which & RTV_RTT)
   1387 		ire->ire_metrics.iulp_rtt = rtt;
   1388 	if (which & RTV_SSTHRESH)
   1389 		ire->ire_metrics.iulp_ssthresh = metrics->rmx_ssthresh;
   1390 	if (which & RTV_RTTVAR)
   1391 		ire->ire_metrics.iulp_rtt_sd = rtt_sd;
   1392 	if (which & RTV_SPIPE)
   1393 		ire->ire_metrics.iulp_spipe = metrics->rmx_sendpipe;
   1394 	if (which & RTV_RPIPE)
   1395 		ire->ire_metrics.iulp_rpipe = metrics->rmx_recvpipe;
   1396 	mutex_exit(&ire->ire_lock);
   1397 
   1398 	/*
   1399 	 * Search through the ifrt_t chain hanging off the ILL in order to
   1400 	 * reflect the metric change there.
   1401 	 */
   1402 	ill = ire->ire_ill;
   1403 	if (ill == NULL)
   1404 		return;
   1405 	ASSERT((ill->ill_isv6 && ire->ire_ipversion == IPV6_VERSION) ||
   1406 	    ((!ill->ill_isv6 && ire->ire_ipversion == IPV4_VERSION)));
   1407 	if (ill->ill_isv6) {
   1408 		mutex_enter(&ire->ire_lock);
   1409 		gw_addr_v6 = ire->ire_gateway_addr_v6;
   1410 		mutex_exit(&ire->ire_lock);
   1411 	}
   1412 	mutex_enter(&ill->ill_saved_ire_lock);
   1413 	for (mp = ill->ill_saved_ire_mp; mp != NULL; mp = mp->b_cont) {
   1414 		/*
   1415 		 * On a given ill, the tuple of address, gateway, mask,
   1416 		 * ire_type and zoneid unique for each saved IRE.
   1417 		 */
   1418 		ifrt = (ifrt_t *)mp->b_rptr;
   1419 		if (ill->ill_isv6) {
   1420 			if (!IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr,
   1421 			    &ire->ire_addr_v6) ||
   1422 			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr,
   1423 			    &gw_addr_v6) ||
   1424 			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask,
   1425 			    &ire->ire_mask_v6))
   1426 				continue;
   1427 		} else {
   1428 			if (ifrt->ifrt_addr != ire->ire_addr ||
   1429 			    ifrt->ifrt_gateway_addr != ire->ire_gateway_addr ||
   1430 			    ifrt->ifrt_mask != ire->ire_mask)
   1431 				continue;
   1432 		}
   1433 		if (ifrt->ifrt_zoneid != ire->ire_zoneid ||
   1434 		    ifrt->ifrt_type != ire->ire_type)
   1435 			continue;
   1436 
   1437 		if (which & RTV_MTU)
   1438 			ifrt->ifrt_metrics.iulp_mtu = metrics->rmx_mtu;
   1439 		if (which & RTV_RTT)
   1440 			ifrt->ifrt_metrics.iulp_rtt = rtt;
   1441 		if (which & RTV_SSTHRESH) {
   1442 			ifrt->ifrt_metrics.iulp_ssthresh =
   1443 			    metrics->rmx_ssthresh;
   1444 		}
   1445 		if (which & RTV_RTTVAR)
   1446 			ifrt->ifrt_metrics.iulp_rtt_sd = metrics->rmx_rttvar;
   1447 		if (which & RTV_SPIPE)
   1448 			ifrt->ifrt_metrics.iulp_spipe = metrics->rmx_sendpipe;
   1449 		if (which & RTV_RPIPE)
   1450 			ifrt->ifrt_metrics.iulp_rpipe = metrics->rmx_recvpipe;
   1451 		break;
   1452 	}
   1453 	mutex_exit(&ill->ill_saved_ire_lock);
   1454 
   1455 	/*
   1456 	 * Update any IRE_IF_CLONE hanging created from this IRE_IF so they
   1457 	 * get any new iulp_mtu.
   1458 	 * We do that by deleting them; ire_create_if_clone will pick
   1459 	 * up the new metrics.
   1460 	 */
   1461 	if ((ire->ire_type & IRE_INTERFACE) && ire->ire_dep_children != 0)
   1462 		ire_dep_delete_if_clone(ire);
   1463 }
   1464 
   1465 /*
   1466  * Get the metrics from a forwarding table route.
   1467  */
   1468 static int
   1469 rts_getmetrics(ire_t *ire, rt_metrics_t *metrics)
   1470 {
   1471 	int	metrics_set = 0;
   1472 
   1473 	bzero(metrics, sizeof (rt_metrics_t));
   1474 
   1475 	/*
   1476 	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
   1477 	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
   1478 	 * microseconds.
   1479 	 */
   1480 	metrics->rmx_rtt = ire->ire_metrics.iulp_rtt * 1000;
   1481 	metrics_set |= RTV_RTT;
   1482 	metrics->rmx_mtu = ire->ire_metrics.iulp_mtu;
   1483 	metrics_set |= RTV_MTU;
   1484 	metrics->rmx_ssthresh = ire->ire_metrics.iulp_ssthresh;
   1485 	metrics_set |= RTV_SSTHRESH;
   1486 	metrics->rmx_rttvar = ire->ire_metrics.iulp_rtt_sd * 1000;
   1487 	metrics_set |= RTV_RTTVAR;
   1488 	metrics->rmx_sendpipe = ire->ire_metrics.iulp_spipe;
   1489 	metrics_set |= RTV_SPIPE;
   1490 	metrics->rmx_recvpipe = ire->ire_metrics.iulp_rpipe;
   1491 	metrics_set |= RTV_RPIPE;
   1492 	return (metrics_set);
   1493 }
   1494 
   1495 /*
   1496  * Given two sets of metrics (src and dst), use the dst values if they are
   1497  * set. If a dst value is not set but the src value is set, then we use
   1498  * the src value.
   1499  * dst is updated with the new values.
   1500  * This is used to merge information from a dce_t and ire_metrics, where the
   1501  * dce values takes precedence.
   1502  */
   1503 void
   1504 rts_merge_metrics(iulp_t *dst, const iulp_t *src)
   1505 {
   1506 	if (!src->iulp_set)
   1507 		return;
   1508 
   1509 	if (dst->iulp_ssthresh == 0)
   1510 		dst->iulp_ssthresh = src->iulp_ssthresh;
   1511 	if (dst->iulp_rtt == 0)
   1512 		dst->iulp_rtt = src->iulp_rtt;
   1513 	if (dst->iulp_rtt_sd == 0)
   1514 		dst->iulp_rtt_sd = src->iulp_rtt_sd;
   1515 	if (dst->iulp_spipe == 0)
   1516 		dst->iulp_spipe = src->iulp_spipe;
   1517 	if (dst->iulp_rpipe == 0)
   1518 		dst->iulp_rpipe = src->iulp_rpipe;
   1519 	if (dst->iulp_rtomax == 0)
   1520 		dst->iulp_rtomax = src->iulp_rtomax;
   1521 	if (dst->iulp_sack == 0)
   1522 		dst->iulp_sack = src->iulp_sack;
   1523 	if (dst->iulp_tstamp_ok == 0)
   1524 		dst->iulp_tstamp_ok = src->iulp_tstamp_ok;
   1525 	if (dst->iulp_wscale_ok == 0)
   1526 		dst->iulp_wscale_ok = src->iulp_wscale_ok;
   1527 	if (dst->iulp_ecn_ok == 0)
   1528 		dst->iulp_ecn_ok = src->iulp_ecn_ok;
   1529 	if (dst->iulp_pmtud_ok == 0)
   1530 		dst->iulp_pmtud_ok = src->iulp_pmtud_ok;
   1531 	if (dst->iulp_mtu == 0)
   1532 		dst->iulp_mtu = src->iulp_mtu;
   1533 }
   1534 
   1535 
   1536 /*
   1537  * Takes a pointer to a routing message and extracts necessary info by looking
   1538  * at the rtm->rtm_addrs bits and store the requested sockaddrs in the pointers
   1539  * passed (all of which must be valid).
   1540  *
   1541  * The bitmask of sockaddrs actually found in the message is returned, or zero
   1542  * is returned in the case of an error.
   1543  */
   1544 static int
   1545 rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp,
   1546     in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp,
   1547     in6_addr_t *in_src_addrp, ushort_t *indexp, sa_family_t *afp,
   1548     tsol_rtsecattr_t *rtsecattr, int *error)
   1549 {
   1550 	struct sockaddr *sa;
   1551 	int	i;
   1552 	int	addr_bits;
   1553 	int	length;
   1554 	int	found_addrs = 0;
   1555 	caddr_t	cp;
   1556 	size_t	size;
   1557 	struct sockaddr_dl *sdl;
   1558 
   1559 	*dst_addrp = ipv6_all_zeros;
   1560 	*gw_addrp = ipv6_all_zeros;
   1561 	*net_maskp = ipv6_all_zeros;
   1562 	*authorp = ipv6_all_zeros;
   1563 	*if_addrp = ipv6_all_zeros;
   1564 	*in_src_addrp = ipv6_all_zeros;
   1565 	*indexp = 0;
   1566 	*afp = AF_UNSPEC;
   1567 	rtsecattr->rtsa_cnt = 0;
   1568 	*error = 0;
   1569 
   1570 	/*
   1571 	 * At present we handle only RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_IFP,
   1572 	 * RTA_IFA and RTA_AUTHOR.  The rest will be added as we need them.
   1573 	 */
   1574 	cp = (caddr_t)&rtm[1];
   1575 	length = rtm->rtm_msglen;
   1576 	for (i = 0; (i < RTA_NUMBITS) && ((cp - (caddr_t)rtm) < length); i++) {
   1577 		/*
   1578 		 * The address family we are working with starts out as
   1579 		 * AF_UNSPEC, but is set to the one specified with the
   1580 		 * destination address.
   1581 		 *
   1582 		 * If the "working" address family that has been set to
   1583 		 * something other than AF_UNSPEC, then the address family of
   1584 		 * subsequent sockaddrs must either be AF_UNSPEC (for
   1585 		 * compatibility with older programs) or must be the same as our
   1586 		 * "working" one.
   1587 		 *
   1588 		 * This code assumes that RTA_DST (1) comes first in the loop.
   1589 		 */
   1590 		sa = (struct sockaddr *)cp;
   1591 		addr_bits = (rtm->rtm_addrs & (1 << i));
   1592 		if (addr_bits == 0)
   1593 			continue;
   1594 		switch (addr_bits) {
   1595 		case RTA_DST:
   1596 			size = rts_copyfromsockaddr(sa, dst_addrp);
   1597 			*afp = sa->sa_family;
   1598 			break;
   1599 		case RTA_GATEWAY:
   1600 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
   1601 				return (0);
   1602 			size = rts_copyfromsockaddr(sa, gw_addrp);
   1603 			break;
   1604 		case RTA_NETMASK:
   1605 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
   1606 				return (0);
   1607 			size = rts_copyfromsockaddr(sa, net_maskp);
   1608 			break;
   1609 		case RTA_IFP:
   1610 			if (sa->sa_family != AF_LINK &&
   1611 			    sa->sa_family != AF_UNSPEC)
   1612 				return (0);
   1613 			sdl = (struct sockaddr_dl *)cp;
   1614 			*indexp = sdl->sdl_index;
   1615 			size = sizeof (struct sockaddr_dl);
   1616 			break;
   1617 		case RTA_SRC:
   1618 			/* Source address of the incoming packet */
   1619 			size = rts_copyfromsockaddr(sa, in_src_addrp);
   1620 			*afp = sa->sa_family;
   1621 			break;
   1622 		case RTA_IFA:
   1623 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
   1624 				return (0);
   1625 			size = rts_copyfromsockaddr(sa, if_addrp);
   1626 			break;
   1627 		case RTA_AUTHOR:
   1628 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
   1629 				return (0);
   1630 			size = rts_copyfromsockaddr(sa, authorp);
   1631 			break;
   1632 		default:
   1633 			return (0);
   1634 		}
   1635 		if (size == 0)
   1636 			return (0);
   1637 		cp += size;
   1638 		found_addrs |= addr_bits;
   1639 	}
   1640 
   1641 	/*
   1642 	 * Parse the routing message and look for any security-
   1643 	 * related attributes for the route.  For each valid
   1644 	 * attribute, allocate/obtain the corresponding kernel
   1645 	 * route security attributes.
   1646 	 */
   1647 	if (((cp - (caddr_t)rtm) < length) && is_system_labeled()) {
   1648 		*error = tsol_rtsa_init(rtm, rtsecattr, cp);
   1649 		ASSERT(rtsecattr->rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
   1650 	}
   1651 
   1652 	return (found_addrs);
   1653 }
   1654 
   1655 /*
   1656  * Fills the message with the given info.
   1657  */
   1658 static void
   1659 rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask,
   1660     ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author,
   1661     ipaddr_t ifaddr, const ill_t *ill, mblk_t *mp,
   1662     const tsol_gc_t *gc)
   1663 {
   1664 	rt_msghdr_t	*rtm;
   1665 	sin_t		*sin;
   1666 	size_t		data_size, header_size;
   1667 	uchar_t		*cp;
   1668 	int		i;
   1669 
   1670 	ASSERT(mp != NULL);
   1671 	/*
   1672 	 * First find the type of the message
   1673 	 * and its length.
   1674 	 */
   1675 	header_size = rts_header_msg_size(type);
   1676 	/*
   1677 	 * Now find the size of the data
   1678 	 * that follows the message header.
   1679 	 */
   1680 	data_size = rts_data_msg_size(rtm_addrs, AF_INET, gc != NULL ? 1 : 0);
   1681 
   1682 	rtm = (rt_msghdr_t *)mp->b_rptr;
   1683 	mp->b_wptr = &mp->b_rptr[header_size];
   1684 	cp = mp->b_wptr;
   1685 	bzero(cp, data_size);
   1686 	for (i = 0; i < RTA_NUMBITS; i++) {
   1687 		sin = (sin_t *)cp;
   1688 		switch (rtm_addrs & (1 << i)) {
   1689 		case RTA_DST:
   1690 			sin->sin_addr.s_addr = dst;
   1691 			sin->sin_family = AF_INET;
   1692 			cp += sizeof (sin_t);
   1693 			break;
   1694 		case RTA_GATEWAY:
   1695 			sin->sin_addr.s_addr = gateway;
   1696 			sin->sin_family = AF_INET;
   1697 			cp += sizeof (sin_t);
   1698 			break;
   1699 		case RTA_NETMASK:
   1700 			sin->sin_addr.s_addr = mask;
   1701 			sin->sin_family = AF_INET;
   1702 			cp += sizeof (sin_t);
   1703 			break;
   1704 		case RTA_IFP:
   1705 			cp += ill_dls_info((struct sockaddr_dl *)cp, ill);
   1706 			break;
   1707 		case RTA_IFA:
   1708 			sin->sin_addr.s_addr = ifaddr;
   1709 			sin->sin_family = AF_INET;
   1710 			cp += sizeof (sin_t);
   1711 			break;
   1712 		case RTA_SRC:
   1713 			sin->sin_addr.s_addr = src_addr;
   1714 			sin->sin_family = AF_INET;
   1715 			cp += sizeof (sin_t);
   1716 			break;
   1717 		case RTA_AUTHOR:
   1718 			sin->sin_addr.s_addr = author;
   1719 			sin->sin_family = AF_INET;
   1720 			cp += sizeof (sin_t);
   1721 			break;
   1722 		case RTA_BRD:
   1723 			/*
   1724 			 * RTA_BRD is used typically to specify a point-to-point
   1725 			 * destination address.
   1726 			 */
   1727 			sin->sin_addr.s_addr = brd_addr;
   1728 			sin->sin_family = AF_INET;
   1729 			cp += sizeof (sin_t);
   1730 			break;
   1731 		}
   1732 	}
   1733 
   1734 	if (gc != NULL) {
   1735 		rtm_ext_t *rtm_ext;
   1736 		struct rtsa_s *rp_dst;
   1737 		tsol_rtsecattr_t *rsap;
   1738 
   1739 		ASSERT(gc->gc_grp != NULL);
   1740 		ASSERT(RW_LOCK_HELD(&gc->gc_grp->gcgrp_rwlock));
   1741 
   1742 		rtm_ext = (rtm_ext_t *)cp;
   1743 		rtm_ext->rtmex_type = RTMEX_GATEWAY_SECATTR;
   1744 		rtm_ext->rtmex_len = TSOL_RTSECATTR_SIZE(1);
   1745 
   1746 		rsap = (tsol_rtsecattr_t *)(rtm_ext + 1);
   1747 		rsap->rtsa_cnt = 1;
   1748 		rp_dst = rsap->rtsa_attr;
   1749 
   1750 		ASSERT(gc->gc_db != NULL);
   1751 		bcopy(&gc->gc_db->gcdb_attr, rp_dst, sizeof (*rp_dst));
   1752 		cp = (uchar_t *)rp_dst;
   1753 	}
   1754 
   1755 	mp->b_wptr = cp;
   1756 	mp->b_cont = NULL;
   1757 	/*
   1758 	 * set the fields that are common to
   1759 	 * to different messages.
   1760 	 */
   1761 	rtm->rtm_msglen = (short)(header_size + data_size);
   1762 	rtm->rtm_version = RTM_VERSION;
   1763 	rtm->rtm_type = (uchar_t)type;
   1764 }
   1765 
   1766 /*
   1767  * Allocates and initializes a routing socket message.
   1768  * Note that sacnt is either zero or one.
   1769  */
   1770 mblk_t *
   1771 rts_alloc_msg(int type, int rtm_addrs, sa_family_t af, uint_t sacnt)
   1772 {
   1773 	size_t	length;
   1774 	mblk_t	*mp;
   1775 
   1776 	length = RTS_MSG_SIZE(type, rtm_addrs, af, sacnt);
   1777 	mp = allocb(length, BPRI_MED);
   1778 	if (mp == NULL)
   1779 		return (mp);
   1780 	bzero(mp->b_rptr, length);
   1781 	return (mp);
   1782 }
   1783 
   1784 /*
   1785  * Returns the size of the routing
   1786  * socket message header size.
   1787  */
   1788 size_t
   1789 rts_header_msg_size(int type)
   1790 {
   1791 	switch (type) {
   1792 	case RTM_DELADDR:
   1793 	case RTM_NEWADDR:
   1794 	case RTM_CHGADDR:
   1795 	case RTM_FREEADDR:
   1796 		return (sizeof (ifa_msghdr_t));
   1797 	case RTM_IFINFO:
   1798 		return (sizeof (if_msghdr_t));
   1799 	default:
   1800 		return (sizeof (rt_msghdr_t));
   1801 	}
   1802 }
   1803 
   1804 /*
   1805  * Returns the size of the message needed with the given rtm_addrs and family.
   1806  *
   1807  * It is assumed that all of the sockaddrs (with the exception of RTA_IFP) are
   1808  * of the same family (currently either AF_INET or AF_INET6).
   1809  */
   1810 size_t
   1811 rts_data_msg_size(int rtm_addrs, sa_family_t af, uint_t sacnt)
   1812 {
   1813 	int	i;
   1814 	size_t	length = 0;
   1815 
   1816 	for (i = 0; i < RTA_NUMBITS; i++) {
   1817 		switch (rtm_addrs & (1 << i)) {
   1818 		case RTA_IFP:
   1819 			length += sizeof (struct sockaddr_dl);
   1820 			break;
   1821 		case RTA_DST:
   1822 		case RTA_GATEWAY:
   1823 		case RTA_NETMASK:
   1824 		case RTA_SRC:
   1825 		case RTA_IFA:
   1826 		case RTA_AUTHOR:
   1827 		case RTA_BRD:
   1828 			ASSERT(af == AF_INET || af == AF_INET6);
   1829 			switch (af) {
   1830 			case AF_INET:
   1831 				length += sizeof (sin_t);
   1832 				break;
   1833 			case AF_INET6:
   1834 				length += sizeof (sin6_t);
   1835 				break;
   1836 			}
   1837 			break;
   1838 		}
   1839 	}
   1840 	if (sacnt > 0)
   1841 		length += sizeof (rtm_ext_t) + TSOL_RTSECATTR_SIZE(sacnt);
   1842 
   1843 	return (length);
   1844 }
   1845 
   1846 /*
   1847  * This routine is called to generate a message to the routing
   1848  * socket indicating that a redirect has occured, a routing lookup
   1849  * has failed, or that a protocol has detected timeouts to a particular
   1850  * destination. This routine is called for message types RTM_LOSING,
   1851  * RTM_REDIRECT, and RTM_MISS.
   1852  */
   1853 void
   1854 ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask,
   1855     ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs,
   1856     ip_stack_t *ipst)
   1857 {
   1858 	rt_msghdr_t	*rtm;
   1859 	mblk_t		*mp;
   1860 
   1861 	if (rtm_addrs == 0)
   1862 		return;
   1863 	mp = rts_alloc_msg(type, rtm_addrs, AF_INET, 0);
   1864 	if (mp == NULL)
   1865 		return;
   1866 	rts_fill_msg(type, rtm_addrs, dst_addr, net_mask, gw_addr, source, 0,
   1867 	    author, 0, NULL, mp, NULL);
   1868 	rtm = (rt_msghdr_t *)mp->b_rptr;
   1869 	rtm->rtm_flags = flags;
   1870 	rtm->rtm_errno = error;
   1871 	rtm->rtm_flags |= RTF_DONE;
   1872 	rtm->rtm_addrs = rtm_addrs;
   1873 	rts_queue_input(mp, NULL, AF_INET, RTSQ_ALL, ipst);
   1874 }
   1875 
   1876 /*
   1877  * This routine is called to generate a message to the routing
   1878  * socket indicating that the status of a network interface has changed.
   1879  * Message type generated RTM_IFINFO.
   1880  */
   1881 void
   1882 ip_rts_ifmsg(const ipif_t *ipif, uint_t flags)
   1883 {
   1884 	ip_rts_xifmsg(ipif, 0, 0, flags);
   1885 }
   1886 
   1887 void
   1888 ip_rts_xifmsg(const ipif_t *ipif, uint64_t set, uint64_t clear, uint_t flags)
   1889 {
   1890 	if_msghdr_t	*ifm;
   1891 	mblk_t		*mp;
   1892 	sa_family_t	af;
   1893 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
   1894 
   1895 	/*
   1896 	 * This message should be generated only
   1897 	 * when the physical device is changing
   1898 	 * state.
   1899 	 */
   1900 	if (ipif->ipif_id != 0)
   1901 		return;
   1902 	if (ipif->ipif_isv6) {
   1903 		af = AF_INET6;
   1904 		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
   1905 		if (mp == NULL)
   1906 			return;
   1907 		rts_fill_msg_v6(RTM_IFINFO, RTA_IFP, &ipv6_all_zeros,
   1908 		    &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros,
   1909 		    &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros,
   1910 		    ipif->ipif_ill, mp, NULL);
   1911 	} else {
   1912 		af = AF_INET;
   1913 		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
   1914 		if (mp == NULL)
   1915 			return;
   1916 		rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, 0,
   1917 		    ipif->ipif_ill, mp, NULL);
   1918 	}
   1919 	ifm = (if_msghdr_t *)mp->b_rptr;
   1920 	ifm->ifm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex;
   1921 	ifm->ifm_flags = (ipif->ipif_flags | ipif->ipif_ill->ill_flags |
   1922 	    ipif->ipif_ill->ill_phyint->phyint_flags | set) & ~clear;
   1923 	rts_getifdata(&ifm->ifm_data, ipif);
   1924 	ifm->ifm_addrs = RTA_IFP;
   1925 
   1926 	if (flags & RTSQ_DEFAULT) {
   1927 		flags = RTSQ_ALL;
   1928 		/*
   1929 		 * If this message is for an underlying interface, prevent
   1930 		 * "normal" (IPMP-unaware) routing sockets from seeing it.
   1931 		 */
   1932 		if (IS_UNDER_IPMP(ipif->ipif_ill))
   1933 			flags &= ~RTSQ_NORMAL;
   1934 	}
   1935 
   1936 	rts_queue_input(mp, NULL, af, flags, ipst);
   1937 }
   1938 
   1939 /*
   1940  * If cmd is RTM_ADD or RTM_DELETE, generate the rt_msghdr_t message;
   1941  * otherwise (RTM_NEWADDR, RTM_DELADDR, RTM_CHGADDR and RTM_FREEADDR)
   1942  * generate the ifa_msghdr_t message.
   1943  */
   1944 static void
   1945 rts_new_rtsmsg(int cmd, int error, const ipif_t *ipif, uint_t flags)
   1946 {
   1947 	int		rtm_addrs;
   1948 	mblk_t		*mp;
   1949 	ifa_msghdr_t	*ifam;
   1950 	rt_msghdr_t	*rtm;
   1951 	sa_family_t	af;
   1952 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
   1953 
   1954 	/*
   1955 	 * Do not report unspecified address if this is the RTM_CHGADDR or
   1956 	 * RTM_FREEADDR message.
   1957 	 */
   1958 	if (cmd == RTM_CHGADDR || cmd == RTM_FREEADDR) {
   1959 		if (!ipif->ipif_isv6) {
   1960 			if (ipif->ipif_lcl_addr == INADDR_ANY)
   1961 				return;
   1962 		} else if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr)) {
   1963 			return;
   1964 		}
   1965 	}
   1966 
   1967 	if (ipif->ipif_isv6)
   1968 		af = AF_INET6;
   1969 	else
   1970 		af = AF_INET;
   1971 
   1972 	if (cmd == RTM_ADD || cmd == RTM_DELETE)
   1973 		rtm_addrs = (RTA_DST | RTA_NETMASK);
   1974 	else
   1975 		rtm_addrs = (RTA_IFA | RTA_NETMASK | RTA_BRD | RTA_IFP);
   1976 
   1977 	mp = rts_alloc_msg(cmd, rtm_addrs, af, 0);
   1978 	if (mp == NULL)
   1979 		return;
   1980 
   1981 	if (cmd != RTM_ADD && cmd != RTM_DELETE) {
   1982 		switch (af) {
   1983 		case AF_INET:
   1984 			rts_fill_msg(cmd, rtm_addrs, 0,
   1985 			    ipif->ipif_net_mask, 0, ipif->ipif_lcl_addr,
   1986 			    ipif->ipif_pp_dst_addr, 0,
   1987 			    ipif->ipif_lcl_addr, ipif->ipif_ill,
   1988 			    mp, NULL);
   1989 			break;
   1990 		case AF_INET6:
   1991 			rts_fill_msg_v6(cmd, rtm_addrs,
   1992 			    &ipv6_all_zeros, &ipif->ipif_v6net_mask,
   1993 			    &ipv6_all_zeros, &ipif->ipif_v6lcl_addr,
   1994 			    &ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
   1995 			    &ipif->ipif_v6lcl_addr, ipif->ipif_ill,
   1996 			    mp, NULL);
   1997 			break;
   1998 		}
   1999 		ifam = (ifa_msghdr_t *)mp->b_rptr;
   2000 		ifam->ifam_index =
   2001 		    ipif->ipif_ill->ill_phyint->phyint_ifindex;
   2002 		ifam->ifam_metric = ipif->ipif_metric;
   2003 		ifam->ifam_flags = ((cmd == RTM_NEWADDR) ? RTF_UP : 0);
   2004 		ifam->ifam_addrs = rtm_addrs;
   2005 	} else {
   2006 		switch (af) {
   2007 		case AF_INET:
   2008 			rts_fill_msg(cmd, rtm_addrs,
   2009 			    ipif->ipif_lcl_addr, ipif->ipif_net_mask, 0,
   2010 			    0, 0, 0, 0, NULL, mp, NULL);
   2011 			break;
   2012 		case AF_INET6:
   2013 			rts_fill_msg_v6(cmd, rtm_addrs,
   2014 			    &ipif->ipif_v6lcl_addr,
   2015 			    &ipif->ipif_v6net_mask, &ipv6_all_zeros,
   2016 			    &ipv6_all_zeros, &ipv6_all_zeros,
   2017 			    &ipv6_all_zeros, &ipv6_all_zeros,
   2018 			    NULL, mp, NULL);
   2019 			break;
   2020 		}
   2021 		rtm = (rt_msghdr_t *)mp->b_rptr;
   2022 		rtm->rtm_index =
   2023 		    ipif->ipif_ill->ill_phyint->phyint_ifindex;
   2024 		rtm->rtm_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
   2025 		rtm->rtm_errno = error;
   2026 		if (error == 0)
   2027 			rtm->rtm_flags |= RTF_DONE;
   2028 		rtm->rtm_addrs = rtm_addrs;
   2029 	}
   2030 	rts_queue_input(mp, NULL, af, flags, ipst);
   2031 }
   2032 
   2033 /*
   2034  * This is called to generate messages to the routing socket
   2035  * indicating a network interface has had addresses associated with it.
   2036  * The structure of the code is based on the 4.4BSD-Lite2 <net/rtsock.c>.
   2037  */
   2038 void
   2039 ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif, uint_t flags)
   2040 {
   2041 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
   2042 
   2043 	if (flags & RTSQ_DEFAULT) {
   2044 		flags = RTSQ_ALL;
   2045 		/*
   2046 		 * If this message is for an underlying interface, prevent
   2047 		 * "normal" (IPMP-unaware) routing sockets from seeing it.
   2048 		 */
   2049 		if (IS_UNDER_IPMP(ipif->ipif_ill))
   2050 			flags &= ~RTSQ_NORMAL;
   2051 	}
   2052 
   2053 	/*
   2054 	 * Let conn_ixa caching know that source address selection
   2055 	 * changed
   2056 	 */
   2057 	if (cmd == RTM_ADD || cmd == RTM_DELETE)
   2058 		ip_update_source_selection(ipst);
   2059 
   2060 	/*
   2061 	 * If the request is DELETE, send RTM_DELETE and RTM_DELADDR.
   2062 	 * if the request is ADD, send RTM_NEWADDR and RTM_ADD.
   2063 	 * otherwise simply send the request.
   2064 	 */
   2065 	switch (cmd) {
   2066 	case RTM_ADD:
   2067 		rts_new_rtsmsg(RTM_NEWADDR, error, ipif, flags);
   2068 		rts_new_rtsmsg(RTM_ADD, error, ipif, flags);
   2069 		break;
   2070 	case RTM_DELETE:
   2071 		rts_new_rtsmsg(RTM_DELETE, error, ipif, flags);
   2072 		rts_new_rtsmsg(RTM_DELADDR, error, ipif, flags);
   2073 		break;
   2074 	default:
   2075 		rts_new_rtsmsg(cmd, error, ipif, flags);
   2076 		break;
   2077 	}
   2078 }
   2079 
   2080 /*
   2081  * Based on the address family specified in a sockaddr, copy the address field
   2082  * into an in6_addr_t.
   2083  *
   2084  * In the case of AF_UNSPEC, we assume the family is actually AF_INET for
   2085  * compatibility with programs that leave the family cleared in the sockaddr.
   2086  * Callers of rts_copyfromsockaddr should check the family themselves if they
   2087  * wish to verify its value.
   2088  *
   2089  * In the case of AF_INET6, a check is made to ensure that address is not an
   2090  * IPv4-mapped address.
   2091  */
   2092 size_t
   2093 rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp)
   2094 {
   2095 	switch (sa->sa_family) {
   2096 	case AF_INET:
   2097 	case AF_UNSPEC:
   2098 		IN6_IPADDR_TO_V4MAPPED(((sin_t *)sa)->sin_addr.s_addr, addrp);
   2099 		return (sizeof (sin_t));
   2100 	case AF_INET6:
   2101 		*addrp = ((sin6_t *)sa)->sin6_addr;
   2102 		if (IN6_IS_ADDR_V4MAPPED(addrp))
   2103 			return (0);
   2104 		return (sizeof (sin6_t));
   2105 	default:
   2106 		return (0);
   2107 	}
   2108 }
   2109