Home | History | Annotate | Download | only in ip
      1 /*
      2  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
      3  * Use is subject to license terms.
      4  */
      5 
      6 /*
      7  * Copyright (c) 1988, 1991, 1993
      8  *	The Regents of the University of California.  All rights reserved.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the University of
     21  *	California, Berkeley and its contributors.
     22  * 4. Neither the name of the University nor the names of its contributors
     23  *    may be used to endorse or promote products derived from this software
     24  *    without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     36  * SUCH DAMAGE.
     37  *
     38  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
     39  */
     40 
     41 /*
     42  * This file contains routines that process routing socket requests.
     43  */
     44 
     45 #include <sys/types.h>
     46 #include <sys/stream.h>
     47 #include <sys/stropts.h>
     48 #include <sys/ddi.h>
     49 #include <sys/strsubr.h>
     50 #include <sys/cmn_err.h>
     51 #include <sys/debug.h>
     52 #include <sys/policy.h>
     53 #include <sys/zone.h>
     54 
     55 #include <sys/systm.h>
     56 #include <sys/param.h>
     57 #include <sys/socket.h>
     58 #include <sys/strsun.h>
     59 #include <net/if.h>
     60 #include <net/route.h>
     61 #include <netinet/in.h>
     62 #include <net/if_dl.h>
     63 #include <netinet/ip6.h>
     64 
     65 #include <inet/common.h>
     66 #include <inet/ip.h>
     67 #include <inet/ip6.h>
     68 #include <inet/ip_if.h>
     69 #include <inet/ip_ire.h>
     70 #include <inet/ip_ftable.h>
     71 #include <inet/ip_rts.h>
     72 
     73 #include <inet/ipclassifier.h>
     74 
     75 #include <sys/tsol/tndb.h>
     76 #include <sys/tsol/tnet.h>
     77 
     78 #define	RTS_MSG_SIZE(type, rtm_addrs, af, sacnt) \
     79 	(rts_data_msg_size(rtm_addrs, af, sacnt) + rts_header_msg_size(type))
     80 
     81 static size_t	rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp);
     82 static void	rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst,
     83     ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr,
     84     ipaddr_t author, const ipif_t *ipif, mblk_t *mp, uint_t, const tsol_gc_t *);
     85 static int	rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp,
     86     in6_addr_t *gw_addrp, in6_addr_t *net_maskp, in6_addr_t *authorp,
     87     in6_addr_t *if_addrp, in6_addr_t *src_addrp, ushort_t *indexp,
     88     sa_family_t *afp, tsol_rtsecattr_t *rtsecattr, int *error);
     89 static void	rts_getifdata(if_data_t *if_data, const ipif_t *ipif);
     90 static int	rts_getmetrics(ire_t *ire, rt_metrics_t *metrics);
     91 static mblk_t	*rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire,
     92     sa_family_t af);
     93 static void	rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics);
     94 static void	ip_rts_request_retry(ipsq_t *, queue_t *q, mblk_t *mp, void *);
     95 
     96 /*
     97  * Send `mp' to all eligible routing queues.  A queue is ineligible if:
     98  *
     99  *  1. SO_USELOOPBACK is off and it is not the originating queue.
    100  *  2. RTAW_UNDER_IPMP is on and RTSQ_UNDER_IPMP is clear in `flags'.
    101  *  3. RTAW_UNDER_IPMP is off and RTSQ_NORMAL is clear in `flags'.
    102  *  4. It is not the same address family as `af', and `af' isn't AF_UNSPEC.
    103  */
    104 void
    105 rts_queue_input(mblk_t *mp, conn_t *o_connp, sa_family_t af, uint_t flags,
    106     ip_stack_t *ipst)
    107 {
    108 	mblk_t	*mp1;
    109 	conn_t 	*connp, *next_connp;
    110 
    111 	/*
    112 	 * Since we don't have an ill_t here, RTSQ_DEFAULT must already be
    113 	 * resolved to one or more of RTSQ_NORMAL|RTSQ_UNDER_IPMP by now.
    114 	 */
    115 	ASSERT(!(flags & RTSQ_DEFAULT));
    116 
    117 	mutex_enter(&ipst->ips_rts_clients->connf_lock);
    118 	connp = ipst->ips_rts_clients->connf_head;
    119 
    120 	for (; connp != NULL; connp = next_connp) {
    121 		next_connp = connp->conn_next;
    122 
    123 		/*
    124 		 * If there was a family specified when this routing socket was
    125 		 * created and it doesn't match the family of the message to
    126 		 * copy, then continue.
    127 		 */
    128 		if ((connp->conn_proto != AF_UNSPEC) &&
    129 		    (connp->conn_proto != af))
    130 			continue;
    131 
    132 		/*
    133 		 * Queue the message only if the conn_t and flags match.
    134 		 */
    135 		if (connp->conn_rtaware & RTAW_UNDER_IPMP) {
    136 			if (!(flags & RTSQ_UNDER_IPMP))
    137 				continue;
    138 		} else {
    139 			if (!(flags & RTSQ_NORMAL))
    140 				continue;
    141 		}
    142 
    143 		/*
    144 		 * For the originating queue, we only copy the message upstream
    145 		 * if loopback is set.  For others reading on the routing
    146 		 * socket, we check if there is room upstream for a copy of the
    147 		 * message.
    148 		 */
    149 		if ((o_connp == connp) && connp->conn_loopback == 0) {
    150 			connp = connp->conn_next;
    151 			continue;
    152 		}
    153 		CONN_INC_REF(connp);
    154 		mutex_exit(&ipst->ips_rts_clients->connf_lock);
    155 		/* Pass to rts_input */
    156 		if ((IPCL_IS_NONSTR(connp) && !PROTO_FLOW_CNTRLD(connp))||
    157 		    (!IPCL_IS_NONSTR(connp) &&
    158 		    canputnext(CONNP_TO_RQ(connp)))) {
    159 			mp1 = dupmsg(mp);
    160 			if (mp1 == NULL)
    161 				mp1 = copymsg(mp);
    162 			if (mp1 != NULL)
    163 				(connp->conn_recv)(connp, mp1, NULL);
    164 		}
    165 
    166 		mutex_enter(&ipst->ips_rts_clients->connf_lock);
    167 		/* reload next_connp since conn_next may have changed */
    168 		next_connp = connp->conn_next;
    169 		CONN_DEC_REF(connp);
    170 	}
    171 	mutex_exit(&ipst->ips_rts_clients->connf_lock);
    172 	freemsg(mp);
    173 }
    174 
    175 /*
    176  * Takes an ire and sends an ack to all the routing sockets. This
    177  * routine is used
    178  * - when a route is created/deleted through the ioctl interface.
    179  * - when ire_expire deletes a stale redirect
    180  */
    181 void
    182 ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst)
    183 {
    184 	mblk_t		*mp;
    185 	rt_msghdr_t	*rtm;
    186 	int		rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY);
    187 	sa_family_t	af;
    188 	in6_addr_t	gw_addr_v6;
    189 
    190 	if (ire == NULL)
    191 		return;
    192 	ASSERT(ire->ire_ipversion == IPV4_VERSION ||
    193 	    ire->ire_ipversion == IPV6_VERSION);
    194 
    195 	if (ire->ire_flags & RTF_SETSRC)
    196 		rtm_addrs |= RTA_SRC;
    197 
    198 	switch (ire->ire_ipversion) {
    199 	case IPV4_VERSION:
    200 		af = AF_INET;
    201 		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
    202 		if (mp == NULL)
    203 			return;
    204 		rts_fill_msg(type, rtm_addrs, ire->ire_addr, ire->ire_mask,
    205 		    ire->ire_gateway_addr, ire->ire_src_addr, 0, 0, NULL, mp,
    206 		    0, NULL);
    207 		break;
    208 	case IPV6_VERSION:
    209 		af = AF_INET6;
    210 		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
    211 		if (mp == NULL)
    212 			return;
    213 		mutex_enter(&ire->ire_lock);
    214 		gw_addr_v6 = ire->ire_gateway_addr_v6;
    215 		mutex_exit(&ire->ire_lock);
    216 		rts_fill_msg_v6(type, rtm_addrs, &ire->ire_addr_v6,
    217 		    &ire->ire_mask_v6, &gw_addr_v6,
    218 		    &ire->ire_src_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros,
    219 		    NULL, mp, 0, NULL);
    220 		break;
    221 	}
    222 	rtm = (rt_msghdr_t *)mp->b_rptr;
    223 	mp->b_wptr = (uchar_t *)&mp->b_rptr[rtm->rtm_msglen];
    224 	rtm->rtm_addrs = rtm_addrs;
    225 	rtm->rtm_flags = ire->ire_flags;
    226 	if (error != 0)
    227 		rtm->rtm_errno = error;
    228 	else
    229 		rtm->rtm_flags |= RTF_DONE;
    230 	rts_queue_input(mp, NULL, af, RTSQ_ALL, ipst);
    231 }
    232 
    233 /* ARGSUSED */
    234 static void
    235 ip_rts_request_retry(ipsq_t *dummy_sq, queue_t *q, mblk_t *mp, void *dummy)
    236 {
    237 	(void) ip_rts_request(q, mp, msg_getcred(mp, NULL));
    238 }
    239 
    240 /*
    241  * This is a call from the RTS module
    242  * indicating that this is a Routing Socket
    243  * Stream. Insert this conn_t in routing
    244  * socket client list.
    245  */
    246 void
    247 ip_rts_register(conn_t *connp)
    248 {
    249 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
    250 
    251 	connp->conn_loopback = 1;
    252 	ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp);
    253 }
    254 
    255 /*
    256  * This is a call from the RTS module indicating that it is closing.
    257  */
    258 void
    259 ip_rts_unregister(conn_t *connp)
    260 {
    261 	ipcl_hash_remove(connp);
    262 }
    263 
    264 /*
    265  * Processes requests received on a routing socket. It extracts all the
    266  * arguments and calls the appropriate function to process the request.
    267  *
    268  * RTA_SRC bit flag requests are sent by 'route -setsrc'.
    269  *
    270  * In general, this function does not consume the message supplied but rather
    271  * sends the message upstream with an appropriate UNIX errno.
    272  *
    273  * We may need to restart this operation if the ipif cannot be looked up
    274  * due to an exclusive operation that is currently in progress. The restart
    275  * entry point is ip_rts_request_retry. While the request is enqueued in the
    276  * ipsq the ioctl could be aborted and the conn closed. To ensure that we don't
    277  * have stale conn pointers, ip_wput_ioctl does a conn refhold. This is
    278  * released at the completion of the rts ioctl at the end of this function
    279  * by calling CONN_OPER_PENDING_DONE or when the ioctl is aborted and
    280  * conn close occurs in conn_ioctl_cleanup.
    281  */
    282 int
    283 ip_rts_request_common(queue_t *q, mblk_t *mp, conn_t *connp, cred_t *ioc_cr)
    284 {
    285 	rt_msghdr_t	*rtm = NULL;
    286 	in6_addr_t	dst_addr_v6;
    287 	in6_addr_t	src_addr_v6;
    288 	in6_addr_t	gw_addr_v6;
    289 	in6_addr_t	net_mask_v6;
    290 	in6_addr_t	author_v6;
    291 	in6_addr_t	if_addr_v6;
    292 	mblk_t		*mp1, *ioc_mp = mp;
    293 	ire_t		*ire = NULL;
    294 	ire_t		*sire = NULL;
    295 	int		error = 0;
    296 	int		match_flags = MATCH_IRE_DSTONLY;
    297 	int		match_flags_local = MATCH_IRE_TYPE | MATCH_IRE_GW;
    298 	int		found_addrs;
    299 	sa_family_t	af;
    300 	ipaddr_t	dst_addr;
    301 	ipaddr_t	gw_addr;
    302 	ipaddr_t	src_addr;
    303 	ipaddr_t	net_mask;
    304 	ushort_t	index;
    305 	ipif_t		*ipif = NULL;
    306 	ipif_t		*tmp_ipif = NULL;
    307 	IOCP		iocp = (IOCP)mp->b_rptr;
    308 	boolean_t	gcgrp_xtraref = B_FALSE;
    309 	tsol_gcgrp_addr_t ga;
    310 	tsol_rtsecattr_t rtsecattr;
    311 	struct rtsa_s	*rtsap = NULL;
    312 	tsol_gcgrp_t	*gcgrp = NULL;
    313 	tsol_gc_t	*gc = NULL;
    314 	ts_label_t	*tsl = NULL;
    315 	zoneid_t	zoneid;
    316 	ip_stack_t	*ipst;
    317 
    318 	ip1dbg(("ip_rts_request: mp is %x\n", DB_TYPE(mp)));
    319 
    320 	zoneid = connp->conn_zoneid;
    321 	ipst = connp->conn_netstack->netstack_ip;
    322 
    323 	ASSERT(mp->b_cont != NULL);
    324 	/* ioc_mp holds mp */
    325 	mp = mp->b_cont;
    326 
    327 	/*
    328 	 * The Routing Socket data starts on
    329 	 * next block. If there is no next block
    330 	 * this is an indication from routing module
    331 	 * that it is a routing socket stream queue.
    332 	 * We need to support that for compatibility with SDP since
    333 	 * it has a contract private interface to use IP_IOC_RTS_REQUEST.
    334 	 */
    335 	if (mp->b_cont == NULL) {
    336 		/*
    337 		 * This is a message from SDP
    338 		 * indicating that this is a Routing Socket
    339 		 * Stream. Insert this conn_t in routing
    340 		 * socket client list.
    341 		 */
    342 		connp->conn_loopback = 1;
    343 		ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp);
    344 		goto done;
    345 	}
    346 	mp1 = dupmsg(mp->b_cont);
    347 	if (mp1 == NULL) {
    348 		error  = ENOBUFS;
    349 		goto done;
    350 	}
    351 	mp = mp1;
    352 
    353 	if (mp->b_cont != NULL && !pullupmsg(mp, -1)) {
    354 		freemsg(mp);
    355 		error =  EINVAL;
    356 		goto done;
    357 	}
    358 	if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) {
    359 		freemsg(mp);
    360 		error = EINVAL;
    361 		goto done;
    362 	}
    363 
    364 	/*
    365 	 * Check the routing message for basic consistency including the
    366 	 * version number and that the number of octets written is the same
    367 	 * as specified by the rtm_msglen field.
    368 	 *
    369 	 * At this point, an error can be delivered back via rtm_errno.
    370 	 */
    371 	rtm = (rt_msghdr_t *)mp->b_rptr;
    372 	if ((mp->b_wptr - mp->b_rptr) != rtm->rtm_msglen) {
    373 		error = EINVAL;
    374 		goto done;
    375 	}
    376 	if (rtm->rtm_version != RTM_VERSION) {
    377 		error = EPROTONOSUPPORT;
    378 		goto done;
    379 	}
    380 
    381 	/* Only allow RTM_GET or RTM_RESOLVE for unprivileged process */
    382 	if (rtm->rtm_type != RTM_GET &&
    383 	    rtm->rtm_type != RTM_RESOLVE &&
    384 	    (ioc_cr == NULL ||
    385 	    secpolicy_ip_config(ioc_cr, B_FALSE) != 0)) {
    386 		error = EPERM;
    387 		goto done;
    388 	}
    389 
    390 	found_addrs = rts_getaddrs(rtm, &dst_addr_v6, &gw_addr_v6, &net_mask_v6,
    391 	    &author_v6, &if_addr_v6, &src_addr_v6, &index, &af, &rtsecattr,
    392 	    &error);
    393 
    394 	if (error != 0)
    395 		goto done;
    396 
    397 	if ((found_addrs & RTA_DST) == 0) {
    398 		error = EINVAL;
    399 		goto done;
    400 	}
    401 
    402 	/*
    403 	 * Based on the address family of the destination address, determine
    404 	 * the destination, gateway and netmask and return the appropriate error
    405 	 * if an unknown address family was specified (following the errno
    406 	 * values that 4.4BSD-Lite2 returns.)
    407 	 */
    408 	switch (af) {
    409 	case AF_INET:
    410 		IN6_V4MAPPED_TO_IPADDR(&dst_addr_v6, dst_addr);
    411 		IN6_V4MAPPED_TO_IPADDR(&src_addr_v6, src_addr);
    412 		IN6_V4MAPPED_TO_IPADDR(&gw_addr_v6, gw_addr);
    413 		if (((found_addrs & RTA_NETMASK) == 0) ||
    414 		    (rtm->rtm_flags & RTF_HOST))
    415 			net_mask = IP_HOST_MASK;
    416 		else
    417 			IN6_V4MAPPED_TO_IPADDR(&net_mask_v6, net_mask);
    418 		break;
    419 	case AF_INET6:
    420 		if (((found_addrs & RTA_NETMASK) == 0) ||
    421 		    (rtm->rtm_flags & RTF_HOST))
    422 			net_mask_v6 = ipv6_all_ones;
    423 		break;
    424 	default:
    425 		/*
    426 		 * These errno values are meant to be compatible with
    427 		 * 4.4BSD-Lite2 for the given message types.
    428 		 */
    429 		switch (rtm->rtm_type) {
    430 		case RTM_ADD:
    431 		case RTM_DELETE:
    432 			error = ESRCH;
    433 			goto done;
    434 		case RTM_GET:
    435 		case RTM_CHANGE:
    436 			error = EAFNOSUPPORT;
    437 			goto done;
    438 		default:
    439 			error = EOPNOTSUPP;
    440 			goto done;
    441 		}
    442 	}
    443 
    444 	/*
    445 	 * At this point, the address family must be something known.
    446 	 */
    447 	ASSERT(af == AF_INET || af == AF_INET6);
    448 
    449 	if (index != 0) {
    450 		ill_t   *ill;
    451 lookup:
    452 		/*
    453 		 * IPC must be refheld somewhere in ip_wput_nondata or
    454 		 * ip_wput_ioctl etc... and cleaned up if ioctl is killed.
    455 		 * If ILL_CHANGING the request is queued in the ipsq.
    456 		 */
    457 		ill = ill_lookup_on_ifindex(index, af == AF_INET6,
    458 		    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, &error,
    459 		    ipst);
    460 		if (ill == NULL) {
    461 			if (error != EINPROGRESS)
    462 				error = EINVAL;
    463 			goto done;
    464 		}
    465 
    466 		/*
    467 		 * Since all interfaces in an IPMP group must be equivalent,
    468 		 * we prevent changes to a specific underlying interface's
    469 		 * routing configuration.  However, for backward compatibility,
    470 		 * we interpret a request to add a route on an underlying
    471 		 * interface as a request to add a route on its IPMP interface.
    472 		 */
    473 		if (IS_UNDER_IPMP(ill)) {
    474 			switch (rtm->rtm_type) {
    475 			case RTM_CHANGE:
    476 			case RTM_DELETE:
    477 				ill_refrele(ill);
    478 				error = EINVAL;
    479 				goto done;
    480 			case RTM_ADD:
    481 				index = ipmp_ill_get_ipmp_ifindex(ill);
    482 				ill_refrele(ill);
    483 				if (index == 0) {
    484 					error = EINVAL;
    485 					goto done;
    486 				}
    487 				goto lookup;
    488 			}
    489 		}
    490 
    491 		ipif = ipif_get_next_ipif(NULL, ill);
    492 		ill_refrele(ill);
    493 		match_flags |= MATCH_IRE_ILL;
    494 	}
    495 
    496 	/*
    497 	 * If a netmask was supplied in the message, then subsequent route
    498 	 * lookups will attempt to match on the netmask as well.
    499 	 */
    500 	if ((found_addrs & RTA_NETMASK) != 0)
    501 		match_flags |= MATCH_IRE_MASK;
    502 
    503 	/*
    504 	 * We only process any passed-in route security attributes for
    505 	 * either RTM_ADD or RTM_CHANGE message; We overload them
    506 	 * to do an RTM_GET as a different label; ignore otherwise.
    507 	 */
    508 	if (rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE ||
    509 	    rtm->rtm_type == RTM_GET) {
    510 		ASSERT(rtsecattr.rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
    511 		if (rtsecattr.rtsa_cnt > 0)
    512 			rtsap = &rtsecattr.rtsa_attr[0];
    513 	}
    514 
    515 	switch (rtm->rtm_type) {
    516 	case RTM_ADD:
    517 		/* if we are adding a route, gateway is a must */
    518 		if ((found_addrs & RTA_GATEWAY) == 0) {
    519 			error = EINVAL;
    520 			goto done;
    521 		}
    522 
    523 		/* Multirouting does not support net routes. */
    524 		if ((rtm->rtm_flags & (RTF_MULTIRT | RTF_HOST)) ==
    525 		    RTF_MULTIRT) {
    526 			error = EADDRNOTAVAIL;
    527 			goto done;
    528 		}
    529 
    530 		/*
    531 		 * Multirouting and user-specified source addresses
    532 		 * do not support interface based routing.
    533 		 * Assigning a source address to an interface based
    534 		 * route is achievable by plumbing a new ipif and
    535 		 * setting up the interface route via this ipif,
    536 		 * though.
    537 		 */
    538 		if (rtm->rtm_flags & (RTF_MULTIRT | RTF_SETSRC)) {
    539 			if ((rtm->rtm_flags & RTF_GATEWAY) == 0) {
    540 				error = EADDRNOTAVAIL;
    541 				goto done;
    542 			}
    543 		}
    544 
    545 		switch (af) {
    546 		case AF_INET:
    547 			if (src_addr != INADDR_ANY) {
    548 				/*
    549 				 * The RTF_SETSRC flag is present, check that
    550 				 * the supplied src address is not the loopback
    551 				 * address. This would produce martian packets.
    552 				 */
    553 				if (src_addr == htonl(INADDR_LOOPBACK)) {
    554 					error = EINVAL;
    555 					goto done;
    556 				}
    557 				/*
    558 				 * Also check that the supplied address is a
    559 				 * valid, local one.
    560 				 */
    561 				tmp_ipif = ipif_lookup_addr(src_addr, NULL,
    562 				    ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp,
    563 				    ip_rts_request_retry, &error, ipst);
    564 				if (tmp_ipif == NULL) {
    565 					if (error != EINPROGRESS)
    566 						error = EADDRNOTAVAIL;
    567 					goto done;
    568 				}
    569 				if (!(tmp_ipif->ipif_flags & IPIF_UP) ||
    570 				    (tmp_ipif->ipif_flags &
    571 				    (IPIF_NOLOCAL | IPIF_ANYCAST))) {
    572 					error = EINVAL;
    573 					goto done;
    574 				}
    575 			} else {
    576 				/*
    577 				 * The RTF_SETSRC modifier must be associated
    578 				 * to a non-null source address.
    579 				 */
    580 				if (rtm->rtm_flags & RTF_SETSRC) {
    581 					error = EINVAL;
    582 					goto done;
    583 				}
    584 			}
    585 
    586 			error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr,
    587 			    rtm->rtm_flags, ipif, &ire, B_FALSE,
    588 			    WR(q), ioc_mp, ip_rts_request_retry,
    589 			    rtsap, ipst);
    590 			if (ipif != NULL)
    591 				ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
    592 			break;
    593 		case AF_INET6:
    594 			if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6)) {
    595 				/*
    596 				 * The RTF_SETSRC flag is present, check that
    597 				 * the supplied src address is not the loopback
    598 				 * address. This would produce martian packets.
    599 				 */
    600 				if (IN6_IS_ADDR_LOOPBACK(&src_addr_v6)) {
    601 					error = EINVAL;
    602 					goto done;
    603 				}
    604 				/*
    605 				 * Also check that the supplied address is a
    606 				 * valid, local one.
    607 				 */
    608 				tmp_ipif = ipif_lookup_addr_v6(&src_addr_v6,
    609 				    NULL, ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp,
    610 				    ip_rts_request_retry, &error, ipst);
    611 				if (tmp_ipif == NULL) {
    612 					if (error != EINPROGRESS)
    613 						error = EADDRNOTAVAIL;
    614 					goto done;
    615 				}
    616 
    617 				if (!(tmp_ipif->ipif_flags & IPIF_UP) ||
    618 				    (tmp_ipif->ipif_flags &
    619 				    (IPIF_NOLOCAL | IPIF_ANYCAST))) {
    620 					error = EINVAL;
    621 					goto done;
    622 				}
    623 
    624 				error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
    625 				    &gw_addr_v6, &src_addr_v6, rtm->rtm_flags,
    626 				    ipif, &ire, WR(q), ioc_mp,
    627 				    ip_rts_request_retry, rtsap, ipst);
    628 				break;
    629 			}
    630 			/*
    631 			 * The RTF_SETSRC modifier must be associated
    632 			 * to a non-null source address.
    633 			 */
    634 			if (rtm->rtm_flags & RTF_SETSRC) {
    635 				error = EINVAL;
    636 				goto done;
    637 			}
    638 			error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
    639 			    &gw_addr_v6, NULL, rtm->rtm_flags,
    640 			    ipif, &ire, WR(q), ioc_mp,
    641 			    ip_rts_request_retry, rtsap, ipst);
    642 			if (ipif != NULL)
    643 				ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
    644 			break;
    645 		}
    646 		if (error != 0)
    647 			goto done;
    648 		ASSERT(ire != NULL);
    649 		rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
    650 		break;
    651 	case RTM_DELETE:
    652 		/* if we are deleting a route, gateway is a must */
    653 		if ((found_addrs & RTA_GATEWAY) == 0) {
    654 			error = EINVAL;
    655 			goto done;
    656 		}
    657 		/*
    658 		 * The RTF_SETSRC modifier does not make sense
    659 		 * when deleting a route.
    660 		 */
    661 		if (rtm->rtm_flags & RTF_SETSRC) {
    662 			error = EINVAL;
    663 			goto done;
    664 		}
    665 
    666 		switch (af) {
    667 		case AF_INET:
    668 			error = ip_rt_delete(dst_addr, net_mask, gw_addr,
    669 			    found_addrs, rtm->rtm_flags, ipif, B_FALSE,
    670 			    WR(q), ioc_mp, ip_rts_request_retry, ipst);
    671 			break;
    672 		case AF_INET6:
    673 			error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6,
    674 			    &gw_addr_v6, found_addrs, rtm->rtm_flags, ipif,
    675 			    WR(q), ioc_mp, ip_rts_request_retry, ipst);
    676 			break;
    677 		}
    678 		break;
    679 	case RTM_GET:
    680 	case RTM_CHANGE:
    681 		/*
    682 		 * In the case of RTM_GET, the forwarding table should be
    683 		 * searched recursively with default being matched if the
    684 		 * specific route doesn't exist.  Also, if a gateway was
    685 		 * specified then the gateway address must also be matched.
    686 		 *
    687 		 * In the case of RTM_CHANGE, the gateway address (if supplied)
    688 		 * is the new gateway address so matching on the gateway address
    689 		 * is not done.  This can lead to ambiguity when looking up the
    690 		 * route to change as usually only the destination (and netmask,
    691 		 * if supplied) is used for the lookup.  However if a RTA_IFP
    692 		 * sockaddr is also supplied, it can disambiguate which route to
    693 		 * change provided the ambiguous routes are tied to distinct
    694 		 * ill's (or interface indices).  If the routes are not tied to
    695 		 * any particular interfaces (for example, with traditional
    696 		 * gateway routes), then a RTA_IFP sockaddr will be of no use as
    697 		 * it won't match any such routes.
    698 		 * RTA_SRC is not supported for RTM_GET and RTM_CHANGE,
    699 		 * except when RTM_CHANGE is combined to RTF_SETSRC.
    700 		 */
    701 		if (((found_addrs & RTA_SRC) != 0) &&
    702 		    ((rtm->rtm_type == RTM_GET) ||
    703 		    !(rtm->rtm_flags & RTF_SETSRC))) {
    704 			error = EOPNOTSUPP;
    705 			goto done;
    706 		}
    707 
    708 		if (rtm->rtm_type == RTM_GET) {
    709 			match_flags |=
    710 			    (MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE |
    711 			    MATCH_IRE_SECATTR);
    712 			match_flags_local |= MATCH_IRE_SECATTR;
    713 			if ((found_addrs & RTA_GATEWAY) != 0)
    714 				match_flags |= MATCH_IRE_GW;
    715 			if (ioc_cr)
    716 				tsl = crgetlabel(ioc_cr);
    717 			if (rtsap != NULL) {
    718 				if (rtsa_validate(rtsap) != 0) {
    719 					error = EINVAL;
    720 					goto done;
    721 				}
    722 				if (tsl != NULL &&
    723 				    crgetzoneid(ioc_cr) != GLOBAL_ZONEID &&
    724 				    (tsl->tsl_doi != rtsap->rtsa_doi ||
    725 				    !bldominates(&tsl->tsl_label,
    726 				    &rtsap->rtsa_slrange.lower_bound))) {
    727 					error = EPERM;
    728 					goto done;
    729 				}
    730 				tsl = labelalloc(
    731 				    &rtsap->rtsa_slrange.lower_bound,
    732 				    rtsap->rtsa_doi, KM_NOSLEEP);
    733 			}
    734 		}
    735 		if (rtm->rtm_type == RTM_CHANGE) {
    736 			if ((found_addrs & RTA_GATEWAY) &&
    737 			    (rtm->rtm_flags & RTF_SETSRC)) {
    738 				/*
    739 				 * Do not want to change the gateway,
    740 				 * but rather the source address.
    741 				 */
    742 				match_flags |= MATCH_IRE_GW;
    743 			}
    744 		}
    745 
    746 		/*
    747 		 * If the netmask is all ones (either as supplied or as derived
    748 		 * above), then first check for an IRE_LOOPBACK or
    749 		 * IRE_LOCAL entry.
    750 		 *
    751 		 * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL
    752 		 * entry, then look in the forwarding table.
    753 		 */
    754 		switch (af) {
    755 		case AF_INET:
    756 			if (net_mask == IP_HOST_MASK) {
    757 				ire = ire_ctable_lookup(dst_addr, gw_addr,
    758 				    IRE_LOCAL | IRE_LOOPBACK, NULL, zoneid,
    759 				    tsl, match_flags_local, ipst);
    760 				/*
    761 				 * If we found an IRE_LOCAL, make sure
    762 				 * it is one that would be used by this
    763 				 * zone to send packets.
    764 				 */
    765 				if (ire != NULL &&
    766 				    ire->ire_type == IRE_LOCAL &&
    767 				    ipst->ips_ip_restrict_interzone_loopback &&
    768 				    !ire_local_ok_across_zones(ire,
    769 				    zoneid, &dst_addr, tsl, ipst)) {
    770 					ire_refrele(ire);
    771 					ire = NULL;
    772 				}
    773 			}
    774 			if (ire == NULL) {
    775 				ire = ire_ftable_lookup(dst_addr, net_mask,
    776 				    gw_addr, 0, ipif, &sire, zoneid, 0,
    777 				    tsl, match_flags, ipst);
    778 			}
    779 			break;
    780 		case AF_INET6:
    781 			if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, &ipv6_all_ones)) {
    782 				ire = ire_ctable_lookup_v6(&dst_addr_v6,
    783 				    &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL,
    784 				    zoneid, tsl, match_flags_local, ipst);
    785 				/*
    786 				 * If we found an IRE_LOCAL, make sure
    787 				 * it is one that would be used by this
    788 				 * zone to send packets.
    789 				 */
    790 				if (ire != NULL &&
    791 				    ire->ire_type == IRE_LOCAL &&
    792 				    ipst->ips_ip_restrict_interzone_loopback &&
    793 				    !ire_local_ok_across_zones(ire,
    794 				    zoneid, (void *)&dst_addr_v6, tsl, ipst)) {
    795 					ire_refrele(ire);
    796 					ire = NULL;
    797 				}
    798 			}
    799 			if (ire == NULL) {
    800 				ire = ire_ftable_lookup_v6(&dst_addr_v6,
    801 				    &net_mask_v6, &gw_addr_v6, 0, ipif, &sire,
    802 				    zoneid, 0, tsl, match_flags, ipst);
    803 			}
    804 			break;
    805 		}
    806 		if (tsl != NULL && tsl != crgetlabel(ioc_cr))
    807 			label_rele(tsl);
    808 
    809 		if (ire == NULL) {
    810 			error = ESRCH;
    811 			goto done;
    812 		}
    813 		/* we know the IRE before we come here */
    814 		switch (rtm->rtm_type) {
    815 		case RTM_GET:
    816 			mp1 = rts_rtmget(mp, ire, sire, af);
    817 			if (mp1 == NULL) {
    818 				error = ENOBUFS;
    819 				goto done;
    820 			}
    821 			freemsg(mp);
    822 			mp = mp1;
    823 			rtm = (rt_msghdr_t *)mp->b_rptr;
    824 			break;
    825 		case RTM_CHANGE:
    826 			/*
    827 			 * Do not allow to the multirouting state of a route
    828 			 * to be changed. This aims to prevent undesirable
    829 			 * stages where both multirt and non-multirt routes
    830 			 * for the same destination are declared.
    831 			 */
    832 			if ((ire->ire_flags & RTF_MULTIRT) !=
    833 			    (rtm->rtm_flags & RTF_MULTIRT)) {
    834 				error = EINVAL;
    835 				goto done;
    836 			}
    837 			/*
    838 			 * Note that we do not need to do
    839 			 * ire_flush_cache_*(IRE_FLUSH_ADD) as a change
    840 			 * in metrics or gateway will not affect existing
    841 			 * routes since it does not create a more specific
    842 			 * route.
    843 			 */
    844 			switch (af) {
    845 			case AF_INET:
    846 				ire_flush_cache_v4(ire, IRE_FLUSH_DELETE);
    847 				if ((found_addrs & RTA_GATEWAY) != 0 &&
    848 				    (ire->ire_gateway_addr != gw_addr)) {
    849 					ire->ire_gateway_addr = gw_addr;
    850 				}
    851 
    852 				if (rtsap != NULL) {
    853 					ga.ga_af = AF_INET;
    854 					IN6_IPADDR_TO_V4MAPPED(
    855 					    ire->ire_gateway_addr, &ga.ga_addr);
    856 
    857 					gcgrp = gcgrp_lookup(&ga, B_TRUE);
    858 					if (gcgrp == NULL) {
    859 						error = ENOMEM;
    860 						goto done;
    861 					}
    862 				}
    863 
    864 				if ((found_addrs & RTA_SRC) != 0 &&
    865 				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
    866 				    (ire->ire_src_addr != src_addr)) {
    867 
    868 					if (src_addr != INADDR_ANY) {
    869 						/*
    870 						 * The RTF_SETSRC flag is
    871 						 * present, check that the
    872 						 * supplied src address is not
    873 						 * the loopback address. This
    874 						 * would produce martian
    875 						 * packets.
    876 						 */
    877 						if (src_addr ==
    878 						    htonl(INADDR_LOOPBACK)) {
    879 							error = EINVAL;
    880 							goto done;
    881 						}
    882 						/*
						 * Also check that the
						 * supplied addr is a valid
    885 						 * local address.
    886 						 */
    887 						tmp_ipif = ipif_lookup_addr(
    888 						    src_addr, NULL, ALL_ZONES,
    889 						    WR(q), ioc_mp,
    890 						    ip_rts_request_retry,
    891 						    &error, ipst);
    892 						if (tmp_ipif == NULL) {
    893 							error = (error ==
    894 							    EINPROGRESS) ?
    895 							    error :
    896 							    EADDRNOTAVAIL;
    897 							goto done;
    898 						}
    899 
    900 						if (!(tmp_ipif->ipif_flags &
    901 						    IPIF_UP) ||
    902 						    (tmp_ipif->ipif_flags &
    903 						    (IPIF_NOLOCAL |
    904 						    IPIF_ANYCAST))) {
    905 							error = EINVAL;
    906 							goto done;
    907 						}
    908 						ire->ire_flags |= RTF_SETSRC;
    909 					} else {
    910 						ire->ire_flags &= ~RTF_SETSRC;
    911 					}
    912 					ire->ire_src_addr = src_addr;
    913 				}
    914 				break;
    915 			case AF_INET6:
    916 				ire_flush_cache_v6(ire, IRE_FLUSH_DELETE);
    917 				mutex_enter(&ire->ire_lock);
    918 				if ((found_addrs & RTA_GATEWAY) != 0 &&
    919 				    !IN6_ARE_ADDR_EQUAL(
    920 				    &ire->ire_gateway_addr_v6, &gw_addr_v6)) {
    921 					ire->ire_gateway_addr_v6 = gw_addr_v6;
    922 				}
    923 
    924 				if (rtsap != NULL) {
    925 					ga.ga_af = AF_INET6;
    926 					ga.ga_addr = ire->ire_gateway_addr_v6;
    927 
    928 					gcgrp = gcgrp_lookup(&ga, B_TRUE);
    929 					if (gcgrp == NULL) {
    930 						error = ENOMEM;
    931 						goto done;
    932 					}
    933 				}
    934 
    935 				if ((found_addrs & RTA_SRC) != 0 &&
    936 				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
    937 				    !IN6_ARE_ADDR_EQUAL(
    938 				    &ire->ire_src_addr_v6, &src_addr_v6)) {
    939 
    940 					if (!IN6_IS_ADDR_UNSPECIFIED(
    941 					    &src_addr_v6)) {
    942 						/*
    943 						 * The RTF_SETSRC flag is
    944 						 * present, check that the
    945 						 * supplied src address is not
    946 						 * the loopback address. This
    947 						 * would produce martian
    948 						 * packets.
    949 						 */
    950 						if (IN6_IS_ADDR_LOOPBACK(
    951 						    &src_addr_v6)) {
    952 							mutex_exit(
    953 							    &ire->ire_lock);
    954 							error = EINVAL;
    955 							goto done;
    956 						}
    957 						/*
						 * Also check that the
						 * supplied addr is a valid
    960 						 * local address.
    961 						 */
    962 						tmp_ipif = ipif_lookup_addr_v6(
    963 						    &src_addr_v6, NULL,
    964 						    ALL_ZONES,
    965 						    CONNP_TO_WQ(connp), ioc_mp,
    966 						    ip_rts_request_retry,
    967 						    &error, ipst);
    968 						if (tmp_ipif == NULL) {
    969 							mutex_exit(
    970 							    &ire->ire_lock);
    971 							error = (error ==
    972 							    EINPROGRESS) ?
    973 							    error :
    974 							    EADDRNOTAVAIL;
    975 							goto done;
    976 						}
    977 						if (!(tmp_ipif->ipif_flags &
    978 						    IPIF_UP) ||
    979 						    (tmp_ipif->ipif_flags &
    980 						    (IPIF_NOLOCAL |
    981 						    IPIF_ANYCAST))) {
    982 							mutex_exit(
    983 							    &ire->ire_lock);
    984 							error = EINVAL;
    985 							goto done;
    986 						}
    987 						ire->ire_flags |= RTF_SETSRC;
    988 					} else {
    989 						ire->ire_flags &= ~RTF_SETSRC;
    990 					}
    991 					ire->ire_src_addr_v6 = src_addr_v6;
    992 				}
    993 				mutex_exit(&ire->ire_lock);
    994 				break;
    995 			}
    996 
    997 			if (rtsap != NULL) {
    998 				in_addr_t ga_addr4;
    999 
   1000 				ASSERT(gcgrp != NULL);
   1001 
   1002 				/*
   1003 				 * Create and add the security attribute to
   1004 				 * prefix IRE; it will add a reference to the
   1005 				 * group upon allocating a new entry.  If it
   1006 				 * finds an already-existing entry for the
   1007 				 * security attribute, it simply returns it
   1008 				 * and no new group reference is made.
   1009 				 */
   1010 				gc = gc_create(rtsap, gcgrp, &gcgrp_xtraref);
   1011 				if (gc == NULL ||
   1012 				    (error = tsol_ire_init_gwattr(ire,
   1013 				    ire->ire_ipversion, gc, NULL)) != 0) {
   1014 					if (gc != NULL) {
   1015 						GC_REFRELE(gc);
   1016 					} else {
   1017 						/* gc_create failed */
   1018 						error = ENOMEM;
   1019 					}
   1020 					goto done;
   1021 				}
   1022 
   1023 				/*
   1024 				 * Now delete any existing gateway IRE caches
   1025 				 * as well as all caches using the gateway,
   1026 				 * and allow them to be created on demand
   1027 				 * through ip_newroute{_v6}.
   1028 				 */
   1029 				IN6_V4MAPPED_TO_IPADDR(&ga.ga_addr, ga_addr4);
   1030 				if (af == AF_INET) {
   1031 					ire_clookup_delete_cache_gw(
   1032 					    ga_addr4, ALL_ZONES, ipst);
   1033 				} else {
   1034 					ire_clookup_delete_cache_gw_v6(
   1035 					    &ga.ga_addr, ALL_ZONES, ipst);
   1036 				}
   1037 			}
   1038 			rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
   1039 			break;
   1040 		}
   1041 		break;
   1042 	default:
   1043 		error = EOPNOTSUPP;
   1044 		break;
   1045 	}
   1046 done:
   1047 	if (ire != NULL)
   1048 		ire_refrele(ire);
   1049 	if (sire != NULL)
   1050 		ire_refrele(sire);
   1051 	if (ipif != NULL)
   1052 		ipif_refrele(ipif);
   1053 	if (tmp_ipif != NULL)
   1054 		ipif_refrele(tmp_ipif);
   1055 
   1056 	if (gcgrp_xtraref)
   1057 		GCGRP_REFRELE(gcgrp);
   1058 
   1059 	if (error == EINPROGRESS) {
   1060 		if (rtm != NULL)
   1061 			freemsg(mp);
   1062 		return (error);
   1063 	}
   1064 	if (rtm != NULL) {
   1065 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
   1066 		if (error != 0) {
   1067 			rtm->rtm_errno = error;
   1068 			/* Send error ACK */
   1069 			ip1dbg(("ip_rts_request: error %d\n", error));
   1070 		} else {
   1071 			rtm->rtm_flags |= RTF_DONE;
   1072 			/* OK ACK already set up by caller except this */
   1073 			ip2dbg(("ip_rts_request: OK ACK\n"));
   1074 		}
   1075 		rts_queue_input(mp, connp, af, RTSQ_ALL, ipst);
   1076 	}
   1077 
   1078 	iocp->ioc_error = error;
   1079 	ioc_mp->b_datap->db_type = M_IOCACK;
   1080 	if (iocp->ioc_error != 0)
   1081 		iocp->ioc_count = 0;
   1082 	(connp->conn_recv)(connp, ioc_mp, NULL);
   1083 
   1084 	/* conn was refheld in ip_wput_ioctl. */
   1085 	CONN_OPER_PENDING_DONE(connp);
   1086 
   1087 	return (error);
   1088 }
   1089 
   1090 int
   1091 ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
   1092 {
   1093 	return (ip_rts_request_common(q, mp, Q_TO_CONN(q), ioc_cr));
   1094 }
   1095 
   1096 /*
   1097  * Build a reply to the RTM_GET request contained in the given message block
   1098  * using the retrieved IRE of the destination address, the parent IRE (if it
   1099  * exists) and the address family.
   1100  *
   1101  * Returns a pointer to a message block containing the reply if successful,
   1102  * otherwise NULL is returned.
   1103  */
   1104 static mblk_t *
   1105 rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, sa_family_t af)
   1106 {
   1107 	rt_msghdr_t	*rtm;
   1108 	rt_msghdr_t	*new_rtm;
   1109 	mblk_t		*new_mp;
   1110 	int		rtm_addrs;
   1111 	int		rtm_flags;
   1112 	in6_addr_t	gw_addr_v6;
   1113 	tsol_ire_gw_secattr_t *attrp = NULL;
   1114 	tsol_gc_t	*gc = NULL;
   1115 	tsol_gcgrp_t	*gcgrp = NULL;
   1116 	int		sacnt = 0;
   1117 
   1118 	ASSERT(ire->ire_ipif != NULL);
   1119 	rtm = (rt_msghdr_t *)mp->b_rptr;
   1120 
   1121 	if (sire != NULL && sire->ire_gw_secattr != NULL)
   1122 		attrp = sire->ire_gw_secattr;
   1123 	else if (ire->ire_gw_secattr != NULL)
   1124 		attrp = ire->ire_gw_secattr;
   1125 
   1126 	if (attrp != NULL) {
   1127 		mutex_enter(&attrp->igsa_lock);
   1128 		if ((gc = attrp->igsa_gc) != NULL) {
   1129 			gcgrp = gc->gc_grp;
   1130 			ASSERT(gcgrp != NULL);
   1131 			rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
   1132 			sacnt = 1;
   1133 		} else if ((gcgrp = attrp->igsa_gcgrp) != NULL) {
   1134 			rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
   1135 			gc = gcgrp->gcgrp_head;
   1136 			sacnt = gcgrp->gcgrp_count;
   1137 		}
   1138 		mutex_exit(&attrp->igsa_lock);
   1139 
   1140 		/* do nothing if there's no gc to report */
   1141 		if (gc == NULL) {
   1142 			ASSERT(sacnt == 0);
   1143 			if (gcgrp != NULL) {
   1144 				/* we might as well drop the lock now */
   1145 				rw_exit(&gcgrp->gcgrp_rwlock);
   1146 				gcgrp = NULL;
   1147 			}
   1148 			attrp = NULL;
   1149 		}
   1150 
   1151 		ASSERT(gc == NULL || (gcgrp != NULL &&
   1152 		    RW_LOCK_HELD(&gcgrp->gcgrp_rwlock)));
   1153 	}
   1154 	ASSERT(sacnt == 0 || gc != NULL);
   1155 
   1156 	/*
   1157 	 * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK.
   1158 	 *
   1159 	 * The 4.4BSD-Lite2 code (net/rtsock.c) returns both
   1160 	 * RTA_IFP and RTA_IFA if either is defined, and also
   1161 	 * returns RTA_BRD if the appropriate interface is
   1162 	 * point-to-point.
   1163 	 */
   1164 	rtm_addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK);
   1165 	if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
   1166 		rtm_addrs |= (RTA_IFP | RTA_IFA);
   1167 		if (ire->ire_ipif->ipif_flags & IPIF_POINTOPOINT)
   1168 			rtm_addrs |= RTA_BRD;
   1169 	}
   1170 
   1171 	new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af, sacnt);
   1172 	if (new_mp == NULL) {
   1173 		if (gcgrp != NULL)
   1174 			rw_exit(&gcgrp->gcgrp_rwlock);
   1175 		return (NULL);
   1176 	}
   1177 
   1178 	/*
   1179 	 * We set the destination address, gateway address,
   1180 	 * netmask and flags in the RTM_GET response depending
   1181 	 * on whether we found a parent IRE or not.
   1182 	 * In particular, if we did find a parent IRE during the
   1183 	 * recursive search, use that IRE's gateway address.
   1184 	 * Otherwise, we use the IRE's source address for the
   1185 	 * gateway address.
   1186 	 */
   1187 	ASSERT(af == AF_INET || af == AF_INET6);
   1188 	switch (af) {
   1189 	case AF_INET:
   1190 		if (sire == NULL) {
   1191 			rtm_flags = ire->ire_flags;
   1192 			rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr,
   1193 			    ire->ire_mask, ire->ire_src_addr, ire->ire_src_addr,
   1194 			    ire->ire_ipif->ipif_pp_dst_addr, 0, ire->ire_ipif,
   1195 			    new_mp, sacnt, gc);
   1196 		} else {
   1197 			if (sire->ire_flags & RTF_SETSRC)
   1198 				rtm_addrs |= RTA_SRC;
   1199 
   1200 			rtm_flags = sire->ire_flags;
   1201 			rts_fill_msg(RTM_GET, rtm_addrs, sire->ire_addr,
   1202 			    sire->ire_mask, sire->ire_gateway_addr,
   1203 			    (sire->ire_flags & RTF_SETSRC) ?
   1204 			    sire->ire_src_addr : ire->ire_src_addr,
   1205 			    ire->ire_ipif->ipif_pp_dst_addr,
   1206 			    0, ire->ire_ipif, new_mp, sacnt, gc);
   1207 		}
   1208 		break;
   1209 	case AF_INET6:
   1210 		if (sire == NULL) {
   1211 			rtm_flags = ire->ire_flags;
   1212 			rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6,
   1213 			    &ire->ire_mask_v6, &ire->ire_src_addr_v6,
   1214 			    &ire->ire_src_addr_v6,
   1215 			    &ire->ire_ipif->ipif_v6pp_dst_addr,
   1216 			    &ipv6_all_zeros, ire->ire_ipif, new_mp,
   1217 			    sacnt, gc);
   1218 		} else {
   1219 			if (sire->ire_flags & RTF_SETSRC)
   1220 				rtm_addrs |= RTA_SRC;
   1221 
   1222 			rtm_flags = sire->ire_flags;
   1223 			mutex_enter(&sire->ire_lock);
   1224 			gw_addr_v6 = sire->ire_gateway_addr_v6;
   1225 			mutex_exit(&sire->ire_lock);
   1226 			rts_fill_msg_v6(RTM_GET, rtm_addrs, &sire->ire_addr_v6,
   1227 			    &sire->ire_mask_v6, &gw_addr_v6,
   1228 			    (sire->ire_flags & RTF_SETSRC) ?
   1229 			    &sire->ire_src_addr_v6 : &ire->ire_src_addr_v6,
   1230 			    &ire->ire_ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
   1231 			    ire->ire_ipif, new_mp, sacnt, gc);
   1232 		}
   1233 		break;
   1234 	}
   1235 
   1236 	if (gcgrp != NULL)
   1237 		rw_exit(&gcgrp->gcgrp_rwlock);
   1238 
   1239 	new_rtm = (rt_msghdr_t *)new_mp->b_rptr;
   1240 
   1241 	/*
   1242 	 * The rtm_msglen, rtm_version and rtm_type fields in
   1243 	 * RTM_GET response are filled in by rts_fill_msg.
   1244 	 *
   1245 	 * rtm_addrs and rtm_flags are filled in based on what
   1246 	 * was requested and the state of the IREs looked up
   1247 	 * above.
   1248 	 *
   1249 	 * rtm_inits and rtm_rmx are filled in with metrics
   1250 	 * based on whether a parent IRE was found or not.
   1251 	 *
   1252 	 * TODO: rtm_index and rtm_use should probably be
   1253 	 * filled in with something resonable here and not just
   1254 	 * copied from the request.
   1255 	 */
   1256 	new_rtm->rtm_index = rtm->rtm_index;
   1257 	new_rtm->rtm_pid = rtm->rtm_pid;
   1258 	new_rtm->rtm_seq = rtm->rtm_seq;
   1259 	new_rtm->rtm_use = rtm->rtm_use;
   1260 	new_rtm->rtm_addrs = rtm_addrs;
   1261 	new_rtm->rtm_flags = rtm_flags;
   1262 	if (sire == NULL)
   1263 		new_rtm->rtm_inits = rts_getmetrics(ire, &new_rtm->rtm_rmx);
   1264 	else
   1265 		new_rtm->rtm_inits = rts_getmetrics(sire, &new_rtm->rtm_rmx);
   1266 
   1267 	return (new_mp);
   1268 }
   1269 
   1270 /*
   1271  * Fill the given if_data_t with interface statistics.
   1272  */
   1273 static void
   1274 rts_getifdata(if_data_t *if_data, const ipif_t *ipif)
   1275 {
   1276 	if_data->ifi_type = ipif->ipif_type;	/* ethernet, tokenring, etc */
   1277 	if_data->ifi_addrlen = 0;		/* media address length */
   1278 	if_data->ifi_hdrlen = 0;		/* media header length */
   1279 	if_data->ifi_mtu = ipif->ipif_mtu;	/* maximum transmission unit */
   1280 	if_data->ifi_metric = ipif->ipif_metric; /* metric (external only) */
   1281 	if_data->ifi_baudrate = 0;		/* linespeed */
   1282 
   1283 	if_data->ifi_ipackets = 0;		/* packets received on if */
   1284 	if_data->ifi_ierrors = 0;		/* input errors on interface */
   1285 	if_data->ifi_opackets = 0;		/* packets sent on interface */
   1286 	if_data->ifi_oerrors = 0;		/* output errors on if */
   1287 	if_data->ifi_collisions = 0;		/* collisions on csma if */
   1288 	if_data->ifi_ibytes = 0;		/* total number received */
   1289 	if_data->ifi_obytes = 0;		/* total number sent */
   1290 	if_data->ifi_imcasts = 0;		/* multicast packets received */
   1291 	if_data->ifi_omcasts = 0;		/* multicast packets sent */
   1292 	if_data->ifi_iqdrops = 0;		/* dropped on input */
   1293 	if_data->ifi_noproto = 0;		/* destined for unsupported */
   1294 						/* protocol. */
   1295 }
   1296 
   1297 /*
   1298  * Set the metrics on a forwarding table route.
   1299  */
   1300 static void
   1301 rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics)
   1302 {
   1303 	clock_t		rtt;
   1304 	clock_t		rtt_sd;
   1305 	ipif_t		*ipif;
   1306 	ifrt_t		*ifrt;
   1307 	mblk_t		*mp;
   1308 	in6_addr_t	gw_addr_v6;
   1309 
   1310 	/*
   1311 	 * Bypass obtaining the lock and searching ipif_saved_ire_mp in the
   1312 	 * common case of no metrics.
   1313 	 */
   1314 	if (which == 0)
   1315 		return;
   1316 	ire->ire_uinfo.iulp_set = B_TRUE;
   1317 
   1318 	/*
   1319 	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
   1320 	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
   1321 	 * microseconds.
   1322 	 */
   1323 	if (which & RTV_RTT)
   1324 		rtt = metrics->rmx_rtt / 1000;
   1325 	if (which & RTV_RTTVAR)
   1326 		rtt_sd = metrics->rmx_rttvar / 1000;
   1327 
   1328 	/*
   1329 	 * Update the metrics in the IRE itself.
   1330 	 */
   1331 	mutex_enter(&ire->ire_lock);
   1332 	if (which & RTV_MTU)
   1333 		ire->ire_max_frag = metrics->rmx_mtu;
   1334 	if (which & RTV_RTT)
   1335 		ire->ire_uinfo.iulp_rtt = rtt;
   1336 	if (which & RTV_SSTHRESH)
   1337 		ire->ire_uinfo.iulp_ssthresh = metrics->rmx_ssthresh;
   1338 	if (which & RTV_RTTVAR)
   1339 		ire->ire_uinfo.iulp_rtt_sd = rtt_sd;
   1340 	if (which & RTV_SPIPE)
   1341 		ire->ire_uinfo.iulp_spipe = metrics->rmx_sendpipe;
   1342 	if (which & RTV_RPIPE)
   1343 		ire->ire_uinfo.iulp_rpipe = metrics->rmx_recvpipe;
   1344 	mutex_exit(&ire->ire_lock);
   1345 
   1346 	/*
   1347 	 * Search through the ifrt_t chain hanging off the IPIF in order to
   1348 	 * reflect the metric change there.
   1349 	 */
   1350 	ipif = ire->ire_ipif;
   1351 	if (ipif == NULL)
   1352 		return;
   1353 	ASSERT((ipif->ipif_isv6 && ire->ire_ipversion == IPV6_VERSION) ||
   1354 	    ((!ipif->ipif_isv6 && ire->ire_ipversion == IPV4_VERSION)));
   1355 	if (ipif->ipif_isv6) {
   1356 		mutex_enter(&ire->ire_lock);
   1357 		gw_addr_v6 = ire->ire_gateway_addr_v6;
   1358 		mutex_exit(&ire->ire_lock);
   1359 	}
   1360 	mutex_enter(&ipif->ipif_saved_ire_lock);
   1361 	for (mp = ipif->ipif_saved_ire_mp; mp != NULL; mp = mp->b_cont) {
   1362 		/*
   1363 		 * On a given ipif, the triple of address, gateway and mask is
   1364 		 * unique for each saved IRE (in the case of ordinary interface
   1365 		 * routes, the gateway address is all-zeroes).
   1366 		 */
   1367 		ifrt = (ifrt_t *)mp->b_rptr;
   1368 		if (ipif->ipif_isv6) {
   1369 			if (!IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr,
   1370 			    &ire->ire_addr_v6) ||
   1371 			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr,
   1372 			    &gw_addr_v6) ||
   1373 			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask,
   1374 			    &ire->ire_mask_v6))
   1375 				continue;
   1376 		} else {
   1377 			if (ifrt->ifrt_addr != ire->ire_addr ||
   1378 			    ifrt->ifrt_gateway_addr != ire->ire_gateway_addr ||
   1379 			    ifrt->ifrt_mask != ire->ire_mask)
   1380 				continue;
   1381 		}
   1382 		if (which & RTV_MTU)
   1383 			ifrt->ifrt_max_frag = metrics->rmx_mtu;
   1384 		if (which & RTV_RTT)
   1385 			ifrt->ifrt_iulp_info.iulp_rtt = rtt;
   1386 		if (which & RTV_SSTHRESH) {
   1387 			ifrt->ifrt_iulp_info.iulp_ssthresh =
   1388 			    metrics->rmx_ssthresh;
   1389 		}
   1390 		if (which & RTV_RTTVAR)
   1391 			ifrt->ifrt_iulp_info.iulp_rtt_sd = metrics->rmx_rttvar;
   1392 		if (which & RTV_SPIPE)
   1393 			ifrt->ifrt_iulp_info.iulp_spipe = metrics->rmx_sendpipe;
   1394 		if (which & RTV_RPIPE)
   1395 			ifrt->ifrt_iulp_info.iulp_rpipe = metrics->rmx_recvpipe;
   1396 		break;
   1397 	}
   1398 	mutex_exit(&ipif->ipif_saved_ire_lock);
   1399 }
   1400 
   1401 /*
   1402  * Get the metrics from a forwarding table route.
   1403  */
   1404 static int
   1405 rts_getmetrics(ire_t *ire, rt_metrics_t *metrics)
   1406 {
   1407 	int	metrics_set = 0;
   1408 
   1409 	bzero(metrics, sizeof (rt_metrics_t));
   1410 	/*
   1411 	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
   1412 	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
   1413 	 * microseconds.
   1414 	 */
   1415 	metrics->rmx_rtt = ire->ire_uinfo.iulp_rtt * 1000;
   1416 	metrics_set |= RTV_RTT;
   1417 	metrics->rmx_mtu = ire->ire_max_frag;
   1418 	metrics_set |= RTV_MTU;
   1419 	metrics->rmx_ssthresh = ire->ire_uinfo.iulp_ssthresh;
   1420 	metrics_set |= RTV_SSTHRESH;
   1421 	metrics->rmx_rttvar = ire->ire_uinfo.iulp_rtt_sd * 1000;
   1422 	metrics_set |= RTV_RTTVAR;
   1423 	metrics->rmx_sendpipe = ire->ire_uinfo.iulp_spipe;
   1424 	metrics_set |= RTV_SPIPE;
   1425 	metrics->rmx_recvpipe = ire->ire_uinfo.iulp_rpipe;
   1426 	metrics_set |= RTV_RPIPE;
   1427 	return (metrics_set);
   1428 }
   1429 
   1430 /*
   1431  * Takes a pointer to a routing message and extracts necessary info by looking
   1432  * at the rtm->rtm_addrs bits and store the requested sockaddrs in the pointers
   1433  * passed (all of which must be valid).
   1434  *
   1435  * The bitmask of sockaddrs actually found in the message is returned, or zero
   1436  * is returned in the case of an error.
   1437  */
   1438 static int
   1439 rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp,
   1440     in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp,
   1441     in6_addr_t *in_src_addrp, ushort_t *indexp, sa_family_t *afp,
   1442     tsol_rtsecattr_t *rtsecattr, int *error)
   1443 {
   1444 	struct sockaddr *sa;
   1445 	int	i;
   1446 	int	addr_bits;
   1447 	int	length;
   1448 	int	found_addrs = 0;
   1449 	caddr_t	cp;
   1450 	size_t	size;
   1451 	struct sockaddr_dl *sdl;
   1452 
   1453 	*dst_addrp = ipv6_all_zeros;
   1454 	*gw_addrp = ipv6_all_zeros;
   1455 	*net_maskp = ipv6_all_zeros;
   1456 	*authorp = ipv6_all_zeros;
   1457 	*if_addrp = ipv6_all_zeros;
   1458 	*in_src_addrp = ipv6_all_zeros;
   1459 	*indexp = 0;
   1460 	*afp = AF_UNSPEC;
   1461 	rtsecattr->rtsa_cnt = 0;
   1462 	*error = 0;
   1463 
   1464 	/*
   1465 	 * At present we handle only RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_IFP,
   1466 	 * RTA_IFA and RTA_AUTHOR.  The rest will be added as we need them.
   1467 	 */
   1468 	cp = (caddr_t)&rtm[1];
   1469 	length = rtm->rtm_msglen;
   1470 	for (i = 0; (i < RTA_NUMBITS) && ((cp - (caddr_t)rtm) < length); i++) {
   1471 		/*
   1472 		 * The address family we are working with starts out as
   1473 		 * AF_UNSPEC, but is set to the one specified with the
   1474 		 * destination address.
   1475 		 *
   1476 		 * If the "working" address family that has been set to
   1477 		 * something other than AF_UNSPEC, then the address family of
   1478 		 * subsequent sockaddrs must either be AF_UNSPEC (for
   1479 		 * compatibility with older programs) or must be the same as our
   1480 		 * "working" one.
   1481 		 *
   1482 		 * This code assumes that RTA_DST (1) comes first in the loop.
   1483 		 */
   1484 		sa = (struct sockaddr *)cp;
   1485 		addr_bits = (rtm->rtm_addrs & (1 << i));
   1486 		if (addr_bits == 0)
   1487 			continue;
   1488 		switch (addr_bits) {
   1489 		case RTA_DST:
   1490 			size = rts_copyfromsockaddr(sa, dst_addrp);
   1491 			*afp = sa->sa_family;
   1492 			break;
   1493 		case RTA_GATEWAY:
   1494 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
   1495 				return (0);
   1496 			size = rts_copyfromsockaddr(sa, gw_addrp);
   1497 			break;
   1498 		case RTA_NETMASK:
   1499 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
   1500 				return (0);
   1501 			size = rts_copyfromsockaddr(sa, net_maskp);
   1502 			break;
   1503 		case RTA_IFP:
   1504 			if (sa->sa_family != AF_LINK &&
   1505 			    sa->sa_family != AF_UNSPEC)
   1506 				return (0);
   1507 			sdl = (struct sockaddr_dl *)cp;
   1508 			*indexp = sdl->sdl_index;
   1509 			size = sizeof (struct sockaddr_dl);
   1510 			break;
   1511 		case RTA_SRC:
   1512 			/* Source address of the incoming packet */
   1513 			size = rts_copyfromsockaddr(sa, in_src_addrp);
   1514 			*afp = sa->sa_family;
   1515 			break;
   1516 		case RTA_IFA:
   1517 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
   1518 				return (0);
   1519 			size = rts_copyfromsockaddr(sa, if_addrp);
   1520 			break;
   1521 		case RTA_AUTHOR:
   1522 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
   1523 				return (0);
   1524 			size = rts_copyfromsockaddr(sa, authorp);
   1525 			break;
   1526 		default:
   1527 			return (0);
   1528 		}
   1529 		if (size == 0)
   1530 			return (0);
   1531 		cp += size;
   1532 		found_addrs |= addr_bits;
   1533 	}
   1534 
   1535 	/*
   1536 	 * Parse the routing message and look for any security-
   1537 	 * related attributes for the route.  For each valid
   1538 	 * attribute, allocate/obtain the corresponding kernel
   1539 	 * route security attributes.
   1540 	 */
   1541 	if (((cp - (caddr_t)rtm) < length) && is_system_labeled()) {
   1542 		*error = tsol_rtsa_init(rtm, rtsecattr, cp);
   1543 		ASSERT(rtsecattr->rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
   1544 	}
   1545 
   1546 	return (found_addrs);
   1547 }
   1548 
   1549 /*
   1550  * Fills the message with the given info.
   1551  */
   1552 static void
   1553 rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask,
   1554     ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author,
   1555     const ipif_t *ipif, mblk_t *mp, uint_t sacnt, const tsol_gc_t *gc)
   1556 {
   1557 	rt_msghdr_t	*rtm;
   1558 	sin_t		*sin;
   1559 	size_t		data_size, header_size;
   1560 	uchar_t		*cp;
   1561 	int		i;
   1562 
   1563 	ASSERT(mp != NULL);
   1564 	ASSERT(sacnt == 0 || gc != NULL);
   1565 	/*
   1566 	 * First find the type of the message
   1567 	 * and its length.
   1568 	 */
   1569 	header_size = rts_header_msg_size(type);
   1570 	/*
   1571 	 * Now find the size of the data
   1572 	 * that follows the message header.
   1573 	 */
   1574 	data_size = rts_data_msg_size(rtm_addrs, AF_INET, sacnt);
   1575 
   1576 	rtm = (rt_msghdr_t *)mp->b_rptr;
   1577 	mp->b_wptr = &mp->b_rptr[header_size];
   1578 	cp = mp->b_wptr;
   1579 	bzero(cp, data_size);
   1580 	for (i = 0; i < RTA_NUMBITS; i++) {
   1581 		sin = (sin_t *)cp;
   1582 		switch (rtm_addrs & (1 << i)) {
   1583 		case RTA_DST:
   1584 			sin->sin_addr.s_addr = dst;
   1585 			sin->sin_family = AF_INET;
   1586 			cp += sizeof (sin_t);
   1587 			break;
   1588 		case RTA_GATEWAY:
   1589 			sin->sin_addr.s_addr = gateway;
   1590 			sin->sin_family = AF_INET;
   1591 			cp += sizeof (sin_t);
   1592 			break;
   1593 		case RTA_NETMASK:
   1594 			sin->sin_addr.s_addr = mask;
   1595 			sin->sin_family = AF_INET;
   1596 			cp += sizeof (sin_t);
   1597 			break;
   1598 		case RTA_IFP:
   1599 			cp += ill_dls_info((struct sockaddr_dl *)cp, ipif);
   1600 			break;
   1601 		case RTA_IFA:
   1602 		case RTA_SRC:
   1603 			sin->sin_addr.s_addr = src_addr;
   1604 			sin->sin_family = AF_INET;
   1605 			cp += sizeof (sin_t);
   1606 			break;
   1607 		case RTA_AUTHOR:
   1608 			sin->sin_addr.s_addr = author;
   1609 			sin->sin_family = AF_INET;
   1610 			cp += sizeof (sin_t);
   1611 			break;
   1612 		case RTA_BRD:
   1613 			/*
   1614 			 * RTA_BRD is used typically to specify a point-to-point
   1615 			 * destination address.
   1616 			 */
   1617 			sin->sin_addr.s_addr = brd_addr;
   1618 			sin->sin_family = AF_INET;
   1619 			cp += sizeof (sin_t);
   1620 			break;
   1621 		}
   1622 	}
   1623 
   1624 	if (gc != NULL) {
   1625 		rtm_ext_t *rtm_ext;
   1626 		struct rtsa_s *rp_dst;
   1627 		tsol_rtsecattr_t *rsap;
   1628 		int i;
   1629 
   1630 		ASSERT(gc->gc_grp != NULL);
   1631 		ASSERT(RW_LOCK_HELD(&gc->gc_grp->gcgrp_rwlock));
   1632 		ASSERT(sacnt > 0);
   1633 
   1634 		rtm_ext = (rtm_ext_t *)cp;
   1635 		rtm_ext->rtmex_type = RTMEX_GATEWAY_SECATTR;
   1636 		rtm_ext->rtmex_len = TSOL_RTSECATTR_SIZE(sacnt);
   1637 
   1638 		rsap = (tsol_rtsecattr_t *)(rtm_ext + 1);
   1639 		rsap->rtsa_cnt = sacnt;
   1640 		rp_dst = rsap->rtsa_attr;
   1641 
   1642 		for (i = 0; i < sacnt; i++, gc = gc->gc_next, rp_dst++) {
   1643 			ASSERT(gc->gc_db != NULL);
   1644 			bcopy(&gc->gc_db->gcdb_attr, rp_dst, sizeof (*rp_dst));
   1645 		}
   1646 		cp = (uchar_t *)rp_dst;
   1647 	}
   1648 
   1649 	mp->b_wptr = cp;
   1650 	mp->b_cont = NULL;
   1651 	/*
   1652 	 * set the fields that are common to
   1653 	 * to different messages.
   1654 	 */
   1655 	rtm->rtm_msglen = (short)(header_size + data_size);
   1656 	rtm->rtm_version = RTM_VERSION;
   1657 	rtm->rtm_type = (uchar_t)type;
   1658 }
   1659 
   1660 /*
   1661  * Allocates and initializes a routing socket message.
   1662  */
   1663 mblk_t *
   1664 rts_alloc_msg(int type, int rtm_addrs, sa_family_t af, uint_t sacnt)
   1665 {
   1666 	size_t	length;
   1667 	mblk_t	*mp;
   1668 
   1669 	length = RTS_MSG_SIZE(type, rtm_addrs, af, sacnt);
   1670 	mp = allocb(length, BPRI_MED);
   1671 	if (mp == NULL)
   1672 		return (mp);
   1673 	bzero(mp->b_rptr, length);
   1674 	return (mp);
   1675 }
   1676 
   1677 /*
   1678  * Returns the size of the routing
   1679  * socket message header size.
   1680  */
   1681 size_t
   1682 rts_header_msg_size(int type)
   1683 {
   1684 	switch (type) {
   1685 	case RTM_DELADDR:
   1686 	case RTM_NEWADDR:
   1687 		return (sizeof (ifa_msghdr_t));
   1688 	case RTM_IFINFO:
   1689 		return (sizeof (if_msghdr_t));
   1690 	default:
   1691 		return (sizeof (rt_msghdr_t));
   1692 	}
   1693 }
   1694 
   1695 /*
   1696  * Returns the size of the message needed with the given rtm_addrs and family.
   1697  *
   1698  * It is assumed that all of the sockaddrs (with the exception of RTA_IFP) are
   1699  * of the same family (currently either AF_INET or AF_INET6).
   1700  */
   1701 size_t
   1702 rts_data_msg_size(int rtm_addrs, sa_family_t af, uint_t sacnt)
   1703 {
   1704 	int	i;
   1705 	size_t	length = 0;
   1706 
   1707 	for (i = 0; i < RTA_NUMBITS; i++) {
   1708 		switch (rtm_addrs & (1 << i)) {
   1709 		case RTA_IFP:
   1710 			length += sizeof (struct sockaddr_dl);
   1711 			break;
   1712 		case RTA_DST:
   1713 		case RTA_GATEWAY:
   1714 		case RTA_NETMASK:
   1715 		case RTA_SRC:
   1716 		case RTA_IFA:
   1717 		case RTA_AUTHOR:
   1718 		case RTA_BRD:
   1719 			ASSERT(af == AF_INET || af == AF_INET6);
   1720 			switch (af) {
   1721 			case AF_INET:
   1722 				length += sizeof (sin_t);
   1723 				break;
   1724 			case AF_INET6:
   1725 				length += sizeof (sin6_t);
   1726 				break;
   1727 			}
   1728 			break;
   1729 		}
   1730 	}
   1731 	if (sacnt > 0)
   1732 		length += sizeof (rtm_ext_t) + TSOL_RTSECATTR_SIZE(sacnt);
   1733 
   1734 	return (length);
   1735 }
   1736 
   1737 /*
   1738  * This routine is called to generate a message to the routing
   1739  * socket indicating that a redirect has occured, a routing lookup
   1740  * has failed, or that a protocol has detected timeouts to a particular
   1741  * destination. This routine is called for message types RTM_LOSING,
   1742  * RTM_REDIRECT, and RTM_MISS.
   1743  */
   1744 void
   1745 ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask,
   1746     ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs,
   1747     ip_stack_t *ipst)
   1748 {
   1749 	rt_msghdr_t	*rtm;
   1750 	mblk_t		*mp;
   1751 
   1752 	if (rtm_addrs == 0)
   1753 		return;
   1754 	mp = rts_alloc_msg(type, rtm_addrs, AF_INET, 0);
   1755 	if (mp == NULL)
   1756 		return;
   1757 	rts_fill_msg(type, rtm_addrs, dst_addr, net_mask, gw_addr, source, 0,
   1758 	    author, NULL, mp, 0, NULL);
   1759 	rtm = (rt_msghdr_t *)mp->b_rptr;
   1760 	rtm->rtm_flags = flags;
   1761 	rtm->rtm_errno = error;
   1762 	rtm->rtm_flags |= RTF_DONE;
   1763 	rtm->rtm_addrs = rtm_addrs;
   1764 	rts_queue_input(mp, NULL, AF_INET, RTSQ_ALL, ipst);
   1765 }
   1766 
   1767 /*
   1768  * This routine is called to generate a message to the routing
   1769  * socket indicating that the status of a network interface has changed.
   1770  * Message type generated RTM_IFINFO.
   1771  */
   1772 void
   1773 ip_rts_ifmsg(const ipif_t *ipif, uint_t flags)
   1774 {
   1775 	ip_rts_xifmsg(ipif, 0, 0, flags);
   1776 }
   1777 
   1778 void
   1779 ip_rts_xifmsg(const ipif_t *ipif, uint64_t set, uint64_t clear, uint_t flags)
   1780 {
   1781 	if_msghdr_t	*ifm;
   1782 	mblk_t		*mp;
   1783 	sa_family_t	af;
   1784 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
   1785 
   1786 	/*
   1787 	 * This message should be generated only when the physical interface
   1788 	 * is changing state.
   1789 	 */
   1790 	if (ipif->ipif_id != 0)
   1791 		return;
   1792 
   1793 	if (ipif->ipif_isv6) {
   1794 		af = AF_INET6;
   1795 		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
   1796 		if (mp == NULL)
   1797 			return;
   1798 		rts_fill_msg_v6(RTM_IFINFO, RTA_IFP, &ipv6_all_zeros,
   1799 		    &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros,
   1800 		    &ipv6_all_zeros, &ipv6_all_zeros, ipif, mp, 0, NULL);
   1801 	} else {
   1802 		af = AF_INET;
   1803 		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
   1804 		if (mp == NULL)
   1805 			return;
   1806 		rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, ipif, mp,
   1807 		    0, NULL);
   1808 	}
   1809 	ifm = (if_msghdr_t *)mp->b_rptr;
   1810 	ifm->ifm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex;
   1811 	ifm->ifm_flags = (ipif->ipif_flags | ipif->ipif_ill->ill_flags |
   1812 	    ipif->ipif_ill->ill_phyint->phyint_flags | set) & ~clear;
   1813 	rts_getifdata(&ifm->ifm_data, ipif);
   1814 	ifm->ifm_addrs = RTA_IFP;
   1815 
   1816 	if (flags & RTSQ_DEFAULT) {
   1817 		flags = RTSQ_ALL;
   1818 		/*
   1819 		 * If this message is for an underlying interface, prevent
   1820 		 * "normal" (IPMP-unaware) routing sockets from seeing it.
   1821 		 */
   1822 		if (IS_UNDER_IPMP(ipif->ipif_ill))
   1823 			flags &= ~RTSQ_NORMAL;
   1824 	}
   1825 
   1826 	rts_queue_input(mp, NULL, af, flags, ipst);
   1827 }
   1828 
   1829 /*
   1830  * This is called to generate messages to the routing socket
   1831  * indicating a network interface has had addresses associated with it.
   1832  * The structure of the code is based on the 4.4BSD-Lite2 <net/rtsock.c>.
   1833  */
   1834 void
   1835 ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif, uint_t flags)
   1836 {
   1837 	int		pass;
   1838 	int		ncmd;
   1839 	int		rtm_addrs;
   1840 	mblk_t		*mp;
   1841 	ifa_msghdr_t	*ifam;
   1842 	rt_msghdr_t	*rtm;
   1843 	sa_family_t	af;
   1844 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
   1845 
   1846 	if (ipif->ipif_isv6)
   1847 		af = AF_INET6;
   1848 	else
   1849 		af = AF_INET;
   1850 
   1851 	if (flags & RTSQ_DEFAULT) {
   1852 		flags = RTSQ_ALL;
   1853 		/*
   1854 		 * If this message is for an underlying interface, prevent
   1855 		 * "normal" (IPMP-unaware) routing sockets from seeing it.
   1856 		 */
   1857 		if (IS_UNDER_IPMP(ipif->ipif_ill))
   1858 			flags &= ~RTSQ_NORMAL;
   1859 	}
   1860 
   1861 	/*
   1862 	 * If the request is DELETE, send RTM_DELETE and RTM_DELADDR.
   1863 	 * if the request is ADD, send RTM_NEWADDR and RTM_ADD.
   1864 	 */
   1865 	for (pass = 1; pass < 3; pass++) {
   1866 		if ((cmd == RTM_ADD && pass == 1) ||
   1867 		    (cmd == RTM_DELETE && pass == 2)) {
   1868 			ncmd = ((cmd == RTM_ADD) ? RTM_NEWADDR : RTM_DELADDR);
   1869 
   1870 			rtm_addrs = (RTA_IFA | RTA_NETMASK | RTA_BRD | RTA_IFP);
   1871 			mp = rts_alloc_msg(ncmd, rtm_addrs, af, 0);
   1872 			if (mp == NULL)
   1873 				continue;
   1874 			switch (af) {
   1875 			case AF_INET:
   1876 				rts_fill_msg(ncmd, rtm_addrs, 0,
   1877 				    ipif->ipif_net_mask, 0, ipif->ipif_lcl_addr,
   1878 				    ipif->ipif_pp_dst_addr, 0, ipif, mp,
   1879 				    0, NULL);
   1880 				break;
   1881 			case AF_INET6:
   1882 				rts_fill_msg_v6(ncmd, rtm_addrs,
   1883 				    &ipv6_all_zeros, &ipif->ipif_v6net_mask,
   1884 				    &ipv6_all_zeros, &ipif->ipif_v6lcl_addr,
   1885 				    &ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
   1886 				    ipif, mp, 0, NULL);
   1887 				break;
   1888 			}
   1889 			ifam = (ifa_msghdr_t *)mp->b_rptr;
   1890 			ifam->ifam_index =
   1891 			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
   1892 			ifam->ifam_metric = ipif->ipif_metric;
   1893 			ifam->ifam_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
   1894 			ifam->ifam_addrs = rtm_addrs;
   1895 			rts_queue_input(mp, NULL, af, flags, ipst);
   1896 		}
   1897 		if ((cmd == RTM_ADD && pass == 2) ||
   1898 		    (cmd == RTM_DELETE && pass == 1)) {
   1899 			rtm_addrs = (RTA_DST | RTA_NETMASK);
   1900 			mp = rts_alloc_msg(cmd, rtm_addrs, af, 0);
   1901 			if (mp == NULL)
   1902 				continue;
   1903 			switch (af) {
   1904 			case AF_INET:
   1905 				rts_fill_msg(cmd, rtm_addrs,
   1906 				    ipif->ipif_lcl_addr, ipif->ipif_net_mask, 0,
   1907 				    0, 0, 0, NULL, mp, 0, NULL);
   1908 				break;
   1909 			case AF_INET6:
   1910 				rts_fill_msg_v6(cmd, rtm_addrs,
   1911 				    &ipif->ipif_v6lcl_addr,
   1912 				    &ipif->ipif_v6net_mask, &ipv6_all_zeros,
   1913 				    &ipv6_all_zeros, &ipv6_all_zeros,
   1914 				    &ipv6_all_zeros, NULL, mp, 0, NULL);
   1915 				break;
   1916 			}
   1917 			rtm = (rt_msghdr_t *)mp->b_rptr;
   1918 			rtm->rtm_index =
   1919 			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
   1920 			rtm->rtm_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
   1921 			rtm->rtm_errno = error;
   1922 			if (error == 0)
   1923 				rtm->rtm_flags |= RTF_DONE;
   1924 			rtm->rtm_addrs = rtm_addrs;
   1925 			rts_queue_input(mp, NULL, af, flags, ipst);
   1926 		}
   1927 	}
   1928 }
   1929 
   1930 /*
   1931  * Based on the address family specified in a sockaddr, copy the address field
   1932  * into an in6_addr_t.
   1933  *
   1934  * In the case of AF_UNSPEC, we assume the family is actually AF_INET for
   1935  * compatibility with programs that leave the family cleared in the sockaddr.
   1936  * Callers of rts_copyfromsockaddr should check the family themselves if they
   1937  * wish to verify its value.
   1938  *
   1939  * In the case of AF_INET6, a check is made to ensure that address is not an
   1940  * IPv4-mapped address.
   1941  */
   1942 size_t
   1943 rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp)
   1944 {
   1945 	switch (sa->sa_family) {
   1946 	case AF_INET:
   1947 	case AF_UNSPEC:
   1948 		IN6_IPADDR_TO_V4MAPPED(((sin_t *)sa)->sin_addr.s_addr, addrp);
   1949 		return (sizeof (sin_t));
   1950 	case AF_INET6:
   1951 		*addrp = ((sin6_t *)sa)->sin6_addr;
   1952 		if (IN6_IS_ADDR_V4MAPPED(addrp))
   1953 			return (0);
   1954 		return (sizeof (sin6_t));
   1955 	default:
   1956 		return (0);
   1957 	}
   1958 }
   1959