Home | History | Annotate | Download | only in in.mpathd
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include "mpd_defs.h"
     27 #include "mpd_tables.h"
     28 
     29 int debug = 0;				/* Debug flag */
     30 static int pollfd_num = 0;		/* Num. of poll descriptors */
     31 static struct pollfd *pollfds = NULL;	/* Array of poll descriptors */
     32 					/* All times below in ms */
     33 int	user_failure_detection_time;	/* user specified failure detection */
     34 					/* time (fdt) */
     35 int	user_probe_interval;		/* derived from user specified fdt */
     36 
     37 /*
     38  * Structure to store mib2 information returned by the kernel.
     39  * This is used to process routing table information.
     40  */
     41 typedef struct mib_item_s {
     42 	struct mib_item_s	*mi_next;
     43 	struct opthdr		mi_opthdr;
     44 	void			*mi_valp;
     45 } mib_item_t;
     46 
     47 static int	rtsock_v4;		/* AF_INET routing socket */
     48 static int	rtsock_v6;		/* AF_INET6 routing socket */
     49 int	ifsock_v4 = -1;			/* IPv4 socket for ioctls  */
     50 int	ifsock_v6 = -1;			/* IPv6 socket for ioctls  */
     51 static int	lsock_v4;		/* Listen socket to detect mpathd */
     52 static int	lsock_v6;		/* Listen socket to detect mpathd */
     53 static int	mibfd = -1;		/* fd to get mib info */
     54 static boolean_t force_mcast = _B_FALSE; /* Only for test purposes */
     55 
     56 static uint_t	last_initifs_time;	/* Time when initifs was last run */
     57 static	char **argv0;			/* Saved for re-exec on SIGHUP */
     58 boolean_t handle_link_notifications = _B_TRUE;
     59 static int	ipRouteEntrySize;	/* Size of IPv4 route entry */
     60 static int	ipv6RouteEntrySize;	/* Size of IPv6 route entry */
     61 
     62 static void	initlog(void);
     63 static void	run_timeouts(void);
     64 static void	initifs(void);
     65 static void	check_if_removed(struct phyint_instance *pii);
     66 static void	select_test_ifs(void);
     67 static void	update_router_list(mib_item_t *item);
     68 static void	mib_get_constants(mib_item_t *item);
     69 static int	mibwalk(void (*proc)(mib_item_t *));
     70 static void	ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len);
     71 static void	ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len);
     72 static void	router_add_common(int af, char *ifname,
     73     struct in6_addr nexthop);
     74 static void	init_router_targets();
     75 static void	cleanup(void);
     76 static int	setup_listener(int af);
     77 static void	check_config(void);
     78 static void	check_testconfig(void);
     79 static void	check_addr_unique(struct phyint_instance *,
     80     struct sockaddr_storage *);
     81 static void	init_host_targets(void);
     82 static void	dup_host_targets(struct phyint_instance *desired_pii);
     83 static void	loopback_cmd(int sock, int family);
     84 static boolean_t daemonize(void);
     85 static int	closefunc(void *, int);
     86 static unsigned int process_cmd(int newfd, union mi_commands *mpi);
     87 static unsigned int process_query(int fd, mi_query_t *miq);
     88 static unsigned int send_addrinfo(int fd, ipmp_addrinfo_t *adinfop);
     89 static unsigned int send_groupinfo(int fd, ipmp_groupinfo_t *grinfop);
     90 static unsigned int send_grouplist(int fd, ipmp_grouplist_t *grlistp);
     91 static unsigned int send_ifinfo(int fd, ipmp_ifinfo_t *ifinfop);
     92 static unsigned int send_result(int fd, unsigned int error, int syserror);
     93 
     94 addrlist_t *localaddrs;
     95 
     96 /*
     97  * Return the current time in milliseconds (from an arbitrary reference)
     98  * truncated to fit into an int. Truncation is ok since we are interested
     99  * only in differences and not the absolute values.
    100  */
    101 uint_t
    102 getcurrenttime(void)
    103 {
    104 	uint_t	cur_time;	/* In ms */
    105 
    106 	/*
    107 	 * Use of a non-user-adjustable source of time is
    108 	 * required. However millisecond precision is sufficient.
    109 	 * divide by 10^6
    110 	 */
    111 	cur_time = (uint_t)(gethrtime() / 1000000LL);
    112 	return (cur_time);
    113 }
    114 
    115 uint64_t
    116 getcurrentsec(void)
    117 {
    118 	return (gethrtime() / NANOSEC);
    119 }
    120 
    121 /*
    122  * Add fd to the set being polled. Returns 0 if ok; -1 if failed.
    123  */
    124 int
    125 poll_add(int fd)
    126 {
    127 	int i;
    128 	int new_num;
    129 	struct pollfd *newfds;
    130 retry:
    131 	/* Check if already present */
    132 	for (i = 0; i < pollfd_num; i++) {
    133 		if (pollfds[i].fd == fd)
    134 			return (0);
    135 	}
    136 	/* Check for empty spot already present */
    137 	for (i = 0; i < pollfd_num; i++) {
    138 		if (pollfds[i].fd == -1) {
    139 			pollfds[i].fd = fd;
    140 			return (0);
    141 		}
    142 	}
    143 
    144 	/* Allocate space for 32 more fds and initialize to -1 */
    145 	new_num = pollfd_num + 32;
    146 	newfds = realloc(pollfds, new_num * sizeof (struct pollfd));
    147 	if (newfds == NULL) {
    148 		logperror("poll_add: realloc");
    149 		return (-1);
    150 	}
    151 	for (i = pollfd_num; i < new_num; i++) {
    152 		newfds[i].fd = -1;
    153 		newfds[i].events = POLLIN;
    154 	}
    155 	pollfd_num = new_num;
    156 	pollfds = newfds;
    157 	goto retry;
    158 }
    159 
    160 /*
    161  * Remove fd from the set being polled. Returns 0 if ok; -1 if failed.
    162  */
    163 int
    164 poll_remove(int fd)
    165 {
    166 	int i;
    167 
    168 	/* Check if already present */
    169 	for (i = 0; i < pollfd_num; i++) {
    170 		if (pollfds[i].fd == fd) {
    171 			pollfds[i].fd = -1;
    172 			return (0);
    173 		}
    174 	}
    175 	return (-1);
    176 }
    177 
    178 /*
    179  * Extract information about the phyint instance. If the phyint instance still
    180  * exists in the kernel then set pii_in_use, else clear it. check_if_removed()
    181  * will use it to detect phyint instances that don't exist any longer and
    182  * remove them, from our database of phyint instances.
    183  * Return value:
    184  *	returns true if the phyint instance exists in the kernel,
    185  *	returns false otherwise
    186  */
    187 static boolean_t
    188 pii_process(int af, char *name, struct phyint_instance **pii_p)
    189 {
    190 	int err;
    191 	struct phyint_instance *pii;
    192 	struct phyint_instance *pii_other;
    193 
    194 	if (debug & D_PHYINT)
    195 		logdebug("pii_process(%s %s)\n", AF_STR(af), name);
    196 
    197 	pii = phyint_inst_lookup(af, name);
    198 	if (pii == NULL) {
    199 		/*
    200 		 * Phyint instance does not exist in our tables,
    201 		 * create new phyint instance
    202 		 */
    203 		pii = phyint_inst_init_from_k(af, name);
    204 	} else {
    205 		/* Phyint exists in our tables */
    206 		err = phyint_inst_update_from_k(pii);
    207 
    208 		switch (err) {
    209 		case PI_IOCTL_ERROR:
    210 			/* Some ioctl error. don't change anything */
    211 			pii->pii_in_use = 1;
    212 			break;
    213 
    214 		case PI_GROUP_CHANGED:
    215 		case PI_IFINDEX_CHANGED:
    216 			/*
    217 			 * Interface index or group membership has changed.
    218 			 * Delete the old state and recreate based on the new
    219 			 * state (it may no longer be in a group).
    220 			 */
    221 			pii_other = phyint_inst_other(pii);
    222 			if (pii_other != NULL)
    223 				phyint_inst_delete(pii_other);
    224 			phyint_inst_delete(pii);
    225 			pii = phyint_inst_init_from_k(af, name);
    226 			break;
    227 
    228 		case PI_DELETED:
    229 			/* Phyint instance has disappeared from kernel */
    230 			pii->pii_in_use = 0;
    231 			break;
    232 
    233 		case PI_OK:
    234 			/* Phyint instance exists and is fine */
    235 			pii->pii_in_use = 1;
    236 			break;
    237 
    238 		default:
    239 			/* Unknown status */
    240 			logerr("pii_process: Unknown status %d\n", err);
    241 			break;
    242 		}
    243 	}
    244 
    245 	*pii_p = pii;
    246 	if (pii != NULL)
    247 		return (pii->pii_in_use ? _B_TRUE : _B_FALSE);
    248 	else
    249 		return (_B_FALSE);
    250 }
    251 
    252 /*
    253  * Scan all interfaces to detect changes as well as new and deleted interfaces
    254  */
    255 static void
    256 initifs()
    257 {
    258 	int	i, nlifr;
    259 	int	af;
    260 	char	*cp;
    261 	char	*buf;
    262 	int	sockfd;
    263 	uint64_t	flags;
    264 	struct lifnum	lifn;
    265 	struct lifconf	lifc;
    266 	struct lifreq	lifreq;
    267 	struct lifreq	*lifr;
    268 	struct logint	*li;
    269 	struct phyint_instance *pii;
    270 	struct phyint_instance *next_pii;
    271 	struct phyint_group *pg, *next_pg;
    272 	char		pi_name[LIFNAMSIZ + 1];
    273 
    274 	if (debug & D_PHYINT)
    275 		logdebug("initifs: Scanning interfaces\n");
    276 
    277 	last_initifs_time = getcurrenttime();
    278 
    279 	/*
    280 	 * Free the existing local address list; we'll build a new list below.
    281 	 */
    282 	addrlist_free(&localaddrs);
    283 
    284 	/*
    285 	 * Mark the interfaces so that we can find phyints and logints
    286 	 * which have disappeared from the kernel. pii_process() and
    287 	 * logint_init_from_k() will set {pii,li}_in_use when they find
    288 	 * the interface in the kernel. Also, clear dupaddr bit on probe
    289 	 * logint. check_addr_unique() will set the dupaddr bit on the
    290 	 * probe logint, if the testaddress is not unique.
    291 	 */
    292 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
    293 		pii->pii_in_use = 0;
    294 		for (li = pii->pii_logint; li != NULL; li = li->li_next) {
    295 			li->li_in_use = 0;
    296 			if (pii->pii_probe_logint == li)
    297 				li->li_dupaddr = 0;
    298 		}
    299 	}
    300 
    301 	/*
    302 	 * As above, mark groups so that we can detect IPMP interfaces which
    303 	 * have been removed from the kernel.  Also, delete the group address
    304 	 * list since we'll iteratively recreate it below.
    305 	 */
    306 	for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) {
    307 		pg->pg_in_use = _B_FALSE;
    308 		addrlist_free(&pg->pg_addrs);
    309 	}
    310 
    311 	lifn.lifn_family = AF_UNSPEC;
    312 	lifn.lifn_flags = LIFC_ALLZONES | LIFC_UNDER_IPMP;
    313 again:
    314 	if (ioctl(ifsock_v4, SIOCGLIFNUM, (char *)&lifn) < 0) {
    315 		logperror("initifs: ioctl (get interface count)");
    316 		return;
    317 	}
    318 	/*
    319 	 * Pad the interface count to detect when additional interfaces have
    320 	 * been configured between SIOCGLIFNUM and SIOCGLIFCONF.
    321 	 */
    322 	lifn.lifn_count += 4;
    323 
    324 	if ((buf = calloc(lifn.lifn_count, sizeof (struct lifreq))) == NULL) {
    325 		logperror("initifs: calloc");
    326 		return;
    327 	}
    328 
    329 	lifc.lifc_family = AF_UNSPEC;
    330 	lifc.lifc_flags = LIFC_ALLZONES | LIFC_UNDER_IPMP;
    331 	lifc.lifc_len = lifn.lifn_count * sizeof (struct lifreq);
    332 	lifc.lifc_buf = buf;
    333 
    334 	if (ioctl(ifsock_v4, SIOCGLIFCONF, (char *)&lifc) < 0) {
    335 		logperror("initifs: ioctl (get interface configuration)");
    336 		free(buf);
    337 		return;
    338 	}
    339 
    340 	/*
    341 	 * If every lifr_req slot is taken, then additional interfaces must
    342 	 * have been plumbed between the SIOCGLIFNUM and the SIOCGLIFCONF.
    343 	 * Recalculate to make sure we didn't miss any interfaces.
    344 	 */
    345 	nlifr = lifc.lifc_len / sizeof (struct lifreq);
    346 	if (nlifr >= lifn.lifn_count) {
    347 		free(buf);
    348 		goto again;
    349 	}
    350 
    351 	/*
    352 	 * Walk through the lifreqs returned by SIOGGLIFCONF, and refresh the
    353 	 * global list of addresses, phyint groups, phyints, and logints.
    354 	 */
    355 	for (lifr = lifc.lifc_req, i = 0; i < nlifr; i++, lifr++) {
    356 		af = lifr->lifr_addr.ss_family;
    357 		sockfd = (af == AF_INET) ? ifsock_v4 : ifsock_v6;
    358 		(void) strlcpy(lifreq.lifr_name, lifr->lifr_name, LIFNAMSIZ);
    359 
    360 		if (ioctl(sockfd, SIOCGLIFFLAGS, &lifreq) == -1) {
    361 			if (errno != ENXIO)
    362 				logperror("initifs: ioctl (SIOCGLIFFLAGS)");
    363 			continue;
    364 		}
    365 		flags = lifreq.lifr_flags;
    366 
    367 		/*
    368 		 * If the address is IFF_UP, add it to the local address list.
    369 		 * (We ignore addresses that aren't IFF_UP since another node
    370 		 * might legitimately have that address IFF_UP.)
    371 		 */
    372 		if (flags & IFF_UP) {
    373 			(void) addrlist_add(&localaddrs, lifr->lifr_name, flags,
    374 			    &lifr->lifr_addr);
    375 		}
    376 
    377 		/*
    378 		 * If this address is on an IPMP meta-interface, update our
    379 		 * phyint_group information (either by recording that group
    380 		 * still exists or creating a new group), and track what
    381 		 * group the address is part of.
    382 		 */
    383 		if (flags & IFF_IPMP) {
    384 			if (ioctl(sockfd, SIOCGLIFGROUPNAME, &lifreq) == -1) {
    385 				if (errno != ENXIO)
    386 					logperror("initifs: ioctl "
    387 					    "(SIOCGLIFGROUPNAME)");
    388 				continue;
    389 			}
    390 
    391 			pg = phyint_group_lookup(lifreq.lifr_groupname);
    392 			if (pg == NULL) {
    393 				pg = phyint_group_create(lifreq.lifr_groupname);
    394 				if (pg == NULL) {
    395 					logerr("initifs: cannot create group "
    396 					    "%s\n", lifreq.lifr_groupname);
    397 					continue;
    398 				}
    399 				phyint_group_insert(pg);
    400 			}
    401 			pg->pg_in_use = _B_TRUE;
    402 
    403 			/*
    404 			 * Add this to the group's list of data addresses.
    405 			 */
    406 			if (!addrlist_add(&pg->pg_addrs, lifr->lifr_name, flags,
    407 			    &lifr->lifr_addr)) {
    408 				logerr("initifs: insufficient memory to track "
    409 				    "data address information for %s\n",
    410 				    lifr->lifr_name);
    411 			}
    412 			continue;
    413 		}
    414 
    415 		/*
    416 		 * This isn't an address on an IPMP meta-interface, so it's
    417 		 * either on an underlying interface or not related to any
    418 		 * group.  Update our phyint and logint information (via
    419 		 * pii_process() and logint_init_from_k()) -- but first,
    420 		 * convert the logint name to a phyint name so we can call
    421 		 * pii_process().
    422 		 */
    423 		(void) strlcpy(pi_name, lifr->lifr_name, sizeof (pi_name));
    424 		if ((cp = strchr(pi_name, IF_SEPARATOR)) != NULL)
    425 			*cp = '\0';
    426 
    427 		if (pii_process(af, pi_name, &pii)) {
    428 			/* The phyint is fine. So process the logint */
    429 			logint_init_from_k(pii, lifr->lifr_name);
    430 			check_addr_unique(pii, &lifr->lifr_addr);
    431 		}
    432 	}
    433 	free(buf);
    434 
    435 	/*
    436 	 * Scan for groups, phyints and logints that have disappeared from the
    437 	 * kernel, and delete them.
    438 	 */
    439 	for (pii = phyint_instances; pii != NULL; pii = next_pii) {
    440 		next_pii = pii->pii_next;
    441 		check_if_removed(pii);
    442 	}
    443 
    444 	for (pg = phyint_groups; pg != NULL; pg = next_pg) {
    445 		next_pg = pg->pg_next;
    446 		if (!pg->pg_in_use) {
    447 			phyint_group_delete(pg);
    448 			continue;
    449 		}
    450 		/*
    451 		 * Refresh the group's state.  This is necessary since the
    452 		 * group's state is defined by the set of usable interfaces in
    453 		 * the group, and an interface is considered unusable if all
    454 		 * of its addresses are down.  When an address goes down/up,
    455 		 * the RTM_DELADDR/RTM_NEWADDR brings us through here.
    456 		 */
    457 		phyint_group_refresh_state(pg);
    458 	}
    459 
    460 	/*
    461 	 * Select a test address for sending probes on each phyint instance
    462 	 */
    463 	select_test_ifs();
    464 
    465 	/*
    466 	 * Handle link up/down notifications.
    467 	 */
    468 	process_link_state_changes();
    469 }
    470 
    471 /*
    472  * Check that a given test address is unique across all of the interfaces in a
    473  * group.  (e.g., IPv6 link-locals may not be inherently unique, and binding
    474  * to such an (IFF_NOFAILOVER) address can produce unexpected results.)
    475  * Any issues will be reported by check_testconfig().
    476  */
    477 static void
    478 check_addr_unique(struct phyint_instance *ourpii, struct sockaddr_storage *ss)
    479 {
    480 	struct phyint		*pi;
    481 	struct phyint_group	*pg;
    482 	struct in6_addr		addr;
    483 	struct phyint_instance	*pii;
    484 	struct sockaddr_in	*sin;
    485 
    486 	if (ss->ss_family == AF_INET) {
    487 		sin = (struct sockaddr_in *)ss;
    488 		IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &addr);
    489 	} else {
    490 		assert(ss->ss_family == AF_INET6);
    491 		addr = ((struct sockaddr_in6 *)ss)->sin6_addr;
    492 	}
    493 
    494 	/*
    495 	 * For anonymous groups, every interface is assumed to be on its own
    496 	 * link, so there is no chance of overlapping addresses.
    497 	 */
    498 	pg = ourpii->pii_phyint->pi_group;
    499 	if (pg == phyint_anongroup)
    500 		return;
    501 
    502 	/*
    503 	 * Walk the list of phyint instances in the group and check for test
    504 	 * addresses matching ours.  Of course, we skip ourself.
    505 	 */
    506 	for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) {
    507 		pii = PHYINT_INSTANCE(pi, ss->ss_family);
    508 		if (pii == NULL || pii == ourpii ||
    509 		    pii->pii_probe_logint == NULL)
    510 			continue;
    511 
    512 		/*
    513 		 * If this test address is not unique, set the dupaddr bit.
    514 		 */
    515 		if (IN6_ARE_ADDR_EQUAL(&addr, &pii->pii_probe_logint->li_addr))
    516 			pii->pii_probe_logint->li_dupaddr = 1;
    517 	}
    518 }
    519 
    520 /*
    521  * Stop probing an interface.  Called when an interface is offlined.
    522  * The probe socket is closed on each interface instance, and the
    523  * interface state set to PI_OFFLINE.
    524  */
    525 void
    526 stop_probing(struct phyint *pi)
    527 {
    528 	struct phyint_instance *pii;
    529 
    530 	pii = pi->pi_v4;
    531 	if (pii != NULL) {
    532 		if (pii->pii_probe_sock != -1)
    533 			close_probe_socket(pii, _B_TRUE);
    534 		pii->pii_probe_logint = NULL;
    535 	}
    536 
    537 	pii = pi->pi_v6;
    538 	if (pii != NULL) {
    539 		if (pii->pii_probe_sock != -1)
    540 			close_probe_socket(pii, _B_TRUE);
    541 		pii->pii_probe_logint = NULL;
    542 	}
    543 
    544 	phyint_chstate(pi, PI_OFFLINE);
    545 }
    546 
    547 enum { BAD_TESTFLAGS, OK_TESTFLAGS, BEST_TESTFLAGS };
    548 
    549 /*
    550  * Rate the provided test flags.  By definition, IFF_NOFAILOVER must be set.
    551  * IFF_UP must also be set so that the associated address can be used as a
    552  * source address.  Further, we must be able to exchange packets with local
    553  * destinations, so IFF_NOXMIT and IFF_NOLOCAL must be clear.  For historical
    554  * reasons, we have a proclivity for IFF_DEPRECATED IPv4 test addresses.
    555  */
    556 static int
    557 rate_testflags(uint64_t flags)
    558 {
    559 	if ((flags & (IFF_NOFAILOVER | IFF_UP)) != (IFF_NOFAILOVER | IFF_UP))
    560 		return (BAD_TESTFLAGS);
    561 
    562 	if ((flags & (IFF_NOXMIT | IFF_NOLOCAL)) != 0)
    563 		return (BAD_TESTFLAGS);
    564 
    565 	if ((flags & (IFF_IPV6 | IFF_DEPRECATED)) == IFF_DEPRECATED)
    566 		return (BEST_TESTFLAGS);
    567 
    568 	if ((flags & (IFF_IPV6 | IFF_DEPRECATED)) == IFF_IPV6)
    569 		return (BEST_TESTFLAGS);
    570 
    571 	return (OK_TESTFLAGS);
    572 }
    573 
    574 /*
    575  * Attempt to select a test address for each phyint instance.
    576  * Call phyint_inst_sockinit() to complete the initializations.
    577  */
    578 static void
    579 select_test_ifs(void)
    580 {
    581 	struct phyint		*pi;
    582 	struct phyint_instance	*pii;
    583 	struct phyint_instance	*next_pii;
    584 	struct logint		*li;
    585 	struct logint  		*probe_logint;
    586 	boolean_t		target_scan_reqd = _B_FALSE;
    587 	int			rating;
    588 
    589 	if (debug & D_PHYINT)
    590 		logdebug("select_test_ifs\n");
    591 
    592 	/*
    593 	 * For each phyint instance, do the test address selection
    594 	 */
    595 	for (pii = phyint_instances; pii != NULL; pii = next_pii) {
    596 		next_pii = pii->pii_next;
    597 		probe_logint = NULL;
    598 
    599 		/*
    600 		 * An interface that is offline should not be probed.
    601 		 * IFF_OFFLINE interfaces should always be PI_OFFLINE
    602 		 * unless some other entity has set the offline flag.
    603 		 */
    604 		if (pii->pii_phyint->pi_flags & IFF_OFFLINE) {
    605 			if (pii->pii_phyint->pi_state != PI_OFFLINE) {
    606 				logerr("shouldn't be probing offline"
    607 				    " interface %s (state is: %u)."
    608 				    " Stopping probes.\n",
    609 				    pii->pii_phyint->pi_name,
    610 				    pii->pii_phyint->pi_state);
    611 				stop_probing(pii->pii_phyint);
    612 			}
    613 			continue;
    614 		} else {
    615 			/*
    616 			 * If something cleared IFF_OFFLINE (e.g., by accident
    617 			 * because the SIOCGLIFFLAGS/SIOCSLIFFLAGS sequence is
    618 			 * inherently racy), the phyint may still be offline.
    619 			 * Just ignore it.
    620 			 */
    621 			if (pii->pii_phyint->pi_state == PI_OFFLINE)
    622 				continue;
    623 		}
    624 
    625 		li = pii->pii_probe_logint;
    626 		if (li != NULL) {
    627 			/*
    628 			 * We've already got a test address; only proceed
    629 			 * if it's suboptimal.
    630 			 */
    631 			if (rate_testflags(li->li_flags) == BEST_TESTFLAGS)
    632 				continue;
    633 		}
    634 
    635 		/*
    636 		 * Walk the logints of this phyint instance, and select
    637 		 * the best available test address
    638 		 */
    639 		for (li = pii->pii_logint; li != NULL; li = li->li_next) {
    640 			/*
    641 			 * Skip 0.0.0.0 addresses, as those are never
    642 			 * actually usable.
    643 			 */
    644 			if (pii->pii_af == AF_INET &&
    645 			    IN6_IS_ADDR_V4MAPPED_ANY(&li->li_addr))
    646 				continue;
    647 
    648 			/*
    649 			 * Skip any IPv6 logints that are not link-local,
    650 			 * since we should always have a link-local address
    651 			 * anyway and in6_data() expects link-local replies.
    652 			 */
    653 			if (pii->pii_af == AF_INET6 &&
    654 			    !IN6_IS_ADDR_LINKLOCAL(&li->li_addr))
    655 				continue;
    656 
    657 			/*
    658 			 * Rate the testflags. If we've found an optimal
    659 			 * match, then break out; otherwise, record the most
    660 			 * recent OK one.
    661 			 */
    662 			rating = rate_testflags(li->li_flags);
    663 			if (rating == BAD_TESTFLAGS)
    664 				continue;
    665 
    666 			probe_logint = li;
    667 			if (rating == BEST_TESTFLAGS)
    668 				break;
    669 		}
    670 
    671 		/*
    672 		 * If the probe logint has changed, ditch the old one.
    673 		 */
    674 		if (pii->pii_probe_logint != NULL &&
    675 		    pii->pii_probe_logint != probe_logint) {
    676 			if (pii->pii_probe_sock != -1)
    677 				close_probe_socket(pii, _B_TRUE);
    678 			pii->pii_probe_logint = NULL;
    679 		}
    680 
    681 		if (probe_logint == NULL) {
    682 			/*
    683 			 * We don't have a test address; zero out the probe
    684 			 * stats array since it is no longer relevant.
    685 			 * Optimize by checking if it is already zeroed out.
    686 			 */
    687 			int pr_ndx;
    688 
    689 			pr_ndx = PROBE_INDEX_PREV(pii->pii_probe_next);
    690 			if (pii->pii_probes[pr_ndx].pr_status != PR_UNUSED) {
    691 				clear_pii_probe_stats(pii);
    692 				reset_crtt_all(pii->pii_phyint);
    693 			}
    694 			continue;
    695 		} else if (probe_logint == pii->pii_probe_logint) {
    696 			/*
    697 			 * If we didn't find any new test addr, go to the
    698 			 * next phyint.
    699 			 */
    700 			continue;
    701 		}
    702 
    703 		/*
    704 		 * The phyint is either being assigned a new testaddr
    705 		 * or is being assigned a testaddr for the 1st time.
    706 		 * Need to initialize the phyint socket
    707 		 */
    708 		pii->pii_probe_logint = probe_logint;
    709 		if (!phyint_inst_sockinit(pii)) {
    710 			if (debug & D_PHYINT) {
    711 				logdebug("select_test_ifs: "
    712 				    "phyint_sockinit failed\n");
    713 			}
    714 			phyint_inst_delete(pii);
    715 			continue;
    716 		}
    717 
    718 		/*
    719 		 * This phyint instance is now enabled for probes; this
    720 		 * impacts our state machine in two ways:
    721 		 *
    722 		 * 1. If we're probe *capable* as well (i.e., we have
    723 		 *    probe targets) and the interface is in PI_NOTARGETS,
    724 		 *    then transition to PI_RUNNING.
    725 		 *
    726 		 * 2. If we're not probe capable, and the other phyint
    727 		 *    instance is also not probe capable, and we were in
    728 		 *    PI_RUNNING, then transition to PI_NOTARGETS.
    729 		 *
    730 		 * Also see the state diagram in mpd_probe.c.
    731 		 */
    732 		if (PROBE_CAPABLE(pii)) {
    733 			if (pii->pii_phyint->pi_state == PI_NOTARGETS)
    734 				phyint_chstate(pii->pii_phyint, PI_RUNNING);
    735 		} else if (!PROBE_CAPABLE(phyint_inst_other(pii))) {
    736 			if (pii->pii_phyint->pi_state == PI_RUNNING)
    737 				phyint_chstate(pii->pii_phyint, PI_NOTARGETS);
    738 		}
    739 
    740 		/*
    741 		 * If no targets are currently known for this phyint
    742 		 * we need to call init_router_targets. Since
    743 		 * init_router_targets() initializes the list of targets
    744 		 * for all phyints it is done below the loop.
    745 		 */
    746 		if (pii->pii_targets == NULL)
    747 			target_scan_reqd = _B_TRUE;
    748 
    749 		/*
    750 		 * Start the probe timer for this instance.
    751 		 */
    752 		if (!pii->pii_basetime_inited && PROBE_ENABLED(pii)) {
    753 			start_timer(pii);
    754 			pii->pii_basetime_inited = 1;
    755 		}
    756 	}
    757 
    758 	/*
    759 	 * Scan the interface list for any interfaces that are PI_FAILED or
    760 	 * PI_NOTARGETS but no longer enabled to send probes, and call
    761 	 * phyint_check_for_repair() to see if the link state indicates that
    762 	 * the interface should be repaired.  Also see the state diagram in
    763 	 * mpd_probe.c.
    764 	 */
    765 	for (pi = phyints; pi != NULL; pi = pi->pi_next) {
    766 		if ((!PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6)) &&
    767 		    (pi->pi_state == PI_FAILED ||
    768 		    pi->pi_state == PI_NOTARGETS)) {
    769 			phyint_check_for_repair(pi);
    770 		}
    771 	}
    772 
    773 	check_testconfig();
    774 
    775 	/*
    776 	 * Try to populate the target list. init_router_targets populates
    777 	 * the target list from the routing table. If our target list is
    778 	 * still empty, init_host_targets adds host targets based on the
    779 	 * host target list of other phyints in the group.
    780 	 */
    781 	if (target_scan_reqd) {
    782 		init_router_targets();
    783 		init_host_targets();
    784 	}
    785 }
    786 
    787 /*
    788  * Check test address configuration, and log notices/errors if appropriate.
    789  * Note that this function only logs pre-existing conditions (e.g., that
    790  * probe-based failure detection is disabled).
    791  */
    792 static void
    793 check_testconfig(void)
    794 {
    795 	struct phyint	*pi;
    796 	struct logint  	*li;
    797 	char		abuf[INET6_ADDRSTRLEN];
    798 	int		pri;
    799 
    800 	for (pi = phyints; pi != NULL; pi = pi->pi_next) {
    801 		if (pi->pi_flags & IFF_OFFLINE)
    802 			continue;
    803 
    804 		if (PROBE_ENABLED(pi->pi_v4) || PROBE_ENABLED(pi->pi_v6)) {
    805 			if (pi->pi_taddrmsg_printed ||
    806 			    pi->pi_duptaddrmsg_printed) {
    807 				if (pi->pi_duptaddrmsg_printed)
    808 					pri = LOG_ERR;
    809 				else
    810 					pri = LOG_INFO;
    811 				logmsg(pri, "Test address now configured on "
    812 				    "interface %s; enabling probe-based "
    813 				    "failure detection on it\n", pi->pi_name);
    814 				pi->pi_taddrmsg_printed = 0;
    815 				pi->pi_duptaddrmsg_printed = 0;
    816 			}
    817 			continue;
    818 		}
    819 
    820 		li = NULL;
    821 		if (pi->pi_v4 != NULL && pi->pi_v4->pii_probe_logint != NULL &&
    822 		    pi->pi_v4->pii_probe_logint->li_dupaddr)
    823 			li = pi->pi_v4->pii_probe_logint;
    824 
    825 		if (pi->pi_v6 != NULL && pi->pi_v6->pii_probe_logint != NULL &&
    826 		    pi->pi_v6->pii_probe_logint->li_dupaddr)
    827 			li = pi->pi_v6->pii_probe_logint;
    828 
    829 		if (li != NULL && li->li_dupaddr) {
    830 			if (pi->pi_duptaddrmsg_printed)
    831 				continue;
    832 			logerr("Test address %s is not unique in group; "
    833 			    "disabling probe-based failure detection on %s\n",
    834 			    pr_addr(li->li_phyint_inst->pii_af,
    835 			    li->li_addr, abuf, sizeof (abuf)), pi->pi_name);
    836 			pi->pi_duptaddrmsg_printed = 1;
    837 			continue;
    838 		}
    839 
    840 		if (getcurrentsec() < pi->pi_taddrthresh)
    841 			continue;
    842 
    843 		if (!pi->pi_taddrmsg_printed) {
    844 			logtrace("No test address configured on interface %s; "
    845 			    "disabling probe-based failure detection on it\n",
    846 			    pi->pi_name);
    847 			pi->pi_taddrmsg_printed = 1;
    848 		}
    849 	}
    850 }
    851 
/*
 * Check phyint group configuration, to detect any inconsistencies,
 * and log an error message. This is called from run_timeouts() every
 * 20 secs, but the error message is displayed only once. If the
 * inconsistency is resolved by the admin, a recovery message is displayed
 * once.
 */
    859 static void
    860 check_config(void)
    861 {
    862 	struct phyint_group *pg;
    863 	struct phyint *pi;
    864 	boolean_t v4_in_group;
    865 	boolean_t v6_in_group;
    866 
    867 	/*
    868 	 * All phyints of a group must be homogeneous to ensure that they can
    869 	 * take over for one another.  If any phyint in a group has IPv4
    870 	 * plumbed, check that all phyints have IPv4 plumbed.  Do a similar
    871 	 * check for IPv6.
    872 	 */
    873 	for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) {
    874 		if (pg == phyint_anongroup)
    875 			continue;
    876 
    877 		v4_in_group = _B_FALSE;
    878 		v6_in_group = _B_FALSE;
    879 		/*
    880 		 * 1st pass. Determine if at least 1 phyint in the group
    881 		 * has IPv4 plumbed and if so set v4_in_group to true.
    882 		 * Repeat similarly for IPv6.
    883 		 */
    884 		for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) {
    885 			if (pi->pi_v4 != NULL)
    886 				v4_in_group = _B_TRUE;
    887 			if (pi->pi_v6 != NULL)
    888 				v6_in_group = _B_TRUE;
    889 		}
    890 
    891 		/*
    892 		 * 2nd pass. If v4_in_group is true, check that phyint
    893 		 * has IPv4 plumbed. Repeat similarly for IPv6. Print
    894 		 * out a message the 1st time only.
    895 		 */
    896 		for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) {
    897 			if (pi->pi_flags & IFF_OFFLINE)
    898 				continue;
    899 
    900 			if (v4_in_group == _B_TRUE && pi->pi_v4 == NULL) {
    901 				if (!pi->pi_cfgmsg_printed) {
    902 					logerr("IP interface %s in group %s is"
    903 					    " not plumbed for IPv4, affecting"
    904 					    " IPv4 connectivity\n",
    905 					    pi->pi_name,
    906 					    pi->pi_group->pg_name);
    907 					pi->pi_cfgmsg_printed = 1;
    908 				}
    909 			} else if (v6_in_group == _B_TRUE &&
    910 			    pi->pi_v6 == NULL) {
    911 				if (!pi->pi_cfgmsg_printed) {
    912 					logerr("IP interface %s in group %s is"
    913 					    " not plumbed for IPv6, affecting"
    914 					    " IPv6 connectivity\n",
    915 					    pi->pi_name,
    916 					    pi->pi_group->pg_name);
    917 					pi->pi_cfgmsg_printed = 1;
    918 				}
    919 			} else {
    920 				/*
    921 				 * The phyint matches the group configuration,
    922 				 * if we have reached this point. If it was
    923 				 * improperly configured earlier, log an
    924 				 * error recovery message
    925 				 */
    926 				if (pi->pi_cfgmsg_printed) {
    927 					logerr("IP interface %s is now"
    928 					    " consistent with group %s "
    929 					    " and connectivity is restored\n",
    930 					    pi->pi_name, pi->pi_group->pg_name);
    931 					pi->pi_cfgmsg_printed = 0;
    932 				}
    933 			}
    934 
    935 		}
    936 	}
    937 }
    938 
/*
 * Timer mechanism using relative time (in milliseconds) from the
 * previous timer event. Timers exceeding TIMER_INFINITY milliseconds
 * will fire after TIMER_INFINITY milliseconds.
 * Unsigned arithmetic note: We assume a 32-bit circular sequence space for
 * time values. Hence 2 consecutive timer events cannot be spaced farther
 * apart than 0x7fffffff. We call this TIMER_INFINITY, and it is the maximum
 * value that can be passed for the delay parameter of timer_schedule().
 */
/* Time, on the getcurrenttime() clock, of the currently scheduled timeout */
static uint_t timer_next;
/*
 * Set by timer_schedule() when it arms the interval timer and cleared by
 * in_signal() when the resulting SIGALRM is processed.
 */
static boolean_t timer_active = _B_FALSE;
    950 
    951 static void
    952 timer_init(void)
    953 {
    954 	timer_next = getcurrenttime() + TIMER_INFINITY;
    955 	/*
    956 	 * The call to run_timeouts() will get the timer started
    957 	 * Since there are no phyints at this point, the timer will
    958 	 * be set for IF_SCAN_INTERVAL ms.
    959 	 */
    960 	run_timeouts();
    961 }
    962 
    963 /*
    964  * Make sure the next SIGALRM occurs delay milliseconds from the current
    965  * time if not earlier. We are interested only in time differences.
    966  */
    967 void
    968 timer_schedule(uint_t delay)
    969 {
    970 	uint_t now;
    971 	struct itimerval itimerval;
    972 
    973 	if (debug & D_TIMER)
    974 		logdebug("timer_schedule(%u)\n", delay);
    975 
    976 	assert(delay <= TIMER_INFINITY);
    977 
    978 	now = getcurrenttime();
    979 	if (delay == 0) {
    980 		/* Minimum allowed delay */
    981 		delay = 1;
    982 	}
    983 	/* Will this timer occur before the currently scheduled SIGALRM? */
    984 	if (timer_active && TIME_GE(now + delay, timer_next)) {
    985 		if (debug & D_TIMER) {
    986 			logdebug("timer_schedule(%u) - no action: "
    987 			    "now %u next %u\n", delay, now, timer_next);
    988 		}
    989 		return;
    990 	}
    991 	timer_next = now + delay;
    992 
    993 	itimerval.it_value.tv_sec = delay / 1000;
    994 	itimerval.it_value.tv_usec = (delay % 1000) * 1000;
    995 	itimerval.it_interval.tv_sec = 0;
    996 	itimerval.it_interval.tv_usec = 0;
    997 	if (debug & D_TIMER) {
    998 		logdebug("timer_schedule(%u): sec %ld usec %ld\n",
    999 		    delay, itimerval.it_value.tv_sec,
   1000 		    itimerval.it_value.tv_usec);
   1001 	}
   1002 	timer_active = _B_TRUE;
   1003 	if (setitimer(ITIMER_REAL, &itimerval, NULL) < 0) {
   1004 		logperror("timer_schedule: setitimer");
   1005 		exit(2);
   1006 	}
   1007 }
   1008 
   1009 static void
   1010 timer_cancel(void)
   1011 {
   1012 	struct itimerval itimerval;
   1013 
   1014 	if (debug & D_TIMER)
   1015 		logdebug("timer_cancel()\n");
   1016 
   1017 	bzero(&itimerval, sizeof (itimerval));
   1018 	if (setitimer(ITIMER_REAL, &itimerval, NULL) < 0)
   1019 		logperror("timer_cancel: setitimer");
   1020 }
   1021 
   1022 /*
   1023  * Timer has fired. Determine when the next timer event will occur by asking
   1024  * all the timer routines. Should not be called from a timer routine.
   1025  */
   1026 static void
   1027 run_timeouts(void)
   1028 {
   1029 	uint_t next;
   1030 	uint_t next_event_time;
   1031 	struct phyint_instance *pii;
   1032 	struct phyint_instance *next_pii;
   1033 	static boolean_t timeout_running;
   1034 
   1035 	/* assert that recursive timeouts don't happen. */
   1036 	assert(!timeout_running);
   1037 
   1038 	timeout_running = _B_TRUE;
   1039 
   1040 	if (debug & D_TIMER)
   1041 		logdebug("run_timeouts()\n");
   1042 
   1043 	if ((getcurrenttime() - last_initifs_time) > IF_SCAN_INTERVAL) {
   1044 		initifs();
   1045 		check_config();
   1046 	}
   1047 
   1048 	next = TIMER_INFINITY;
   1049 
   1050 	for (pii = phyint_instances; pii != NULL; pii = next_pii) {
   1051 		next_pii = pii->pii_next;
   1052 		next_event_time = phyint_inst_timer(pii);
   1053 		if (next_event_time != TIMER_INFINITY && next_event_time < next)
   1054 			next = next_event_time;
   1055 
   1056 		if (debug & D_TIMER) {
   1057 			logdebug("run_timeouts(%s %s): next scheduled for"
   1058 			    " this phyint inst %u, next scheduled global"
   1059 			    " %u ms\n",
   1060 			    AF_STR(pii->pii_af), pii->pii_phyint->pi_name,
   1061 			    next_event_time, next);
   1062 		}
   1063 	}
   1064 
   1065 	/*
   1066 	 * Make sure initifs() is called at least once every
   1067 	 * IF_SCAN_INTERVAL, to make sure that we are in sync
   1068 	 * with the kernel, in case we have missed any routing
   1069 	 * socket messages.
   1070 	 */
   1071 	if (next > IF_SCAN_INTERVAL)
   1072 		next = IF_SCAN_INTERVAL;
   1073 
   1074 	if (debug & D_TIMER)
   1075 		logdebug("run_timeouts: %u ms\n", next);
   1076 
   1077 	timer_schedule(next);
   1078 	timeout_running = _B_FALSE;
   1079 }
   1080 
/*
 * Pipe used for synchronous signal delivery: sig_handler() writes the
 * signal number to eventpipe_write, and eventpipe_read is registered with
 * the poll set by setup_eventpipe().  Both are -1 until the pipe has been
 * created.
 */
static int eventpipe_read = -1;	/* Used for synchronous signal delivery */
static int eventpipe_write = -1;
/* Set by cleanup(); sig_handler() stops writing to the pipe once true */
boolean_t cleanup_started = _B_FALSE;	/* true if we're going away */
   1084 
   1085 /*
   1086  * Ensure that signals are processed synchronously with the rest of
   1087  * the code by just writing a one character signal number on the pipe.
   1088  * The poll loop will pick this up and process the signal event.
   1089  */
   1090 static void
   1091 sig_handler(int signo)
   1092 {
   1093 	uchar_t buf = (uchar_t)signo;
   1094 
   1095 	/*
   1096 	 * Don't write to pipe if cleanup has already begun. cleanup()
   1097 	 * might have closed the pipe already
   1098 	 */
   1099 	if (cleanup_started)
   1100 		return;
   1101 
   1102 	if (eventpipe_write == -1) {
   1103 		logerr("sig_handler: no pipe found\n");
   1104 		return;
   1105 	}
   1106 	if (write(eventpipe_write, &buf, sizeof (buf)) < 0)
   1107 		logperror("sig_handler: write");
   1108 }
   1109 
   1110 extern struct probes_missed probes_missed;
   1111 
   1112 /*
   1113  * Pick up a signal "byte" from the pipe and process it.
   1114  */
   1115 static void
   1116 in_signal(int fd)
   1117 {
   1118 	uchar_t buf;
   1119 	uint64_t  sent, acked, lost, unacked, unknown;
   1120 	struct phyint_instance *pii;
   1121 	int pr_ndx;
   1122 
   1123 	switch (read(fd, &buf, sizeof (buf))) {
   1124 	case -1:
   1125 		logperror("in_signal: read");
   1126 		exit(1);
   1127 		/* NOTREACHED */
   1128 	case 1:
   1129 		break;
   1130 	case 0:
   1131 		logerr("in_signal: read end of file\n");
   1132 		exit(1);
   1133 		/* NOTREACHED */
   1134 	default:
   1135 		logerr("in_signal: read > 1\n");
   1136 		exit(1);
   1137 	}
   1138 
   1139 	if (debug & D_TIMER)
   1140 		logdebug("in_signal() got %d\n", buf);
   1141 
   1142 	switch (buf) {
   1143 	case SIGALRM:
   1144 		if (debug & D_TIMER) {
   1145 			uint_t now = getcurrenttime();
   1146 
   1147 			logdebug("in_signal(SIGALRM) delta %u\n",
   1148 			    now - timer_next);
   1149 		}
   1150 		timer_active = _B_FALSE;
   1151 		run_timeouts();
   1152 		break;
   1153 	case SIGUSR1:
   1154 		logdebug("Printing configuration:\n");
   1155 		/* Print out the internal tables */
   1156 		phyint_inst_print_all();
   1157 
   1158 		/*
   1159 		 * Print out the accumulated statistics about missed
   1160 		 * probes (happens due to scheduling delay).
   1161 		 */
   1162 		logerr("Missed sending total of %d probes spread over"
   1163 		    " %d occurrences\n", probes_missed.pm_nprobes,
   1164 		    probes_missed.pm_ntimes);
   1165 
   1166 		/*
   1167 		 * Print out the accumulated statistics about probes
   1168 		 * that were sent.
   1169 		 */
   1170 		for (pii = phyint_instances; pii != NULL;
   1171 		    pii = pii->pii_next) {
   1172 			unacked = 0;
   1173 			acked = pii->pii_cum_stats.acked;
   1174 			lost = pii->pii_cum_stats.lost;
   1175 			sent = pii->pii_cum_stats.sent;
   1176 			unknown = pii->pii_cum_stats.unknown;
   1177 			for (pr_ndx = 0; pr_ndx < PROBE_STATS_COUNT; pr_ndx++) {
   1178 				switch (pii->pii_probes[pr_ndx].pr_status) {
   1179 				case PR_ACKED:
   1180 					acked++;
   1181 					break;
   1182 				case PR_LOST:
   1183 					lost++;
   1184 					break;
   1185 				case PR_UNACKED:
   1186 					unacked++;
   1187 					break;
   1188 				}
   1189 			}
   1190 			logerr("\nProbe stats on (%s %s)\n"
   1191 			    "Number of probes sent %lld\n"
   1192 			    "Number of probe acks received %lld\n"
   1193 			    "Number of probes/acks lost %lld\n"
   1194 			    "Number of valid unacknowledged probes %lld\n"
   1195 			    "Number of ambiguous probe acks received %lld\n",
   1196 			    AF_STR(pii->pii_af), pii->pii_name,
   1197 			    sent, acked, lost, unacked, unknown);
   1198 		}
   1199 		break;
   1200 	case SIGHUP:
   1201 		logerr("SIGHUP: restart and reread config file\n");
   1202 		/*
   1203 		 * Cancel the interval timer.  Needed since setitimer() uses
   1204 		 * alarm() and the time left is inherited across exec(), and
   1205 		 * thus the SIGALRM may be delivered before a handler has been
   1206 		 * setup, causing in.mpathd to erroneously exit.
   1207 		 */
   1208 		timer_cancel();
   1209 		cleanup();
   1210 		(void) execv(argv0[0], argv0);
   1211 		_exit(0177);
   1212 		/* NOTREACHED */
   1213 	case SIGINT:
   1214 	case SIGTERM:
   1215 	case SIGQUIT:
   1216 		cleanup();
   1217 		exit(0);
   1218 		/* NOTREACHED */
   1219 	default:
   1220 		logerr("in_signal: unknown signal: %d\n", buf);
   1221 	}
   1222 }
   1223 
   1224 static void
   1225 cleanup(void)
   1226 {
   1227 	struct phyint_instance *pii;
   1228 	struct phyint_instance *next_pii;
   1229 
   1230 	/*
   1231 	 * Make sure that we don't write to eventpipe in
   1232 	 * sig_handler() if any signal notably SIGALRM,
   1233 	 * occurs after we close the eventpipe descriptor below
   1234 	 */
   1235 	cleanup_started = _B_TRUE;
   1236 
   1237 	for (pii = phyint_instances; pii != NULL; pii = next_pii) {
   1238 		next_pii = pii->pii_next;
   1239 		phyint_inst_delete(pii);
   1240 	}
   1241 
   1242 	(void) close(ifsock_v4);
   1243 	(void) close(ifsock_v6);
   1244 	(void) close(rtsock_v4);
   1245 	(void) close(rtsock_v6);
   1246 	(void) close(lsock_v4);
   1247 	(void) close(lsock_v6);
   1248 	(void) close(0);
   1249 	(void) close(1);
   1250 	(void) close(2);
   1251 	(void) close(mibfd);
   1252 	(void) close(eventpipe_read);
   1253 	(void) close(eventpipe_write);
   1254 }
   1255 
   1256 /*
   1257  * Create pipe for signal delivery and set up signal handlers.
   1258  */
   1259 static void
   1260 setup_eventpipe(void)
   1261 {
   1262 	int fds[2];
   1263 	struct sigaction act;
   1264 
   1265 	if ((pipe(fds)) < 0) {
   1266 		logperror("setup_eventpipe: pipe");
   1267 		exit(1);
   1268 	}
   1269 	eventpipe_read = fds[0];
   1270 	eventpipe_write = fds[1];
   1271 	if (poll_add(eventpipe_read) == -1) {
   1272 		exit(1);
   1273 	}
   1274 
   1275 	act.sa_handler = sig_handler;
   1276 	act.sa_flags = SA_RESTART;
   1277 	(void) sigaction(SIGALRM, &act, NULL);
   1278 
   1279 	(void) sigset(SIGHUP, sig_handler);
   1280 	(void) sigset(SIGUSR1, sig_handler);
   1281 	(void) sigset(SIGTERM, sig_handler);
   1282 	(void) sigset(SIGINT, sig_handler);
   1283 	(void) sigset(SIGQUIT, sig_handler);
   1284 }
   1285 
   1286 /*
   1287  * Create a routing socket for receiving RTM_IFINFO messages.
   1288  */
   1289 static int
   1290 setup_rtsock(int af)
   1291 {
   1292 	int	s;
   1293 	int	flags;
   1294 	int	aware = RTAW_UNDER_IPMP;
   1295 
   1296 	s = socket(PF_ROUTE, SOCK_RAW, af);
   1297 	if (s == -1) {
   1298 		logperror("setup_rtsock: socket PF_ROUTE");
   1299 		exit(1);
   1300 	}
   1301 
   1302 	if (setsockopt(s, SOL_ROUTE, RT_AWARE, &aware, sizeof (aware)) == -1) {
   1303 		logperror("setup_rtsock: setsockopt RT_AWARE");
   1304 		(void) close(s);
   1305 		exit(1);
   1306 	}
   1307 
   1308 	if ((flags = fcntl(s, F_GETFL, 0)) < 0) {
   1309 		logperror("setup_rtsock: fcntl F_GETFL");
   1310 		(void) close(s);
   1311 		exit(1);
   1312 	}
   1313 	if ((fcntl(s, F_SETFL, flags | O_NONBLOCK)) < 0) {
   1314 		logperror("setup_rtsock: fcntl F_SETFL");
   1315 		(void) close(s);
   1316 		exit(1);
   1317 	}
   1318 	if (poll_add(s) == -1) {
   1319 		(void) close(s);
   1320 		exit(1);
   1321 	}
   1322 	return (s);
   1323 }
   1324 
   1325 /*
   1326  * Process an RTM_IFINFO message received on a routing socket.
   1327  * The return value indicates whether a full interface scan is required.
   1328  * Link up/down notifications are reflected in the IFF_RUNNING flag.
   1329  * If just the state of the IFF_RUNNING interface flag has changed, a
   1330  * a full interface scan isn't required.
   1331  */
   1332 static boolean_t
   1333 process_rtm_ifinfo(if_msghdr_t *ifm, int type)
   1334 {
   1335 	struct sockaddr_dl *sdl;
   1336 	struct phyint *pi;
   1337 	uint64_t old_flags;
   1338 	struct phyint_instance *pii;
   1339 
   1340 	assert(ifm->ifm_type == RTM_IFINFO && ifm->ifm_addrs == RTA_IFP);
   1341 
   1342 	/*
   1343 	 * Although the sockaddr_dl structure is directly after the
   1344 	 * if_msghdr_t structure. At the time of writing, the size of the
   1345 	 * if_msghdr_t structure is different on 32 and 64 bit kernels, due
   1346 	 * to the presence of a timeval structure, which contains longs,
   1347 	 * in the if_data structure.  Anyway, we know where the message ends,
   1348 	 * so we work backwards to get the start of the sockaddr_dl structure.
   1349 	 */
   1350 	/*LINTED*/
   1351 	sdl = (struct sockaddr_dl *)((char *)ifm + ifm->ifm_msglen -
   1352 	    sizeof (struct sockaddr_dl));
   1353 
   1354 	assert(sdl->sdl_family == AF_LINK);
   1355 
   1356 	/*
   1357 	 * The interface name is in sdl_data.
   1358 	 * RTM_IFINFO messages are only generated for logical interface
   1359 	 * zero, so there is no colon and logical interface number to
   1360 	 * strip from the name.	 The name is not null terminated, but
   1361 	 * there should be enough space in sdl_data to add the null.
   1362 	 */
   1363 	if (sdl->sdl_nlen >= sizeof (sdl->sdl_data)) {
   1364 		if (debug & D_LINKNOTE)
   1365 			logdebug("process_rtm_ifinfo: phyint name too long\n");
   1366 		return (_B_TRUE);
   1367 	}
   1368 	sdl->sdl_data[sdl->sdl_nlen] = 0;
   1369 
   1370 	pi = phyint_lookup(sdl->sdl_data);
   1371 	if (pi == NULL) {
   1372 		if (debug & D_LINKNOTE)
   1373 			logdebug("process_rtm_ifinfo: phyint lookup failed"
   1374 			    " for %s\n", sdl->sdl_data);
   1375 		return (_B_TRUE);
   1376 	}
   1377 
   1378 	/*
   1379 	 * We want to try and avoid doing a full interface scan for
   1380 	 * link state notifications from the datalink layer, as indicated
   1381 	 * by the state of the IFF_RUNNING flag.  If just the
   1382 	 * IFF_RUNNING flag has changed state, the link state changes
   1383 	 * are processed without a full scan.
   1384 	 * If there is both an IPv4 and IPv6 instance associated with
   1385 	 * the physical interface, we will get an RTM_IFINFO message
   1386 	 * for each instance.  If we just maintained a single copy of
   1387 	 * the physical interface flags, it would appear that no flags
   1388 	 * had changed when the second message is processed, leading us
   1389 	 * to believe that the message wasn't generated by a flags change,
   1390 	 * and that a full interface scan is required.
   1391 	 * To get around this problem, two additional copies of the flags
   1392 	 * are kept, one copy for each instance.  These are only used in
   1393 	 * this routine.  At any one time, all three copies of the flags
   1394 	 * should be identical except for the IFF_RUNNING flag.	 The
   1395 	 * copy of the flags in the "phyint" structure is always up to
   1396 	 * date.
   1397 	 */
   1398 	pii = (type == AF_INET) ? pi->pi_v4 : pi->pi_v6;
   1399 	if (pii == NULL) {
   1400 		if (debug & D_LINKNOTE)
   1401 			logdebug("process_rtm_ifinfo: no instance of address "
   1402 			    "family %s for %s\n", AF_STR(type), pi->pi_name);
   1403 		return (_B_TRUE);
   1404 	}
   1405 
   1406 	old_flags = pii->pii_flags;
   1407 	pii->pii_flags = PHYINT_FLAGS(ifm->ifm_flags);
   1408 	pi->pi_flags = pii->pii_flags;
   1409 
   1410 	if (debug & D_LINKNOTE) {
   1411 		logdebug("process_rtm_ifinfo: %s address family: %s, "
   1412 		    "old flags: %llx, new flags: %llx\n", pi->pi_name,
   1413 		    AF_STR(type), old_flags, pi->pi_flags);
   1414 	}
   1415 
   1416 	/*
   1417 	 * If IFF_STANDBY has changed, indicate that the interface has changed
   1418 	 * types and refresh IFF_INACTIVE if need be.
   1419 	 */
   1420 	if ((old_flags ^ pii->pii_flags) & IFF_STANDBY) {
   1421 		phyint_changed(pi);
   1422 		if (pii->pii_flags & IFF_STANDBY)
   1423 			phyint_standby_refresh_inactive(pi);
   1424 	}
   1425 
   1426 	/* Has just the IFF_RUNNING flag changed state ? */
   1427 	if ((old_flags ^ pii->pii_flags) != IFF_RUNNING) {
   1428 		struct phyint_instance *pii_other;
   1429 		/*
   1430 		 * It wasn't just a link state change.	Update
   1431 		 * the other instance's copy of the flags.
   1432 		 */
   1433 		pii_other = phyint_inst_other(pii);
   1434 		if (pii_other != NULL)
   1435 			pii_other->pii_flags = pii->pii_flags;
   1436 		return (_B_TRUE);
   1437 	}
   1438 
   1439 	return (_B_FALSE);
   1440 }
   1441 
   1442 /*
   1443  * Retrieve as many routing socket messages as possible, and try to
   1444  * empty the routing sockets. Initiate full scan of targets or interfaces
   1445  * as needed.
   1446  * We listen on separate IPv4 an IPv6 sockets so that we can accurately
   1447  * detect changes in certain flags (see "process_rtm_ifinfo()" above).
   1448  */
   1449 static void
   1450 process_rtsock(int rtsock_v4, int rtsock_v6)
   1451 {
   1452 	int	nbytes;
   1453 	int64_t msg[2048 / 8];
   1454 	struct rt_msghdr *rtm;
   1455 	boolean_t need_if_scan = _B_FALSE;
   1456 	boolean_t need_rt_scan = _B_FALSE;
   1457 	boolean_t rtm_ifinfo_seen = _B_FALSE;
   1458 	int type;
   1459 
   1460 	/* Read as many messages as possible and try to empty the sockets */
   1461 	for (type = AF_INET; ; type = AF_INET6) {
   1462 		for (;;) {
   1463 			nbytes = read((type == AF_INET) ? rtsock_v4 :
   1464 			    rtsock_v6, msg, sizeof (msg));
   1465 			if (nbytes <= 0) {
   1466 				/* No more messages */
   1467 				break;
   1468 			}
   1469 			rtm = (struct rt_msghdr *)msg;
   1470 			if (rtm->rtm_version != RTM_VERSION) {
   1471 				logerr("process_rtsock: version %d "
   1472 				    "not understood\n", rtm->rtm_version);
   1473 				break;
   1474 			}
   1475 
   1476 			if (debug & D_PHYINT) {
   1477 				logdebug("process_rtsock: message %d\n",
   1478 				    rtm->rtm_type);
   1479 			}
   1480 
   1481 			switch (rtm->rtm_type) {
   1482 			case RTM_NEWADDR:
   1483 			case RTM_DELADDR:
   1484 				/*
   1485 				 * Some logical interface has changed,
   1486 				 * have to scan everything to determine
   1487 				 * what actually changed.
   1488 				 */
   1489 				need_if_scan = _B_TRUE;
   1490 				break;
   1491 
   1492 			case RTM_IFINFO:
   1493 				rtm_ifinfo_seen = _B_TRUE;
   1494 				need_if_scan |= process_rtm_ifinfo(
   1495 				    (if_msghdr_t *)rtm, type);
   1496 				break;
   1497 
   1498 			case RTM_ADD:
   1499 			case RTM_DELETE:
   1500 			case RTM_CHANGE:
   1501 			case RTM_OLDADD:
   1502 			case RTM_OLDDEL:
   1503 				need_rt_scan = _B_TRUE;
   1504 				break;
   1505 
   1506 			default:
   1507 				/* Not interesting */
   1508 				break;
   1509 			}
   1510 		}
   1511 		if (type == AF_INET6)
   1512 			break;
   1513 	}
   1514 
   1515 	if (need_if_scan) {
   1516 		if (debug & D_LINKNOTE && rtm_ifinfo_seen)
   1517 			logdebug("process_rtsock: synchronizing with kernel\n");
   1518 		initifs();
   1519 	} else if (rtm_ifinfo_seen) {
   1520 		if (debug & D_LINKNOTE)
   1521 			logdebug("process_rtsock: "
   1522 			    "link up/down notification(s) seen\n");
   1523 		process_link_state_changes();
   1524 	}
   1525 
   1526 	if (need_rt_scan)
   1527 		init_router_targets();
   1528 }
   1529 
   1530 /*
   1531  * Look if the phyint instance or one of its logints have been removed from
   1532  * the kernel and take appropriate action.
   1533  * Uses {pii,li}_in_use.
   1534  */
   1535 static void
   1536 check_if_removed(struct phyint_instance *pii)
   1537 {
   1538 	struct logint *li;
   1539 	struct logint *next_li;
   1540 
   1541 	/* Detect phyints that have been removed from the kernel. */
   1542 	if (!pii->pii_in_use) {
   1543 		logtrace("%s %s has been removed from kernel\n",
   1544 		    AF_STR(pii->pii_af), pii->pii_phyint->pi_name);
   1545 		phyint_inst_delete(pii);
   1546 	} else {
   1547 		/* Detect logints that have been removed. */
   1548 		for (li = pii->pii_logint; li != NULL; li = next_li) {
   1549 			next_li = li->li_next;
   1550 			if (!li->li_in_use) {
   1551 				logint_delete(li);
   1552 			}
   1553 		}
   1554 	}
   1555 }
   1556 
   1557 /*
   1558  * Parse the supplied mib2 information to extract the routing information
   1559  * table. Process the routing table to get the list of known onlink routers
   1560  * and update our database. These onlink routers will serve as probe
   1561  * targets.
   1562  */
   1563 static void
   1564 update_router_list(mib_item_t *item)
   1565 {
   1566 	for (; item != NULL; item = item->mi_next) {
   1567 		if (item->mi_opthdr.name == 0)
   1568 			continue;
   1569 		if (item->mi_opthdr.level == MIB2_IP &&
   1570 		    item->mi_opthdr.name == MIB2_IP_ROUTE) {
   1571 			ire_process_v4((mib2_ipRouteEntry_t *)item->mi_valp,
   1572 			    item->mi_opthdr.len);
   1573 		} else if (item->mi_opthdr.level == MIB2_IP6 &&
   1574 		    item->mi_opthdr.name == MIB2_IP6_ROUTE) {
   1575 			ire_process_v6((mib2_ipv6RouteEntry_t *)item->mi_valp,
   1576 			    item->mi_opthdr.len);
   1577 		}
   1578 	}
   1579 }
   1580 
   1581 
   1582 /*
   1583  * Convert octet `octp' to a phyint name and store in `ifname'
   1584  */
   1585 static void
   1586 oct2ifname(const Octet_t *octp, char *ifname, size_t ifsize)
   1587 {
   1588 	char *cp;
   1589 	size_t len = MIN(octp->o_length, ifsize - 1);
   1590 
   1591 	(void) strncpy(ifname, octp->o_bytes, len);
   1592 	ifname[len] = '\0';
   1593 
   1594 	if ((cp = strchr(ifname, IF_SEPARATOR)) != NULL)
   1595 		*cp = '\0';
   1596 }
   1597 
   1598 /*
   1599  * Examine the IPv4 routing table `buf' for possible targets.  For each
   1600  * possible target, if it's on the same subnet an interface route, pass
   1601  * it to router_add_common() for further consideration.
   1602  */
   1603 static void
   1604 ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len)
   1605 {
   1606 	char ifname[LIFNAMSIZ];
   1607 	mib2_ipRouteEntry_t	*rp, *rp1, *endp;
   1608 	struct in_addr		nexthop_v4;
   1609 	struct in6_addr		nexthop;
   1610 
   1611 	if (debug & D_TARGET)
   1612 		logdebug("ire_process_v4(len %d)\n", len);
   1613 
   1614 	if (len == 0)
   1615 		return;
   1616 
   1617 	assert((len % ipRouteEntrySize) == 0);
   1618 	endp = buf + (len / ipRouteEntrySize);
   1619 
   1620 	/*
   1621 	 * Scan the routing table entries for any IRE_OFFSUBNET entries, and
   1622 	 * cross-reference them with the interface routes to determine if
   1623 	 * they're possible probe targets.
   1624 	 */
   1625 	for (rp = buf; rp < endp; rp++) {
   1626 		if (!(rp->ipRouteInfo.re_ire_type & IRE_OFFSUBNET))
   1627 			continue;
   1628 
   1629 		/* Get the nexthop address. */
   1630 		nexthop_v4.s_addr = rp->ipRouteNextHop;
   1631 
   1632 		/*
   1633 		 * Rescan the routing table looking for interface routes that
   1634 		 * are on the same subnet, and try to add them.  If they're
   1635 		 * not relevant (e.g., the interface route isn't part of an
   1636 		 * IPMP group, router_add_common() will discard).
   1637 		 */
   1638 		for (rp1 = buf; rp1 < endp; rp1++) {
   1639 			if (!(rp1->ipRouteInfo.re_ire_type & IRE_INTERFACE) ||
   1640 			    rp1->ipRouteIfIndex.o_length == 0)
   1641 				continue;
   1642 
   1643 			if ((rp1->ipRouteDest & rp1->ipRouteMask) !=
   1644 			    (nexthop_v4.s_addr & rp1->ipRouteMask))
   1645 				continue;
   1646 
   1647 			oct2ifname(&rp1->ipRouteIfIndex, ifname, LIFNAMSIZ);
   1648 			IN6_INADDR_TO_V4MAPPED(&nexthop_v4, &nexthop);
   1649 			router_add_common(AF_INET, ifname, nexthop);
   1650 		}
   1651 	}
   1652 }
   1653 
   1654 void
   1655 router_add_common(int af, char *ifname, struct in6_addr nexthop)
   1656 {
   1657 	struct phyint_instance *pii;
   1658 	struct phyint *pi;
   1659 
   1660 	if (debug & D_TARGET)
   1661 		logdebug("router_add_common(%s %s)\n", AF_STR(af), ifname);
   1662 
   1663 	/*
   1664 	 * Retrieve the phyint instance; bail if it's not known to us yet.
   1665 	 */
   1666 	pii = phyint_inst_lookup(af, ifname);
   1667 	if (pii == NULL)
   1668 		return;
   1669 
   1670 	/*
   1671 	 * Don't use our own addresses as targets.
   1672 	 */
   1673 	if (own_address(nexthop))
   1674 		return;
   1675 
   1676 	/*
   1677 	 * If the phyint is part a named group, then add the address to all
   1678 	 * members of the group; note that this is suboptimal in the IPv4 case
   1679 	 * as it has already been added to all matching interfaces in
   1680 	 * ire_process_v4(). Otherwise, add the address only to the phyint
   1681 	 * itself, since other phyints in the anongroup may not be on the same
   1682 	 * subnet.
   1683 	 */
   1684 	pi = pii->pii_phyint;
   1685 	if (pi->pi_group == phyint_anongroup) {
   1686 		target_add(pii, nexthop, _B_TRUE);
   1687 	} else {
   1688 		pi = pi->pi_group->pg_phyint;
   1689 		for (; pi != NULL; pi = pi->pi_pgnext)
   1690 			target_add(PHYINT_INSTANCE(pi, af), nexthop, _B_TRUE);
   1691 	}
   1692 }
   1693 
   1694 /*
   1695  * Examine the IPv6 routing table `buf' for possible link-local targets, and
   1696  * pass any contenders to router_add_common() for further consideration.
   1697  */
   1698 static void
   1699 ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len)
   1700 {
   1701 	struct lifreq lifr;
   1702 	char ifname[LIFNAMSIZ];
   1703 	char grname[LIFGRNAMSIZ];
   1704 	mib2_ipv6RouteEntry_t *rp, *rp1, *endp;
   1705 	struct in6_addr nexthop_v6;
   1706 
   1707 	if (debug & D_TARGET)
   1708 		logdebug("ire_process_v6(len %d)\n", len);
   1709 
   1710 	if (len == 0)
   1711 		return;
   1712 
   1713 	assert((len % ipv6RouteEntrySize) == 0);
   1714 	endp = buf + (len / ipv6RouteEntrySize);
   1715 
   1716 	/*
   1717 	 * Scan the routing table entries for any IRE_OFFSUBNET entries, and
   1718 	 * cross-reference them with the interface routes to determine if
   1719 	 * they're possible probe targets.
   1720 	 */
   1721 	for (rp = buf; rp < endp; rp++) {
   1722 		if (!(rp->ipv6RouteInfo.re_ire_type & IRE_OFFSUBNET) ||
   1723 		    !IN6_IS_ADDR_LINKLOCAL(&rp->ipv6RouteNextHop))
   1724 			continue;
   1725 
   1726 		/* Get the nexthop address. */
   1727 		nexthop_v6 = rp->ipv6RouteNextHop;
   1728 
   1729 		/*
   1730 		 * The interface name should always exist for link-locals;
   1731 		 * we use it to map this entry to an IPMP group name.
   1732 		 */
   1733 		if (rp->ipv6RouteIfIndex.o_length == 0)
   1734 			continue;
   1735 
   1736 		oct2ifname(&rp->ipv6RouteIfIndex, lifr.lifr_name, LIFNAMSIZ);
   1737 		if (ioctl(ifsock_v6, SIOCGLIFGROUPNAME, &lifr) == -1 ||
   1738 		    strlcpy(grname, lifr.lifr_groupname, LIFGRNAMSIZ) == 0) {
   1739 			continue;
   1740 		}
   1741 
   1742 		/*
   1743 		 * Rescan the list of routes for interface routes, and add the
   1744 		 * above target to any interfaces in the same IPMP group.
   1745 		 */
   1746 		for (rp1 = buf; rp1 < endp; rp1++) {
   1747 			if (!(rp1->ipv6RouteInfo.re_ire_type & IRE_INTERFACE) ||
   1748 			    rp1->ipv6RouteIfIndex.o_length == 0) {
   1749 				continue;
   1750 			}
   1751 			oct2ifname(&rp1->ipv6RouteIfIndex, ifname, LIFNAMSIZ);
   1752 			(void) strlcpy(lifr.lifr_name, ifname, LIFNAMSIZ);
   1753 
   1754 			if (ioctl(ifsock_v6, SIOCGLIFGROUPNAME, &lifr) != -1 &&
   1755 			    strcmp(lifr.lifr_groupname, grname) == 0) {
   1756 				router_add_common(AF_INET6, ifname, nexthop_v6);
   1757 			}
   1758 		}
   1759 	}
   1760 }
   1761 
   1762 /*
   1763  * Build a list of target routers, by scanning the routing tables.
   1764  * It is assumed that interface routes exist, to reach the routers.
   1765  */
   1766 static void
   1767 init_router_targets(void)
   1768 {
   1769 	struct	target *tg;
   1770 	struct	target *next_tg;
   1771 	struct	phyint_instance *pii;
   1772 	struct	phyint *pi;
   1773 
   1774 	if (force_mcast)
   1775 		return;
   1776 
   1777 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
   1778 		pi = pii->pii_phyint;
   1779 		/*
   1780 		 * Set tg_in_use to false only for router targets.
   1781 		 */
   1782 		if (!pii->pii_targets_are_routers)
   1783 			continue;
   1784 
   1785 		for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next)
   1786 			tg->tg_in_use = 0;
   1787 	}
   1788 
   1789 	if (mibwalk(update_router_list) == -1)
   1790 		exit(1);
   1791 
   1792 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
   1793 		pi = pii->pii_phyint;
   1794 		if (!pii->pii_targets_are_routers)
   1795 			continue;
   1796 
   1797 		for (tg = pii->pii_targets; tg != NULL; tg = next_tg) {
   1798 			next_tg = tg->tg_next;
   1799 			/*
   1800 			 * If the group has failed, it's likely the route was
   1801 			 * removed by an application affected by that failure.
   1802 			 * In that case, we keep the target so that we can
   1803 			 * reliably repair, at which point we'll refresh the
   1804 			 * target list again.
   1805 			 */
   1806 			if (!tg->tg_in_use && !GROUP_FAILED(pi->pi_group))
   1807 				target_delete(tg);
   1808 		}
   1809 	}
   1810 }
   1811 
   1812 /*
   1813  * Attempt to assign host targets to any interfaces that do not currently
   1814  * have probe targets by sharing targets with other interfaces in the group.
   1815  */
   1816 static void
   1817 init_host_targets(void)
   1818 {
   1819 	struct phyint_instance *pii;
   1820 	struct phyint_group *pg;
   1821 
   1822 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
   1823 		pg = pii->pii_phyint->pi_group;
   1824 		if (pg != phyint_anongroup && pii->pii_targets == NULL)
   1825 			dup_host_targets(pii);
   1826 	}
   1827 }
   1828 
   1829 /*
   1830  * Duplicate host targets from other phyints of the group to
   1831  * the phyint instance 'desired_pii'.
   1832  */
   1833 static void
   1834 dup_host_targets(struct phyint_instance	 *desired_pii)
   1835 {
   1836 	int af;
   1837 	struct phyint *pi;
   1838 	struct phyint_instance *pii;
   1839 	struct target *tg;
   1840 
   1841 	assert(desired_pii->pii_phyint->pi_group != phyint_anongroup);
   1842 
   1843 	af = desired_pii->pii_af;
   1844 
   1845 	/*
   1846 	 * For every phyint in the same group as desired_pii, check if
   1847 	 * it has any host targets. If so add them to desired_pii.
   1848 	 */
   1849 	for (pi = desired_pii->pii_phyint; pi != NULL; pi = pi->pi_pgnext) {
   1850 		pii = PHYINT_INSTANCE(pi, af);
   1851 		/*
   1852 		 * We know that we don't have targets on this phyint instance
   1853 		 * since we have been called. But we still check for
   1854 		 * pii_targets_are_routers because another phyint instance
   1855 		 * could have router targets, since IFF_NOFAILOVER addresses
   1856 		 * on different phyint instances may belong to different
   1857 		 * subnets.
   1858 		 */
   1859 		if ((pii == NULL) || (pii == desired_pii) ||
   1860 		    pii->pii_targets_are_routers)
   1861 			continue;
   1862 		for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) {
   1863 			target_create(desired_pii, tg->tg_address, _B_FALSE);
   1864 		}
   1865 	}
   1866 }
   1867 
   1868 static void
   1869 usage(char *cmd)
   1870 {
   1871 	(void) fprintf(stderr, "usage: %s\n", cmd);
   1872 }
   1873 
   1874 
   1875 #define	MPATHD_DEFAULT_FILE	"/etc/default/mpathd"
   1876 
   1877 /* Get an option from the /etc/default/mpathd file */
   1878 static char *
   1879 getdefault(char *name)
   1880 {
   1881 	char namebuf[BUFSIZ];
   1882 	char *value = NULL;
   1883 
   1884 	if (defopen(MPATHD_DEFAULT_FILE) == 0) {
   1885 		char	*cp;
   1886 		int	flags;
   1887 
   1888 		/*
   1889 		 * ignore case
   1890 		 */
   1891 		flags = defcntl(DC_GETFLAGS, 0);
   1892 		TURNOFF(flags, DC_CASE);
   1893 		(void) defcntl(DC_SETFLAGS, flags);
   1894 
   1895 		/* Add "=" to the name */
   1896 		(void) strncpy(namebuf, name, sizeof (namebuf) - 2);
   1897 		(void) strncat(namebuf, "=", 2);
   1898 
   1899 		if ((cp = defread(namebuf)) != NULL)
   1900 			value = strdup(cp);
   1901 
   1902 		/* close */
   1903 		(void) defopen((char *)NULL);
   1904 	}
   1905 	return (value);
   1906 }
   1907 
   1908 
   1909 /*
   1910  * Command line options below
   1911  */
   1912 boolean_t	failback_enabled = _B_TRUE;	/* failback enabled/disabled */
   1913 boolean_t	track_all_phyints = _B_FALSE;	/* track all IP interfaces */
   1914 static boolean_t adopt = _B_FALSE;
   1915 static boolean_t foreground = _B_FALSE;
   1916 
   1917 int
   1918 main(int argc, char *argv[])
   1919 {
   1920 	int i;
   1921 	int c;
   1922 	struct phyint *pi;
   1923 	struct phyint_instance *pii;
   1924 	char *value;
   1925 
   1926 	argv0 = argv;		/* Saved for re-exec on SIGHUP */
   1927 	srandom(gethostid());	/* Initialize the random number generator */
   1928 
   1929 	/*
   1930 	 * NOTE: The messages output by in.mpathd are not suitable for
   1931 	 * translation, so we do not call textdomain().
   1932 	 */
   1933 	(void) setlocale(LC_ALL, "");
   1934 
   1935 	/*
   1936 	 * Get the user specified value of 'failure detection time'
   1937 	 * from /etc/default/mpathd
   1938 	 */
   1939 	value = getdefault("FAILURE_DETECTION_TIME");
   1940 	if (value != NULL) {
   1941 		user_failure_detection_time =
   1942 		    (int)strtol((char *)value, NULL, 0);
   1943 
   1944 		if (user_failure_detection_time <= 0) {
   1945 			user_failure_detection_time = FAILURE_DETECTION_TIME;
   1946 			logerr("Invalid failure detection time %s, assuming "
   1947 			    "default of %d ms\n", value,
   1948 			    user_failure_detection_time);
   1949 
   1950 		} else if (user_failure_detection_time <
   1951 		    MIN_FAILURE_DETECTION_TIME) {
   1952 			user_failure_detection_time =
   1953 			    MIN_FAILURE_DETECTION_TIME;
   1954 			logerr("Too small failure detection time of %s, "
   1955 			    "assuming minimum of %d ms\n", value,
   1956 			    user_failure_detection_time);
   1957 		}
   1958 		free(value);
   1959 	} else {
   1960 		/* User has not specified the parameter, Use default value */
   1961 		user_failure_detection_time = FAILURE_DETECTION_TIME;
   1962 	}
   1963 
   1964 	/*
   1965 	 * This gives the frequency at which probes will be sent.
   1966 	 * When fdt ms elapses, we should be able to determine
   1967 	 * whether 5 consecutive probes have failed or not.
   1968 	 * 1 probe will be sent in every user_probe_interval ms,
   1969 	 * randomly anytime in the (0.5  - 1.0) 2nd half of every
   1970 	 * user_probe_interval. Thus when we send out probe 'n' we
   1971 	 * can be sure that probe 'n - 2' is lost, if we have not
   1972 	 * got the ack. (since the probe interval is > crtt). But
   1973 	 * probe 'n - 1' may be a valid unacked probe, since the
   1974 	 * time between 2 successive probes could be as small as
   1975 	 * 0.5 * user_probe_interval.  Hence the NUM_PROBE_FAILS + 2
   1976 	 */
   1977 	user_probe_interval = user_failure_detection_time /
   1978 	    (NUM_PROBE_FAILS + 2);
   1979 
   1980 	/*
   1981 	 * Get the user specified value of failback_enabled from
   1982 	 * /etc/default/mpathd
   1983 	 */
   1984 	value = getdefault("FAILBACK");
   1985 	if (value != NULL) {
   1986 		if (strcasecmp(value, "yes") == 0)
   1987 			failback_enabled = _B_TRUE;
   1988 		else if (strcasecmp(value, "no") == 0)
   1989 			failback_enabled = _B_FALSE;
   1990 		else
   1991 			logerr("Invalid value for FAILBACK %s\n", value);
   1992 		free(value);
   1993 	} else {
   1994 		failback_enabled = _B_TRUE;
   1995 	}
   1996 
   1997 	/*
   1998 	 * Get the user specified value of track_all_phyints from
   1999 	 * /etc/default/mpathd. The sense is reversed in
   2000 	 * TRACK_INTERFACES_ONLY_WITH_GROUPS.
   2001 	 */
   2002 	value = getdefault("TRACK_INTERFACES_ONLY_WITH_GROUPS");
   2003 	if (value != NULL) {
   2004 		if (strcasecmp(value, "yes") == 0)
   2005 			track_all_phyints = _B_FALSE;
   2006 		else if (strcasecmp(value, "no") == 0)
   2007 			track_all_phyints = _B_TRUE;
   2008 		else
   2009 			logerr("Invalid value for "
   2010 			    "TRACK_INTERFACES_ONLY_WITH_GROUPS %s\n", value);
   2011 		free(value);
   2012 	} else {
   2013 		track_all_phyints = _B_FALSE;
   2014 	}
   2015 
   2016 	while ((c = getopt(argc, argv, "adD:ml")) != EOF) {
   2017 		switch (c) {
   2018 		case 'a':
   2019 			adopt = _B_TRUE;
   2020 			break;
   2021 		case 'm':
   2022 			force_mcast = _B_TRUE;
   2023 			break;
   2024 		case 'd':
   2025 			debug = D_ALL;
   2026 			foreground = _B_TRUE;
   2027 			break;
   2028 		case 'D':
   2029 			i = (int)strtol(optarg, NULL, 0);
   2030 			if (i == 0) {
   2031 				(void) fprintf(stderr, "Bad debug flags: %s\n",
   2032 				    optarg);
   2033 				exit(1);
   2034 			}
   2035 			debug |= i;
   2036 			foreground = _B_TRUE;
   2037 			break;
   2038 		case 'l':
   2039 			/*
   2040 			 * Turn off link state notification handling.
   2041 			 * Undocumented command line flag, for debugging
   2042 			 * purposes.
   2043 			 */
   2044 			handle_link_notifications = _B_FALSE;
   2045 			break;
   2046 		default:
   2047 			usage(argv[0]);
   2048 			exit(1);
   2049 		}
   2050 	}
   2051 
   2052 	/*
   2053 	 * The sockets for the loopback command interface should be listening
   2054 	 * before we fork and exit in daemonize(). This way, whoever started us
   2055 	 * can use the loopback interface as soon as they get a zero exit
   2056 	 * status.
   2057 	 */
   2058 	lsock_v4 = setup_listener(AF_INET);
   2059 	lsock_v6 = setup_listener(AF_INET6);
   2060 
   2061 	if (lsock_v4 < 0 && lsock_v6 < 0) {
   2062 		logerr("main: setup_listener failed for both IPv4 and IPv6\n");
   2063 		exit(1);
   2064 	}
   2065 
   2066 	if (!foreground) {
   2067 		if (!daemonize()) {
   2068 			logerr("cannot daemonize\n");
   2069 			exit(EXIT_FAILURE);
   2070 		}
   2071 		initlog();
   2072 	}
   2073 
   2074 	/*
   2075 	 * Initializations:
   2076 	 * 1. Create ifsock* sockets. These are used for performing SIOC*
   2077 	 *    ioctls. We have 2 sockets 1 each for IPv4 and IPv6.
   2078 	 * 2. Initialize a pipe for handling/recording signal events.
   2079 	 * 3. Create the routing sockets,  used for listening
   2080 	 *    to routing / interface changes.
   2081 	 * 4. phyint_init() - Initialize physical interface state
   2082 	 *    (in mpd_tables.c).  Must be done before creating interfaces,
   2083 	 *    which timer_init() does indirectly.
   2084 	 * 5. Query kernel for route entry sizes (v4 and v6).
   2085 	 * 6. timer_init()  - Initialize timer related stuff
   2086 	 * 7. initifs() - Initialize our database of all known interfaces
   2087 	 * 8. init_router_targets() - Initialize our database of all known
   2088 	 *    router targets.
   2089 	 */
   2090 	ifsock_v4 = socket(AF_INET, SOCK_DGRAM, 0);
   2091 	if (ifsock_v4 < 0) {
   2092 		logperror("main: IPv4 socket open");
   2093 		exit(1);
   2094 	}
   2095 
   2096 	ifsock_v6 = socket(AF_INET6, SOCK_DGRAM, 0);
   2097 	if (ifsock_v6 < 0) {
   2098 		logperror("main: IPv6 socket open");
   2099 		exit(1);
   2100 	}
   2101 
   2102 	setup_eventpipe();
   2103 
   2104 	rtsock_v4 = setup_rtsock(AF_INET);
   2105 	rtsock_v6 = setup_rtsock(AF_INET6);
   2106 
   2107 	if (phyint_init() == -1) {
   2108 		logerr("cannot initialize physical interface structures");
   2109 		exit(1);
   2110 	}
   2111 
   2112 	if (mibwalk(mib_get_constants) == -1)
   2113 		exit(1);
   2114 
   2115 	timer_init();
   2116 
   2117 	initifs();
   2118 
   2119 	/*
   2120 	 * If we're operating in "adopt" mode and no interfaces need to be
   2121 	 * tracked, shut down (ifconfig(1M) will restart us on demand if
   2122 	 * interfaces are subsequently put into multipathing groups).
   2123 	 */
   2124 	if (adopt && phyint_instances == NULL)
   2125 		exit(0);
   2126 
   2127 	/*
   2128 	 * Main body. Keep listening for activity on any of the sockets
   2129 	 * that we are monitoring and take appropriate action as necessary.
   2130 	 * signals are also handled synchronously.
   2131 	 */
   2132 	for (;;) {
   2133 		if (poll(pollfds, pollfd_num, -1) < 0) {
   2134 			if (errno == EINTR)
   2135 				continue;
   2136 			logperror("main: poll");
   2137 			exit(1);
   2138 		}
   2139 		for (i = 0; i < pollfd_num; i++) {
   2140 			if ((pollfds[i].fd == -1) ||
   2141 			    !(pollfds[i].revents & POLLIN))
   2142 				continue;
   2143 			if (pollfds[i].fd == eventpipe_read) {
   2144 				in_signal(eventpipe_read);
   2145 				break;
   2146 			}
   2147 			if (pollfds[i].fd == rtsock_v4 ||
   2148 			    pollfds[i].fd == rtsock_v6) {
   2149 				process_rtsock(rtsock_v4, rtsock_v6);
   2150 				break;
   2151 			}
   2152 
   2153 			for (pii = phyint_instances; pii != NULL;
   2154 			    pii = pii->pii_next) {
   2155 				if (pollfds[i].fd == pii->pii_probe_sock) {
   2156 					if (pii->pii_af == AF_INET)
   2157 						in_data(pii);
   2158 					else
   2159 						in6_data(pii);
   2160 					break;
   2161 				}
   2162 			}
   2163 
   2164 			for (pi = phyints; pi != NULL; pi = pi->pi_next) {
   2165 				if (pi->pi_notes != 0 &&
   2166 				    pollfds[i].fd == dlpi_fd(pi->pi_dh)) {
   2167 					(void) dlpi_recv(pi->pi_dh, NULL, NULL,
   2168 					    NULL, NULL, 0, NULL);
   2169 					break;
   2170 				}
   2171 			}
   2172 
   2173 			if (pollfds[i].fd == lsock_v4)
   2174 				loopback_cmd(lsock_v4, AF_INET);
   2175 			else if (pollfds[i].fd == lsock_v6)
   2176 				loopback_cmd(lsock_v6, AF_INET6);
   2177 		}
   2178 	}
   2179 	/* NOTREACHED */
   2180 	return (EXIT_SUCCESS);
   2181 }
   2182 
   2183 static int
   2184 setup_listener(int af)
   2185 {
   2186 	int sock;
   2187 	int on;
   2188 	int len;
   2189 	int ret;
   2190 	struct sockaddr_storage laddr;
   2191 	struct sockaddr_in  *sin;
   2192 	struct sockaddr_in6 *sin6;
   2193 	struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
   2194 
   2195 	assert(af == AF_INET || af == AF_INET6);
   2196 
   2197 	sock = socket(af, SOCK_STREAM, 0);
   2198 	if (sock < 0) {
   2199 		logperror("setup_listener: socket");
   2200 		exit(1);
   2201 	}
   2202 
   2203 	on = 1;
   2204 	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
   2205 	    sizeof (on)) < 0) {
   2206 		logperror("setup_listener: setsockopt (SO_REUSEADDR)");
   2207 		exit(1);
   2208 	}
   2209 
   2210 	bzero(&laddr, sizeof (laddr));
   2211 	laddr.ss_family = af;
   2212 
   2213 	if (af == AF_INET) {
   2214 		sin = (struct sockaddr_in *)&laddr;
   2215 		sin->sin_port = htons(MPATHD_PORT);
   2216 		sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
   2217 		len = sizeof (struct sockaddr_in);
   2218 	} else {
   2219 		sin6 = (struct sockaddr_in6 *)&laddr;
   2220 		sin6->sin6_port = htons(MPATHD_PORT);
   2221 		sin6->sin6_addr = loopback_addr;
   2222 		len = sizeof (struct sockaddr_in6);
   2223 	}
   2224 
   2225 	ret = bind(sock, (struct sockaddr *)&laddr, len);
   2226 	if (ret < 0) {
   2227 		if (errno == EADDRINUSE) {
   2228 			/*
   2229 			 * Another instance of mpathd may be already active.
   2230 			 */
   2231 			logerr("main: is another instance of in.mpathd "
   2232 			    "already active?\n");
   2233 			exit(1);
   2234 		} else {
   2235 			(void) close(sock);
   2236 			return (-1);
   2237 		}
   2238 	}
   2239 	if (listen(sock, 30) < 0) {
   2240 		logperror("main: listen");
   2241 		exit(1);
   2242 	}
   2243 	if (poll_add(sock) == -1) {
   2244 		(void) close(sock);
   2245 		exit(1);
   2246 	}
   2247 
   2248 	return (sock);
   2249 }
   2250 
   2251 /*
   2252  * Table of commands and their expected size; used by loopback_cmd().
   2253  */
   2254 static struct {
   2255 	const char	*name;
   2256 	unsigned int	size;
   2257 } commands[] = {
   2258 	{ "MI_PING",		sizeof (uint32_t)	},
   2259 	{ "MI_OFFLINE",		sizeof (mi_offline_t)	},
   2260 	{ "MI_UNDO_OFFLINE",	sizeof (mi_undo_offline_t) },
   2261 	{ "MI_QUERY",		sizeof (mi_query_t)	}
   2262 };
   2263 
   2264 /*
   2265  * Commands received over the loopback interface come here (via libipmp).
   2266  */
   2267 static void
   2268 loopback_cmd(int sock, int family)
   2269 {
   2270 	int newfd;
   2271 	ssize_t len;
   2272 	boolean_t is_priv = _B_FALSE;
   2273 	struct sockaddr_storage	peer;
   2274 	struct sockaddr_in	*peer_sin;
   2275 	struct sockaddr_in6	*peer_sin6;
   2276 	socklen_t peerlen;
   2277 	union mi_commands mpi;
   2278 	char abuf[INET6_ADDRSTRLEN];
   2279 	uint_t cmd;
   2280 	int retval;
   2281 
   2282 	peerlen = sizeof (peer);
   2283 	newfd = accept(sock, (struct sockaddr *)&peer, &peerlen);
   2284 	if (newfd < 0) {
   2285 		logperror("loopback_cmd: accept");
   2286 		return;
   2287 	}
   2288 
   2289 	switch (family) {
   2290 	case AF_INET:
   2291 		/*
   2292 		 * Validate the address and port to make sure that
   2293 		 * non privileged processes don't connect and start
   2294 		 * talking to us.
   2295 		 */
   2296 		if (peerlen != sizeof (struct sockaddr_in)) {
   2297 			logerr("loopback_cmd: AF_INET peerlen %d\n", peerlen);
   2298 			(void) close(newfd);
   2299 			return;
   2300 		}
   2301 		peer_sin = (struct sockaddr_in *)&peer;
   2302 		is_priv = ntohs(peer_sin->sin_port) < IPPORT_RESERVED;
   2303 		(void) inet_ntop(AF_INET, &peer_sin->sin_addr.s_addr,
   2304 		    abuf, sizeof (abuf));
   2305 
   2306 		if (ntohl(peer_sin->sin_addr.s_addr) != INADDR_LOOPBACK) {
   2307 			logerr("Attempt to connect from addr %s port %d\n",
   2308 			    abuf, ntohs(peer_sin->sin_port));
   2309 			(void) close(newfd);
   2310 			return;
   2311 		}
   2312 		break;
   2313 
   2314 	case AF_INET6:
   2315 		if (peerlen != sizeof (struct sockaddr_in6)) {
   2316 			logerr("loopback_cmd: AF_INET6 peerlen %d\n", peerlen);
   2317 			(void) close(newfd);
   2318 			return;
   2319 		}
   2320 		/*
   2321 		 * Validate the address and port to make sure that
   2322 		 * non privileged processes don't connect and start
   2323 		 * talking to us.
   2324 		 */
   2325 		peer_sin6 = (struct sockaddr_in6 *)&peer;
   2326 		is_priv = ntohs(peer_sin6->sin6_port) < IPPORT_RESERVED;
   2327 		(void) inet_ntop(AF_INET6, &peer_sin6->sin6_addr, abuf,
   2328 		    sizeof (abuf));
   2329 		if (!IN6_IS_ADDR_LOOPBACK(&peer_sin6->sin6_addr)) {
   2330 			logerr("Attempt to connect from addr %s port %d\n",
   2331 			    abuf, ntohs(peer_sin6->sin6_port));
   2332 			(void) close(newfd);
   2333 			return;
   2334 		}
   2335 
   2336 	default:
   2337 		logdebug("loopback_cmd: family %d\n", family);
   2338 		(void) close(newfd);
   2339 		return;
   2340 	}
   2341 
   2342 	/*
   2343 	 * The sizeof the 'mpi' buffer corresponds to the maximum size of
   2344 	 * all supported commands
   2345 	 */
   2346 	len = read(newfd, &mpi, sizeof (mpi));
   2347 
   2348 	/*
   2349 	 * In theory, we can receive any sized message for a stream socket,
   2350 	 * but we don't expect that to happen for a small message over a
   2351 	 * loopback connection.
   2352 	 */
   2353 	if (len < sizeof (uint32_t)) {
   2354 		logerr("loopback_cmd: bad command format or read returns "
   2355 		    "partial data %d\n", len);
   2356 		(void) close(newfd);
   2357 		return;
   2358 	}
   2359 
   2360 	cmd = mpi.mi_command;
   2361 	if (cmd >= MI_NCMD) {
   2362 		logerr("loopback_cmd: unknown command id `%d'\n", cmd);
   2363 		(void) close(newfd);
   2364 		return;
   2365 	}
   2366 
   2367 	/*
   2368 	 * Only MI_PING and MI_QUERY can come from unprivileged sources.
   2369 	 */
   2370 	if (!is_priv && (cmd != MI_QUERY && cmd != MI_PING)) {
   2371 		logerr("Unprivileged request from %s for privileged "
   2372 		    "command %s\n", abuf, commands[cmd].name);
   2373 		(void) close(newfd);
   2374 		return;
   2375 	}
   2376 
   2377 	if (len < commands[cmd].size) {
   2378 		logerr("loopback_cmd: short %s command (expected %d, got %d)\n",
   2379 		    commands[cmd].name, commands[cmd].size, len);
   2380 		(void) close(newfd);
   2381 		return;
   2382 	}
   2383 
   2384 	retval = process_cmd(newfd, &mpi);
   2385 	if (retval != IPMP_SUCCESS) {
   2386 		logerr("failed processing %s: %s\n", commands[cmd].name,
   2387 		    ipmp_errmsg(retval));
   2388 	}
   2389 	(void) close(newfd);
   2390 }
   2391 
   2392 /*
   2393  * Process the commands received via libipmp.
   2394  */
   2395 static unsigned int
   2396 process_cmd(int newfd, union mi_commands *mpi)
   2397 {
   2398 	struct phyint *pi;
   2399 	struct mi_offline *mio;
   2400 	struct mi_undo_offline *miu;
   2401 	unsigned int retval;
   2402 
   2403 	switch (mpi->mi_command) {
   2404 	case MI_PING:
   2405 		return (send_result(newfd, IPMP_SUCCESS, 0));
   2406 
   2407 	case MI_OFFLINE:
   2408 		mio = &mpi->mi_ocmd;
   2409 
   2410 		pi = phyint_lookup(mio->mio_ifname);
   2411 		if (pi == NULL)
   2412 			return (send_result(newfd, IPMP_EUNKIF, 0));
   2413 
   2414 		retval = phyint_offline(pi, mio->mio_min_redundancy);
   2415 		if (retval == IPMP_FAILURE)
   2416 			return (send_result(newfd, IPMP_FAILURE, errno));
   2417 
   2418 		return (send_result(newfd, retval, 0));
   2419 
   2420 	case MI_UNDO_OFFLINE:
   2421 		miu = &mpi->mi_ucmd;
   2422 
   2423 		pi = phyint_lookup(miu->miu_ifname);
   2424 		if (pi == NULL)
   2425 			return (send_result(newfd, IPMP_EUNKIF, 0));
   2426 
   2427 		retval = phyint_undo_offline(pi);
   2428 		if (retval == IPMP_FAILURE)
   2429 			return (send_result(newfd, IPMP_FAILURE, errno));
   2430 
   2431 		return (send_result(newfd, retval, 0));
   2432 
   2433 	case MI_QUERY:
   2434 		return (process_query(newfd, &mpi->mi_qcmd));
   2435 
   2436 	default:
   2437 		break;
   2438 	}
   2439 
   2440 	return (send_result(newfd, IPMP_EPROTO, 0));
   2441 }
   2442 
   2443 /*
   2444  * Process the query request pointed to by `miq' and send a reply on file
   2445  * descriptor `fd'.  Returns an IPMP error code.
   2446  */
   2447 static unsigned int
   2448 process_query(int fd, mi_query_t *miq)
   2449 {
   2450 	ipmp_addrinfo_t		*adinfop;
   2451 	ipmp_addrinfolist_t	*adlp;
   2452 	ipmp_groupinfo_t	*grinfop;
   2453 	ipmp_groupinfolist_t	*grlp;
   2454 	ipmp_grouplist_t	*grlistp;
   2455 	ipmp_ifinfo_t		*ifinfop;
   2456 	ipmp_ifinfolist_t	*iflp;
   2457 	ipmp_snap_t		*snap;
   2458 	unsigned int		retval;
   2459 
   2460 	switch (miq->miq_inforeq) {
   2461 	case IPMP_ADDRINFO:
   2462 		retval = getgraddrinfo(miq->miq_grname, &miq->miq_addr,
   2463 		    &adinfop);
   2464 		if (retval != IPMP_SUCCESS)
   2465 			return (send_result(fd, retval, errno));
   2466 
   2467 		retval = send_result(fd, IPMP_SUCCESS, 0);
   2468 		if (retval == IPMP_SUCCESS)
   2469 			retval = send_addrinfo(fd, adinfop);
   2470 
   2471 		ipmp_freeaddrinfo(adinfop);
   2472 		return (retval);
   2473 
   2474 	case IPMP_GROUPLIST:
   2475 		retval = getgrouplist(&grlistp);
   2476 		if (retval != IPMP_SUCCESS)
   2477 			return (send_result(fd, retval, errno));
   2478 
   2479 		retval = send_result(fd, IPMP_SUCCESS, 0);
   2480 		if (retval == IPMP_SUCCESS)
   2481 			retval = send_grouplist(fd, grlistp);
   2482 
   2483 		ipmp_freegrouplist(grlistp);
   2484 		return (retval);
   2485 
   2486 	case IPMP_GROUPINFO:
   2487 		miq->miq_grname[LIFGRNAMSIZ - 1] = '\0';
   2488 		retval = getgroupinfo(miq->miq_grname, &grinfop);
   2489 		if (retval != IPMP_SUCCESS)
   2490 			return (send_result(fd, retval, errno));
   2491 
   2492 		retval = send_result(fd, IPMP_SUCCESS, 0);
   2493 		if (retval == IPMP_SUCCESS)
   2494 			retval = send_groupinfo(fd, grinfop);
   2495 
   2496 		ipmp_freegroupinfo(grinfop);
   2497 		return (retval);
   2498 
   2499 	case IPMP_IFINFO:
   2500 		miq->miq_ifname[LIFNAMSIZ - 1] = '\0';
   2501 		retval = getifinfo(miq->miq_ifname, &ifinfop);
   2502 		if (retval != IPMP_SUCCESS)
   2503 			return (send_result(fd, retval, errno));
   2504 
   2505 		retval = send_result(fd, IPMP_SUCCESS, 0);
   2506 		if (retval == IPMP_SUCCESS)
   2507 			retval = send_ifinfo(fd, ifinfop);
   2508 
   2509 		ipmp_freeifinfo(ifinfop);
   2510 		return (retval);
   2511 
   2512 	case IPMP_SNAP:
   2513 		/*
   2514 		 * Before taking the snapshot, sync with the kernel.
   2515 		 */
   2516 		initifs();
   2517 
   2518 		retval = getsnap(&snap);
   2519 		if (retval != IPMP_SUCCESS)
   2520 			return (send_result(fd, retval, errno));
   2521 
   2522 		retval = send_result(fd, IPMP_SUCCESS, 0);
   2523 		if (retval != IPMP_SUCCESS)
   2524 			goto out;
   2525 
   2526 		retval = ipmp_writetlv(fd, IPMP_SNAP, sizeof (*snap), snap);
   2527 		if (retval != IPMP_SUCCESS)
   2528 			goto out;
   2529 
   2530 		retval = send_grouplist(fd, snap->sn_grlistp);
   2531 		if (retval != IPMP_SUCCESS)
   2532 			goto out;
   2533 
   2534 		iflp = snap->sn_ifinfolistp;
   2535 		for (; iflp != NULL; iflp = iflp->ifl_next) {
   2536 			retval = send_ifinfo(fd, iflp->ifl_ifinfop);
   2537 			if (retval != IPMP_SUCCESS)
   2538 				goto out;
   2539 		}
   2540 
   2541 		grlp = snap->sn_grinfolistp;
   2542 		for (; grlp != NULL; grlp = grlp->grl_next) {
   2543 			retval = send_groupinfo(fd, grlp->grl_grinfop);
   2544 			if (retval != IPMP_SUCCESS)
   2545 				goto out;
   2546 		}
   2547 
   2548 		adlp = snap->sn_adinfolistp;
   2549 		for (; adlp != NULL; adlp = adlp->adl_next) {
   2550 			retval = send_addrinfo(fd, adlp->adl_adinfop);
   2551 			if (retval != IPMP_SUCCESS)
   2552 				goto out;
   2553 		}
   2554 	out:
   2555 		ipmp_snap_free(snap);
   2556 		return (retval);
   2557 
   2558 	default:
   2559 		break;
   2560 
   2561 	}
   2562 	return (send_result(fd, IPMP_EPROTO, 0));
   2563 }
   2564 
   2565 /*
   2566  * Send the group information pointed to by `grinfop' on file descriptor `fd'.
   2567  * Returns an IPMP error code.
   2568  */
   2569 static unsigned int
   2570 send_groupinfo(int fd, ipmp_groupinfo_t *grinfop)
   2571 {
   2572 	ipmp_iflist_t	*iflistp = grinfop->gr_iflistp;
   2573 	ipmp_addrlist_t	*adlistp = grinfop->gr_adlistp;
   2574 	unsigned int	retval;
   2575 
   2576 	retval = ipmp_writetlv(fd, IPMP_GROUPINFO, sizeof (*grinfop), grinfop);
   2577 	if (retval != IPMP_SUCCESS)
   2578 		return (retval);
   2579 
   2580 	retval = ipmp_writetlv(fd, IPMP_IFLIST,
   2581 	    IPMP_IFLIST_SIZE(iflistp->il_nif), iflistp);
   2582 	if (retval != IPMP_SUCCESS)
   2583 		return (retval);
   2584 
   2585 	return (ipmp_writetlv(fd, IPMP_ADDRLIST,
   2586 	    IPMP_ADDRLIST_SIZE(adlistp->al_naddr), adlistp));
   2587 }
   2588 
   2589 /*
   2590  * Send the interface information pointed to by `ifinfop' on file descriptor
   2591  * `fd'.  Returns an IPMP error code.
   2592  */
   2593 static unsigned int
   2594 send_ifinfo(int fd, ipmp_ifinfo_t *ifinfop)
   2595 {
   2596 	ipmp_addrlist_t	*adlist4p = ifinfop->if_targinfo4.it_targlistp;
   2597 	ipmp_addrlist_t	*adlist6p = ifinfop->if_targinfo6.it_targlistp;
   2598 	unsigned int	retval;
   2599 
   2600 	retval = ipmp_writetlv(fd, IPMP_IFINFO, sizeof (*ifinfop), ifinfop);
   2601 	if (retval != IPMP_SUCCESS)
   2602 		return (retval);
   2603 
   2604 	retval = ipmp_writetlv(fd, IPMP_ADDRLIST,
   2605 	    IPMP_ADDRLIST_SIZE(adlist4p->al_naddr), adlist4p);
   2606 	if (retval != IPMP_SUCCESS)
   2607 		return (retval);
   2608 
   2609 	return (ipmp_writetlv(fd, IPMP_ADDRLIST,
   2610 	    IPMP_ADDRLIST_SIZE(adlist6p->al_naddr), adlist6p));
   2611 }
   2612 
   2613 /*
   2614  * Send the address information pointed to by `adinfop' on file descriptor
   2615  * `fd'.  Returns an IPMP error code.
   2616  */
   2617 static unsigned int
   2618 send_addrinfo(int fd, ipmp_addrinfo_t *adinfop)
   2619 {
   2620 	return (ipmp_writetlv(fd, IPMP_ADDRINFO, sizeof (*adinfop), adinfop));
   2621 }
   2622 
   2623 /*
   2624  * Send the group list pointed to by `grlistp' on file descriptor `fd'.
   2625  * Returns an IPMP error code.
   2626  */
   2627 static unsigned int
   2628 send_grouplist(int fd, ipmp_grouplist_t *grlistp)
   2629 {
   2630 	return (ipmp_writetlv(fd, IPMP_GROUPLIST,
   2631 	    IPMP_GROUPLIST_SIZE(grlistp->gl_ngroup), grlistp));
   2632 }
   2633 
   2634 /*
   2635  * Initialize an mi_result_t structure using `error' and `syserror' and
   2636  * send it on file descriptor `fd'.  Returns an IPMP error code.
   2637  */
   2638 static unsigned int
   2639 send_result(int fd, unsigned int error, int syserror)
   2640 {
   2641 	mi_result_t me;
   2642 
   2643 	me.me_mpathd_error = error;
   2644 	if (error == IPMP_FAILURE)
   2645 		me.me_sys_error = syserror;
   2646 	else
   2647 		me.me_sys_error = 0;
   2648 
   2649 	return (ipmp_write(fd, &me, sizeof (me)));
   2650 }
   2651 
   2652 /*
   2653  * Daemonize the process.
   2654  */
   2655 static boolean_t
   2656 daemonize(void)
   2657 {
   2658 	switch (fork()) {
   2659 	case -1:
   2660 		return (_B_FALSE);
   2661 
   2662 	case  0:
   2663 		/*
   2664 		 * Lose our controlling terminal, and become both a session
   2665 		 * leader and a process group leader.
   2666 		 */
   2667 		if (setsid() == -1)
   2668 			return (_B_FALSE);
   2669 
   2670 		/*
   2671 		 * Under POSIX, a session leader can accidentally (through
   2672 		 * open(2)) acquire a controlling terminal if it does not
   2673 		 * have one.  Just to be safe, fork() again so we are not a
   2674 		 * session leader.
   2675 		 */
   2676 		switch (fork()) {
   2677 		case -1:
   2678 			return (_B_FALSE);
   2679 
   2680 		case 0:
   2681 			(void) chdir("/");
   2682 			(void) umask(022);
   2683 			(void) fdwalk(closefunc, NULL);
   2684 			break;
   2685 
   2686 		default:
   2687 			_exit(EXIT_SUCCESS);
   2688 		}
   2689 		break;
   2690 
   2691 	default:
   2692 		_exit(EXIT_SUCCESS);
   2693 	}
   2694 
   2695 	return (_B_TRUE);
   2696 }
   2697 
   2698 /*
   2699  * The parent has created some fds before forking on purpose, keep them open.
   2700  */
   2701 static int
   2702 closefunc(void *not_used, int fd)
   2703 /* ARGSUSED */
   2704 {
   2705 	if (fd != lsock_v4 && fd != lsock_v6)
   2706 		(void) close(fd);
   2707 	return (0);
   2708 }
   2709 
   2710 /* LOGGER */
   2711 
   2712 #include <syslog.h>
   2713 
   2714 /*
   2715  * Logging routines.  All routines log to syslog, unless the daemon is
   2716  * running in the foreground, in which case the logging goes to stderr.
   2717  *
   2718  * The following routines are available:
   2719  *
   2720  *	logdebug(): A printf-like function for outputting debug messages
   2721  *	(messages at LOG_DEBUG) that are only of use to developers.
   2722  *
   2723  *	logtrace(): A printf-like function for outputting tracing messages
   2724  *	(messages at LOG_INFO) from the daemon.	 This is typically used
   2725  *	to log the receipt of interesting network-related conditions.
   2726  *
   2727  *	logerr(): A printf-like function for outputting error messages
   2728  *	(messages at LOG_ERR) from the daemon.
   2729  *
   2730  *	logperror*(): A set of functions used to output error messages
   2731  *	(messages at LOG_ERR); these automatically append strerror(errno)
   2732  *	and a newline to the message passed to them.
   2733  *
   2734  * NOTE: since the logging functions write to syslog, the messages passed
   2735  *	 to them are not eligible for localization.  Thus, gettext() must
   2736  *	 *not* be used.
   2737  */
   2738 
   2739 static int logging = 0;
   2740 
   2741 static void
   2742 initlog(void)
   2743 {
   2744 	logging++;
   2745 	openlog("in.mpathd", LOG_PID, LOG_DAEMON);
   2746 }
   2747 
   2748 /* PRINTFLIKE2 */
   2749 void
   2750 logmsg(int pri, const char *fmt, ...)
   2751 {
   2752 	va_list ap;
   2753 
   2754 	va_start(ap, fmt);
   2755 
   2756 	if (logging)
   2757 		vsyslog(pri, fmt, ap);
   2758 	else
   2759 		(void) vfprintf(stderr, fmt, ap);
   2760 	va_end(ap);
   2761 }
   2762 
   2763 /* PRINTFLIKE1 */
   2764 void
   2765 logperror(const char *str)
   2766 {
   2767 	if (logging)
   2768 		syslog(LOG_ERR, "%s: %m\n", str);
   2769 	else
   2770 		(void) fprintf(stderr, "%s: %s\n", str, strerror(errno));
   2771 }
   2772 
   2773 void
   2774 logperror_pii(struct phyint_instance *pii, const char *str)
   2775 {
   2776 	if (logging) {
   2777 		syslog(LOG_ERR, "%s (%s %s): %m\n",
   2778 		    str, AF_STR(pii->pii_af), pii->pii_phyint->pi_name);
   2779 	} else {
   2780 		(void) fprintf(stderr, "%s (%s %s): %s\n",
   2781 		    str, AF_STR(pii->pii_af), pii->pii_phyint->pi_name,
   2782 		    strerror(errno));
   2783 	}
   2784 }
   2785 
   2786 void
   2787 logperror_li(struct logint *li, const char *str)
   2788 {
   2789 	struct	phyint_instance	*pii = li->li_phyint_inst;
   2790 
   2791 	if (logging) {
   2792 		syslog(LOG_ERR, "%s (%s %s): %m\n",
   2793 		    str, AF_STR(pii->pii_af), li->li_name);
   2794 	} else {
   2795 		(void) fprintf(stderr, "%s (%s %s): %s\n",
   2796 		    str, AF_STR(pii->pii_af), li->li_name,
   2797 		    strerror(errno));
   2798 	}
   2799 }
   2800 
   2801 void
   2802 close_probe_socket(struct phyint_instance *pii, boolean_t polled)
   2803 {
   2804 	if (polled)
   2805 		(void) poll_remove(pii->pii_probe_sock);
   2806 	(void) close(pii->pii_probe_sock);
   2807 	pii->pii_probe_sock = -1;
   2808 	pii->pii_basetime_inited = 0;
   2809 }
   2810 
   2811 boolean_t
   2812 addrlist_add(addrlist_t **addrsp, const char *name, uint64_t flags,
   2813     struct sockaddr_storage *ssp)
   2814 {
   2815 	addrlist_t *addrp;
   2816 
   2817 	if ((addrp = malloc(sizeof (addrlist_t))) == NULL)
   2818 		return (_B_FALSE);
   2819 
   2820 	(void) strlcpy(addrp->al_name, name, LIFNAMSIZ);
   2821 	addrp->al_flags = flags;
   2822 	addrp->al_addr = *ssp;
   2823 	addrp->al_next = *addrsp;
   2824 	*addrsp = addrp;
   2825 	return (_B_TRUE);
   2826 }
   2827 
   2828 void
   2829 addrlist_free(addrlist_t **addrsp)
   2830 {
   2831 	addrlist_t *addrp, *next_addrp;
   2832 
   2833 	for (addrp = *addrsp; addrp != NULL; addrp = next_addrp) {
   2834 		next_addrp = addrp->al_next;
   2835 		free(addrp);
   2836 	}
   2837 	*addrsp = NULL;
   2838 }
   2839 
   2840 /*
   2841  * Send down a T_OPTMGMT_REQ to ip asking for all data in the various
   2842  * tables defined by mib2.h. Pass the table information returned to the
   2843  * supplied function.
   2844  */
   2845 static int
   2846 mibwalk(void (*proc)(mib_item_t *))
   2847 {
   2848 	mib_item_t		*head_item = NULL;
   2849 	mib_item_t		*last_item = NULL;
   2850 	mib_item_t		*tmp;
   2851 	struct strbuf		ctlbuf, databuf;
   2852 	int			flags;
   2853 	int			rval;
   2854 	uintptr_t		buf[512 / sizeof (uintptr_t)];
   2855 	struct T_optmgmt_req	*tor = (struct T_optmgmt_req *)buf;
   2856 	struct T_optmgmt_ack	*toa = (struct T_optmgmt_ack *)buf;
   2857 	struct T_error_ack	*tea = (struct T_error_ack *)buf;
   2858 	struct opthdr		*req, *optp;
   2859 	int			status = -1;
   2860 
   2861 	if (mibfd == -1) {
   2862 		if ((mibfd = open("/dev/ip", O_RDWR)) < 0) {
   2863 			logperror("mibwalk(): ip open");
   2864 			return (status);
   2865 		}
   2866 	}
   2867 
   2868 	tor->PRIM_type = T_SVR4_OPTMGMT_REQ;
   2869 	tor->OPT_offset = sizeof (struct T_optmgmt_req);
   2870 	tor->OPT_length = sizeof (struct opthdr);
   2871 	tor->MGMT_flags = T_CURRENT;
   2872 
   2873 	/*
   2874 	 * Note: we use the special level value below so that IP will return
   2875 	 * us information concerning IRE_MARK_TESTHIDDEN routes.
   2876 	 */
   2877 	req = (struct opthdr *)&tor[1];
   2878 	req->level = EXPER_IP_AND_ALL_IRES;
   2879 	req->name  = 0;
   2880 	req->len   = 0;
   2881 
   2882 	ctlbuf.buf = (char *)&buf;
   2883 	ctlbuf.len = tor->OPT_length + tor->OPT_offset;
   2884 
   2885 	if (putmsg(mibfd, &ctlbuf, NULL, 0) == -1) {
   2886 		logperror("mibwalk(): putmsg(ctl)");
   2887 		return (status);
   2888 	}
   2889 
   2890 	/*
   2891 	 * The response consists of multiple T_OPTMGMT_ACK msgs, 1 msg for
   2892 	 * each table defined in mib2.h.  Each T_OPTMGMT_ACK msg contains
   2893 	 * a control and data part. The control part contains a struct
   2894 	 * T_optmgmt_ack followed by a struct opthdr. The 'opthdr' identifies
   2895 	 * the level, name and length of the data in the data part. The
   2896 	 * data part contains the actual table data. The last message
   2897 	 * is an end-of-data (EOD), consisting of a T_OPTMGMT_ACK and a
   2898 	 * single option with zero optlen.
   2899 	 */
   2900 	for (;;) {
   2901 		errno = flags = 0;
   2902 		ctlbuf.maxlen = sizeof (buf);
   2903 		rval = getmsg(mibfd, &ctlbuf, NULL, &flags);
   2904 		if (rval & MORECTL || rval < 0) {
   2905 			if (errno == EINTR)
   2906 				continue;
   2907 			logerr("mibwalk(): getmsg(ctl) ret: %d err: %d\n",
   2908 			    rval, errno);
   2909 			goto error;
   2910 		}
   2911 		if (ctlbuf.len < sizeof (t_scalar_t)) {
   2912 			logerr("mibwalk(): ctlbuf.len %d\n", ctlbuf.len);
   2913 			goto error;
   2914 		}
   2915 
   2916 		switch (toa->PRIM_type) {
   2917 		case T_ERROR_ACK:
   2918 			if (ctlbuf.len < sizeof (struct T_error_ack)) {
   2919 				logerr("mibwalk(): T_ERROR_ACK ctlbuf "
   2920 				    "too short: %d\n", ctlbuf.len);
   2921 				goto error;
   2922 			}
   2923 			logerr("mibwalk(): T_ERROR_ACK: TLI_err = 0x%lx: %s\n"
   2924 			    " UNIX_err = 0x%lx\n", tea->TLI_error,
   2925 			    t_strerror(tea->TLI_error), tea->UNIX_error);
   2926 			goto error;
   2927 
   2928 		case T_OPTMGMT_ACK:
   2929 			optp = (struct opthdr *)&toa[1];
   2930 			if (ctlbuf.len < (sizeof (struct T_optmgmt_ack) +
   2931 			    sizeof (struct opthdr))) {
   2932 				logerr("mibwalk(): T_OPTMGMT_ACK ctlbuf too "
   2933 				    "short: %d\n", ctlbuf.len);
   2934 				goto error;
   2935 			}
   2936 			if (toa->MGMT_flags != T_SUCCESS) {
   2937 				logerr("mibwalk(): MGMT_flags != T_SUCCESS: "
   2938 				    "0x%lx\n", toa->MGMT_flags);
   2939 				goto error;
   2940 			}
   2941 			break;
   2942 
   2943 		default:
   2944 			goto error;
   2945 		}
   2946 		/* The following assert also implies MGMT_flags == T_SUCCESS */
   2947 		assert(toa->PRIM_type == T_OPTMGMT_ACK);
   2948 
   2949 		/*
   2950 		 * We have reached the end of this T_OPTMGMT_ACK
   2951 		 * message. If this is the last message i.e EOD,
   2952 		 * break, else process the next T_OPTMGMT_ACK msg.
   2953 		 */
   2954 		if (rval == 0) {
   2955 			if (optp->len == 0 && optp->name == 0 &&
   2956 			    optp->level == 0) {
   2957 				/* This is the EOD message. */
   2958 				break;
   2959 			}
   2960 			/* Not EOD but no data to retrieve */
   2961 			continue;
   2962 		}
   2963 
   2964 		/*
   2965 		 * We should only be here if MOREDATA was set.
   2966 		 * Allocate an empty mib_item_t and link into the list
   2967 		 * of MIB items.
   2968 		 */
   2969 		if ((tmp = malloc(sizeof (*tmp))) == NULL) {
   2970 			logperror("mibwalk(): malloc() failed.");
   2971 			goto error;
   2972 		}
   2973 		if (last_item != NULL)
   2974 			last_item->mi_next = tmp;
   2975 		else
   2976 			head_item = tmp;
   2977 		last_item = tmp;
   2978 		last_item->mi_next = NULL;
   2979 		last_item->mi_opthdr = *optp;
   2980 		last_item->mi_valp = malloc(optp->len);
   2981 		if (last_item->mi_valp == NULL) {
   2982 			logperror("mibwalk(): malloc() failed.");
   2983 			goto error;
   2984 		}
   2985 
   2986 		databuf.maxlen = last_item->mi_opthdr.len;
   2987 		databuf.buf = (char *)last_item->mi_valp;
   2988 		databuf.len = 0;
   2989 
   2990 		/* Retrieve the actual MIB data */
   2991 		for (;;) {
   2992 			flags = 0;
   2993 			if ((rval = getmsg(mibfd, NULL, &databuf,
   2994 			    &flags)) != 0) {
   2995 				if (rval < 0 && errno == EINTR)
   2996 					continue;
   2997 				/*
   2998 				 * We shouldn't get MOREDATA here so treat that
   2999 				 * as an error.
   3000 				 */
   3001 				logperror("mibwalk(): getmsg(data)");
   3002 				goto error;
   3003 			}
   3004 			break;
   3005 		}
   3006 	}
   3007 	status = 0;
   3008 	/* Pass the accumulated MIB data to the supplied function pointer */
   3009 	(*proc)(head_item);
   3010 error:
   3011 	while (head_item != NULL) {
   3012 		tmp = head_item;
   3013 		head_item = tmp->mi_next;
   3014 		free(tmp->mi_valp);
   3015 		free(tmp);
   3016 	}
   3017 	return (status);
   3018 }
   3019 
   3020 /*
   3021  * Parse the supplied mib2 information to get the size of routing table
   3022  * entries. This is needed when running in a branded zone where the
   3023  * Solaris application environment and the Solaris kernel may not be the
   3024  * the same release version.
   3025  */
   3026 static void
   3027 mib_get_constants(mib_item_t *item)
   3028 {
   3029 	mib2_ip_t		*ipv4;
   3030 	mib2_ipv6IfStatsEntry_t	*ipv6;
   3031 
   3032 	for (; item != NULL; item = item->mi_next) {
   3033 		if (item->mi_opthdr.name != 0)
   3034 			continue;
   3035 		if (item->mi_opthdr.level == MIB2_IP) {
   3036 			ipv4 = (mib2_ip_t *)item->mi_valp;
   3037 			ipRouteEntrySize = ipv4->ipRouteEntrySize;
   3038 		} else if (item->mi_opthdr.level == MIB2_IP6) {
   3039 			ipv6 = (mib2_ipv6IfStatsEntry_t *)item->mi_valp;
   3040 			ipv6RouteEntrySize = ipv6->ipv6RouteEntrySize;
   3041 		}
   3042 	}
   3043 }
   3044