Home | History | Annotate | Download | only in in.routed
      1 /*
      2  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
      3  * Use is subject to license terms.
      4  *
      5  * Copyright (c) 1983, 1988, 1993
      6  *	The Regents of the University of California.  All rights reserved.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. All advertising materials mentioning features or use of this software
     17  *    must display the following acknowledgment:
     18  *	This product includes software developed by the University of
     19  *	California, Berkeley and its contributors.
     20  * 4. Neither the name of the University nor the names of its contributors
     21  *    may be used to endorse or promote products derived from this software
     22  *    without specific prior written permission.
     23  *
     24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34  * SUCH DAMAGE.
     35  *
     36  * $FreeBSD: src/sbin/routed/table.c,v 1.15 2000/08/11 08:24:38 sheldonh Exp $
     37  */
     38 
     39 #include "defs.h"
     40 #include <fcntl.h>
     41 #include <stropts.h>
     42 #include <sys/tihdr.h>
     43 #include <inet/mib2.h>
     44 #include <inet/ip.h>
     45 
/*
 * This structure is used to store a disassembled routing socket message.
 */
struct rt_addrinfo {
	/* NOTE(review): presumably the RTA_* bitmask of addresses present */
	int	rti_addrs;
	/* One pointer slot per address index, RTAX_MAX slots in total. */
	struct sockaddr_storage *rti_info[RTAX_MAX];
};
     51 
/* Forward declarations for helpers private to this file. */
static struct rt_spare *rts_better(struct rt_entry *);
static struct rt_spare rts_empty = EMPTY_RT_SPARE;
static void set_need_flash(void);
static void rtbad(struct rt_entry *, struct interface *);
static int rt_xaddrs(struct rt_addrinfo *, struct sockaddr_storage *,
    char *, int);
static struct interface *gwkludge_iflookup(in_addr_t, in_addr_t, in_addr_t);
static struct interface *lifp_iflookup(in_addr_t, const char *);

struct radix_node_head *rhead;		/* root of the radix tree */

/*
 * Set when a flash update is needed.  It starts out as _B_TRUE in order
 * to suppress the first flash update.
 */
boolean_t need_flash = _B_TRUE;

struct timeval age_timer;		/* next check of old routes */
struct timeval need_kern = {		/* need to update kernel table */
	EPOCH+MIN_WAITTIME-1, 0
};

/* NOTE(review): presumably the number of routes in the table; confirm. */
static uint32_t	total_routes;

/*
 * Round a size up to the next multiple of sizeof (long); a size that is
 * zero or negative yields sizeof (long).  The bit trick relies on
 * sizeof (long) being a power of two.
 */
#define	ROUNDUP_LONG(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof (long) - 1))) : sizeof (long))
     75 
     76 /*
     77  * It is desirable to "aggregate" routes, to combine differing routes of
     78  * the same metric and next hop into a common route with a smaller netmask
     79  * or to suppress redundant routes, routes that add no information to
     80  * routes with smaller netmasks.
     81  *
     82  * A route is redundant if and only if any and all routes with smaller
     83  * but matching netmasks and nets are the same.  Since routes are
     84  * kept sorted in the radix tree, redundant routes always come second.
     85  *
     86  * There are two kinds of aggregations.  First, two routes of the same bit
     87  * mask and differing only in the least significant bit of the network
     88  * number can be combined into a single route with a coarser mask.
     89  *
     90  * Second, a route can be suppressed in favor of another route with a more
     91  * coarse mask provided no incompatible routes with intermediate masks
     92  * are present.  The second kind of aggregation involves suppressing routes.
     93  * A route must not be suppressed if an incompatible route exists with
     94  * an intermediate mask, since the suppressed route would be covered
     95  * by the intermediate.
     96  *
     97  * This code relies on the radix tree walk encountering routes
     98  * sorted first by address, with the smallest address first.
     99  */
    100 
/*
 * The aggregation table.  Slots are chained through ag_fine/ag_cors:
 * ag_corsest and ag_finest are the two ends of the list of slots in use,
 * and ag_avail heads the list of free slots (linked through ag_fine).
 */
static struct ag_info ag_slots[NUM_AG_SLOTS], *ag_avail, *ag_corsest,
	*ag_finest;

/*
 * Debugging consistency check: every slot must be on exactly one of the
 * free list or the in-use list, so walking both lists through ag_fine
 * must count NUM_AG_SLOTS entries.  Aborts otherwise.  Compiles to
 * nothing unless DEBUG_AG is defined.
 */
#ifdef DEBUG_AG
#define	CHECK_AG() do { int acnt = 0; struct ag_info *cag;	\
	for (cag = ag_avail; cag != NULL; cag = cag->ag_fine)	\
		acnt++;						\
	for (cag = ag_corsest; cag != NULL; cag = cag->ag_fine)	\
		acnt++;						\
	if (acnt != NUM_AG_SLOTS)				\
		abort();					\
} while (_B_FALSE)
#else
#define	CHECK_AG()	(void)0
#endif
    116 
    117 
    118 /*
    119  * Output the contents of an aggregation table slot.
    120  *	This function must always be immediately followed with the deletion
    121  *	of the target slot.
    122  */
    123 static void
    124 ag_out(struct ag_info *ag, void (*out)(struct ag_info *))
    125 {
    126 	struct ag_info *ag_cors;
    127 	uint32_t bit;
    128 
    129 
    130 	/* Forget it if this route should not be output for split-horizon. */
    131 	if (ag->ag_state & AGS_SPLIT_HZ)
    132 		return;
    133 
    134 	/*
    135 	 * If we output both the even and odd twins, then the immediate parent,
    136 	 * if it is present, is redundant, unless the parent manages to
    137 	 * aggregate into something coarser.
    138 	 * On successive calls, this code detects the even and odd twins,
    139 	 * and marks the parent.
    140 	 *
    141 	 * Note that the order in which the radix tree code emits routes
    142 	 * ensures that the twins are seen before the parent is emitted.
    143 	 */
    144 	ag_cors = ag->ag_cors;
    145 	if (ag_cors != NULL &&
    146 	    ag_cors->ag_mask == (ag->ag_mask << 1) &&
    147 	    ag_cors->ag_dst_h == (ag->ag_dst_h & ag_cors->ag_mask)) {
    148 		ag_cors->ag_state |= ((ag_cors->ag_dst_h == ag->ag_dst_h) ?
    149 		    AGS_REDUN0 : AGS_REDUN1);
    150 	}
    151 
    152 	/*
    153 	 * Skip it if this route is itself redundant.
    154 	 *
    155 	 * It is ok to change the contents of the slot here, since it is
    156 	 * always deleted next.
    157 	 */
    158 	if (ag->ag_state & AGS_REDUN0) {
    159 		if (ag->ag_state & AGS_REDUN1)
    160 			return;		/* quit if fully redundant */
    161 		/* make it finer if it is half-redundant */
    162 		bit = (-ag->ag_mask) >> 1;
    163 		ag->ag_dst_h |= bit;
    164 		ag->ag_mask |= bit;
    165 
    166 	} else if (ag->ag_state & AGS_REDUN1) {
    167 		/* make it finer if it is half-redundant */
    168 		bit = (-ag->ag_mask) >> 1;
    169 		ag->ag_mask |= bit;
    170 	}
    171 	out(ag);
    172 }
    173 
    174 
    175 static void
    176 ag_del(struct ag_info *ag)
    177 {
    178 	CHECK_AG();
    179 
    180 	if (ag->ag_cors == NULL)
    181 		ag_corsest = ag->ag_fine;
    182 	else
    183 		ag->ag_cors->ag_fine = ag->ag_fine;
    184 
    185 	if (ag->ag_fine == NULL)
    186 		ag_finest = ag->ag_cors;
    187 	else
    188 		ag->ag_fine->ag_cors = ag->ag_cors;
    189 
    190 	ag->ag_fine = ag_avail;
    191 	ag_avail = ag;
    192 
    193 	CHECK_AG();
    194 }
    195 
    196 
    197 /* Look for a route that can suppress the given route. */
    198 static struct ag_info *
    199 ag_find_suppressor(struct ag_info *ag)
    200 {
    201 	struct ag_info *ag_cors;
    202 	in_addr_t dst_h = ag->ag_dst_h;
    203 
    204 	for (ag_cors = ag->ag_cors; ag_cors != NULL;
    205 	    ag_cors = ag_cors->ag_cors) {
    206 
    207 		if ((dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h) {
    208 			/*
    209 			 * We found a route with a coarser mask that covers
    210 			 * the given target.  It can suppress the target
    211 			 * only if it has a good enough metric and it
    212 			 * either has the same (gateway, ifp), or if its state
    213 			 * includes AGS_CORS_GATE or the target's state
    214 			 * includes AGS_FINE_GATE.
    215 			 */
    216 			if (ag_cors->ag_pref <= ag->ag_pref &&
    217 			    (((ag->ag_nhop == ag_cors->ag_nhop) &&
    218 			    (ag->ag_ifp == ag_cors->ag_ifp)) ||
    219 			    ag_cors->ag_state & AGS_CORS_GATE ||
    220 			    ag->ag_state & AGS_FINE_GATE)) {
    221 				return (ag_cors);
    222 			}
    223 		}
    224 	}
    225 
    226 	return (NULL);
    227 }
    228 
    229 
    230 /*
    231  * Flush routes waiting for aggregation.
    232  * This must not suppress a route unless it is known that among all routes
    233  * with coarser masks that match it, the one with the longest mask is
    234  * appropriate.  This is ensured by scanning the routes in lexical order,
    235  * and with the most restrictive mask first among routes to the same
    236  * destination.
    237  */
    238 void
    239 ag_flush(in_addr_t lim_dst_h,	/* flush routes to here */
    240     in_addr_t lim_mask,		/* matching this mask */
    241     void (*out)(struct ag_info *))
    242 {
    243 	struct ag_info *ag, *ag_cors, *ag_supr;
    244 	in_addr_t dst_h;
    245 
    246 
    247 	for (ag = ag_finest; ag != NULL && ag->ag_mask >= lim_mask;
    248 	    ag = ag_cors) {
    249 		/* Get the next route now, before we delete ag. */
    250 		ag_cors = ag->ag_cors;
    251 
    252 		/* Work on only the specified routes. */
    253 		dst_h = ag->ag_dst_h;
    254 		if ((dst_h & lim_mask) != lim_dst_h)
    255 			continue;
    256 
    257 		/*
    258 		 * Don't try to suppress the route if its state doesn't
    259 		 * include AGS_SUPPRESS.
    260 		 */
    261 		if (!(ag->ag_state & AGS_SUPPRESS)) {
    262 			ag_out(ag, out);
    263 			ag_del(ag);
    264 			continue;
    265 		}
    266 
    267 		ag_supr = ag_find_suppressor(ag);
    268 		if (ag_supr == NULL) {
    269 			/*
    270 			 * We didn't find a route which suppresses the
    271 			 * target, so the target can go out.
    272 			 */
    273 			ag_out(ag, out);
    274 		} else {
    275 			/*
    276 			 * We found a route which suppresses the target, so
    277 			 * don't output the target.
    278 			 */
    279 			if (TRACEACTIONS) {
    280 				trace_misc("aggregated away %s",
    281 				    rtname(htonl(ag->ag_dst_h), ag->ag_mask,
    282 				    ag->ag_nhop));
    283 				trace_misc("on coarser route %s",
    284 				    rtname(htonl(ag_supr->ag_dst_h),
    285 				    ag_supr->ag_mask, ag_supr->ag_nhop));
    286 			}
    287 			/*
    288 			 * If the suppressed target was redundant, then
    289 			 * mark the suppressor as redundant.
    290 			 */
    291 			if (AG_IS_REDUN(ag->ag_state) &&
    292 			    ag_supr->ag_mask == (ag->ag_mask<<1)) {
    293 				if (ag_supr->ag_dst_h == dst_h)
    294 					ag_supr->ag_state |= AGS_REDUN0;
    295 				else
    296 					ag_supr->ag_state |= AGS_REDUN1;
    297 			}
    298 			if (ag->ag_tag != ag_supr->ag_tag)
    299 				ag_supr->ag_tag = 0;
    300 			if (ag->ag_nhop != ag_supr->ag_nhop)
    301 				ag_supr->ag_nhop = 0;
    302 		}
    303 
    304 		/* The route has either been output or suppressed */
    305 		ag_del(ag);
    306 	}
    307 
    308 	CHECK_AG();
    309 }
    310 
    311 
    312 /* Try to aggregate a route with previous routes. */
    313 void
    314 ag_check(in_addr_t dst,
    315     in_addr_t	mask,
    316     in_addr_t	gate,
    317     struct interface *ifp,
    318     in_addr_t	nhop,
    319     uint8_t	metric,
    320     uint8_t	pref,
    321     uint32_t	seqno,
    322     uint16_t	tag,
    323     uint16_t	state,
    324     void (*out)(struct ag_info *))	/* output using this */
    325 {
    326 	struct ag_info *ag, *nag, *ag_cors;
    327 	in_addr_t xaddr;
    328 	int tmp;
    329 	struct interface *xifp;
    330 
    331 	dst = ntohl(dst);
    332 
    333 	/*
    334 	 * Don't bother trying to aggregate routes with non-contiguous
    335 	 * subnet masks.
    336 	 *
    337 	 * (X & -X) contains a single bit if and only if X is a power of 2.
    338 	 * (X + (X & -X)) == 0 if and only if X is a power of 2.
    339 	 */
    340 	if ((mask & -mask) + mask != 0) {
    341 		struct ag_info nc_ag;
    342 
    343 		nc_ag.ag_dst_h = dst;
    344 		nc_ag.ag_mask = mask;
    345 		nc_ag.ag_gate = gate;
    346 		nc_ag.ag_ifp = ifp;
    347 		nc_ag.ag_nhop = nhop;
    348 		nc_ag.ag_metric = metric;
    349 		nc_ag.ag_pref = pref;
    350 		nc_ag.ag_tag = tag;
    351 		nc_ag.ag_state = state;
    352 		nc_ag.ag_seqno = seqno;
    353 		out(&nc_ag);
    354 		return;
    355 	}
    356 
    357 	/* Search for the right slot in the aggregation table. */
    358 	ag_cors = NULL;
    359 	ag = ag_corsest;
    360 	while (ag != NULL) {
    361 		if (ag->ag_mask >= mask)
    362 			break;
    363 
    364 		/*
    365 		 * Suppress old routes (i.e. combine with compatible routes
    366 		 * with coarser masks) as we look for the right slot in the
    367 		 * aggregation table for the new route.
    368 		 * A route to an address less than the current destination
    369 		 * will not be affected by the current route or any route
    370 		 * seen hereafter.  That means it is safe to suppress it.
    371 		 * This check keeps poor routes (e.g. with large hop counts)
    372 		 * from preventing suppression of finer routes.
    373 		 */
    374 		if (ag_cors != NULL && ag->ag_dst_h < dst &&
    375 		    (ag->ag_state & AGS_SUPPRESS) &&
    376 		    ag_cors->ag_pref <= ag->ag_pref &&
    377 		    (ag->ag_dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h &&
    378 		    ((ag_cors->ag_nhop == ag->ag_nhop &&
    379 		    (ag_cors->ag_ifp == ag->ag_ifp))||
    380 		    (ag->ag_state & AGS_FINE_GATE) ||
    381 		    (ag_cors->ag_state & AGS_CORS_GATE))) {
    382 			/*
    383 			 * If the suppressed target was redundant,
    384 			 * then mark the suppressor redundant.
    385 			 */
    386 			if (AG_IS_REDUN(ag->ag_state) &&
    387 			    ag_cors->ag_mask == (ag->ag_mask << 1)) {
    388 				if (ag_cors->ag_dst_h == dst)
    389 					ag_cors->ag_state |= AGS_REDUN0;
    390 				else
    391 					ag_cors->ag_state |= AGS_REDUN1;
    392 			}
    393 			if (ag->ag_tag != ag_cors->ag_tag)
    394 				ag_cors->ag_tag = 0;
    395 			if (ag->ag_nhop != ag_cors->ag_nhop)
    396 				ag_cors->ag_nhop = 0;
    397 			ag_del(ag);
    398 			CHECK_AG();
    399 		} else {
    400 			ag_cors = ag;
    401 		}
    402 		ag = ag_cors->ag_fine;
    403 	}
    404 
    405 	/*
    406 	 * If we find the even/odd twin of the new route, and if the
    407 	 * masks and so forth are equal, we can aggregate them.
    408 	 * We can probably promote one of the pair.
    409 	 *
    410 	 * Since the routes are encountered in lexical order,
    411 	 * the new route must be odd.  However, the second or later
    412 	 * times around this loop, it could be the even twin promoted
    413 	 * from the even/odd pair of twins of the finer route.
    414 	 */
    415 	while (ag != NULL && ag->ag_mask == mask &&
    416 	    ((ag->ag_dst_h ^ dst) & (mask<<1)) == 0) {
    417 
    418 		/*
    419 		 * Here we know the target route and the route in the current
    420 		 * slot have the same netmasks and differ by at most the
    421 		 * last bit.  They are either for the same destination, or
    422 		 * for an even/odd pair of destinations.
    423 		 */
    424 		if (ag->ag_dst_h == dst) {
    425 			if (ag->ag_nhop == nhop && ag->ag_ifp == ifp) {
    426 				/*
    427 				 * We have two routes to the same destination,
    428 				 * with the same nexthop and interface.
    429 				 * Routes are encountered in lexical order,
    430 				 * so a route is never promoted until the
    431 				 * parent route is already present.  So we
    432 				 * know that the new route is a promoted (or
    433 				 * aggregated) pair and the route already in
    434 				 * the slot is the explicit route.
    435 				 *
    436 				 * Prefer the best route if their metrics
    437 				 * differ, or the aggregated one if not,
    438 				 * following a sort of longest-match rule.
    439 				 */
    440 				if (pref <= ag->ag_pref) {
    441 					ag->ag_gate = gate;
    442 					ag->ag_ifp = ifp;
    443 					ag->ag_nhop = nhop;
    444 					ag->ag_tag = tag;
    445 					ag->ag_metric = metric;
    446 					ag->ag_pref = pref;
    447 					if (seqno > ag->ag_seqno)
    448 						ag->ag_seqno = seqno;
    449 					tmp = ag->ag_state;
    450 					ag->ag_state = state;
    451 					state = tmp;
    452 				}
    453 
    454 				/*
    455 				 * Some bits are set if they are set on
    456 				 * either route, except when the route is
    457 				 * for an interface.
    458 				 */
    459 				if (!(ag->ag_state & AGS_IF))
    460 					ag->ag_state |=
    461 					    (state & (AGS_AGGREGATE_EITHER |
    462 					    AGS_REDUN0 | AGS_REDUN1));
    463 
    464 				return;
    465 			} else {
    466 				/*
    467 				 * multiple routes to same dest/mask with
    468 				 * differing gate nexthop/or ifp. Flush
    469 				 * both out.
    470 				 */
    471 				break;
    472 			}
    473 		}
    474 
    475 		/*
    476 		 * If one of the routes can be promoted and the other can
    477 		 * be suppressed, it may be possible to combine them or
    478 		 * worthwhile to promote one.
    479 		 *
    480 		 * Any route that can be promoted is always
    481 		 * marked to be eligible to be suppressed.
    482 		 */
    483 		if (!((state & AGS_AGGREGATE) &&
    484 		    (ag->ag_state & AGS_SUPPRESS)) &&
    485 		    !((ag->ag_state & AGS_AGGREGATE) && (state & AGS_SUPPRESS)))
    486 			break;
    487 
    488 		/*
    489 		 * A pair of even/odd twin routes can be combined
    490 		 * if either is redundant, or if they are via the
    491 		 * same gateway and have the same metric.
    492 		 */
    493 		if (AG_IS_REDUN(ag->ag_state) || AG_IS_REDUN(state) ||
    494 		    (ag->ag_nhop == nhop && ag->ag_ifp == ifp &&
    495 		    ag->ag_pref == pref &&
    496 		    (state & ag->ag_state & AGS_AGGREGATE) != 0)) {
    497 
    498 			/*
    499 			 * We have both the even and odd pairs.
    500 			 * Since the routes are encountered in order,
    501 			 * the route in the slot must be the even twin.
    502 			 *
    503 			 * Combine and promote (aggregate) the pair of routes.
    504 			 */
    505 			if (seqno < ag->ag_seqno)
    506 				seqno = ag->ag_seqno;
    507 			if (!AG_IS_REDUN(state))
    508 				state &= ~AGS_REDUN1;
    509 			if (AG_IS_REDUN(ag->ag_state))
    510 				state |= AGS_REDUN0;
    511 			else
    512 				state &= ~AGS_REDUN0;
    513 			state |= (ag->ag_state & AGS_AGGREGATE_EITHER);
    514 			if (ag->ag_tag != tag)
    515 				tag = 0;
    516 			if (ag->ag_nhop != nhop)
    517 				nhop = 0;
    518 
    519 			/*
    520 			 * Get rid of the even twin that was already
    521 			 * in the slot.
    522 			 */
    523 			ag_del(ag);
    524 
    525 		} else if (ag->ag_pref >= pref &&
    526 		    (ag->ag_state & AGS_AGGREGATE)) {
    527 			/*
    528 			 * If we cannot combine the pair, maybe the route
    529 			 * with the worse metric can be promoted.
    530 			 *
    531 			 * Promote the old, even twin, by giving its slot
    532 			 * in the table to the new, odd twin.
    533 			 */
    534 			ag->ag_dst_h = dst;
    535 
    536 			xaddr = ag->ag_gate;
    537 			ag->ag_gate = gate;
    538 			gate = xaddr;
    539 
    540 			xifp = ag->ag_ifp;
    541 			ag->ag_ifp = ifp;
    542 			ifp = xifp;
    543 
    544 			xaddr = ag->ag_nhop;
    545 			ag->ag_nhop = nhop;
    546 			nhop = xaddr;
    547 
    548 			tmp = ag->ag_tag;
    549 			ag->ag_tag = tag;
    550 			tag = tmp;
    551 
    552 			/*
    553 			 * The promoted route is even-redundant only if the
    554 			 * even twin was fully redundant.  It is not
    555 			 * odd-redundant because the odd-twin will still be
    556 			 * in the table.
    557 			 */
    558 			tmp = ag->ag_state;
    559 			if (!AG_IS_REDUN(tmp))
    560 				tmp &= ~AGS_REDUN0;
    561 			tmp &= ~AGS_REDUN1;
    562 			ag->ag_state = state;
    563 			state = tmp;
    564 
    565 			tmp = ag->ag_metric;
    566 			ag->ag_metric = metric;
    567 			metric = tmp;
    568 
    569 			tmp = ag->ag_pref;
    570 			ag->ag_pref = pref;
    571 			pref = tmp;
    572 
    573 			/* take the newest sequence number */
    574 			if (seqno <= ag->ag_seqno)
    575 				seqno = ag->ag_seqno;
    576 			else
    577 				ag->ag_seqno = seqno;
    578 
    579 		} else {
    580 			if (!(state & AGS_AGGREGATE))
    581 				break;	/* cannot promote either twin */
    582 
    583 			/*
    584 			 * Promote the new, odd twin by shaving its
    585 			 * mask and address.
    586 			 * The promoted route is odd-redundant only if the
    587 			 * odd twin was fully redundant.  It is not
    588 			 * even-redundant because the even twin is still in
    589 			 * the table.
    590 			 */
    591 			if (!AG_IS_REDUN(state))
    592 				state &= ~AGS_REDUN1;
    593 			state &= ~AGS_REDUN0;
    594 			if (seqno < ag->ag_seqno)
    595 				seqno = ag->ag_seqno;
    596 			else
    597 				ag->ag_seqno = seqno;
    598 		}
    599 
    600 		mask <<= 1;
    601 		dst &= mask;
    602 
    603 		if (ag_cors == NULL) {
    604 			ag = ag_corsest;
    605 			break;
    606 		}
    607 		ag = ag_cors;
    608 		ag_cors = ag->ag_cors;
    609 	}
    610 
    611 	/*
    612 	 * When we can no longer promote and combine routes,
    613 	 * flush the old route in the target slot.  Also flush
    614 	 * any finer routes that we know will never be aggregated by
    615 	 * the new route.
    616 	 *
    617 	 * In case we moved toward coarser masks,
    618 	 * get back where we belong
    619 	 */
    620 	if (ag != NULL && ag->ag_mask < mask) {
    621 		ag_cors = ag;
    622 		ag = ag->ag_fine;
    623 	}
    624 
    625 	/* Empty the target slot */
    626 	if (ag != NULL && ag->ag_mask == mask) {
    627 		ag_flush(ag->ag_dst_h, ag->ag_mask, out);
    628 		ag = (ag_cors == NULL) ? ag_corsest : ag_cors->ag_fine;
    629 	}
    630 
    631 #ifdef DEBUG_AG
    632 	if (ag == NULL && ag_cors != ag_finest)
    633 		abort();
    634 	if (ag_cors == NULL && ag != ag_corsest)
    635 		abort();
    636 	if (ag != NULL && ag->ag_cors != ag_cors)
    637 		abort();
    638 	if (ag_cors != NULL && ag_cors->ag_fine != ag)
    639 		abort();
    640 	CHECK_AG();
    641 #endif
    642 
    643 	/* Save the new route on the end of the table. */
    644 	nag = ag_avail;
    645 	ag_avail = nag->ag_fine;
    646 
    647 	nag->ag_dst_h = dst;
    648 	nag->ag_mask = mask;
    649 	nag->ag_ifp = ifp;
    650 	nag->ag_gate = gate;
    651 	nag->ag_nhop = nhop;
    652 	nag->ag_metric = metric;
    653 	nag->ag_pref = pref;
    654 	nag->ag_tag = tag;
    655 	nag->ag_state = state;
    656 	nag->ag_seqno = seqno;
    657 
    658 	nag->ag_fine = ag;
    659 	if (ag != NULL)
    660 		ag->ag_cors = nag;
    661 	else
    662 		ag_finest = nag;
    663 	nag->ag_cors = ag_cors;
    664 	if (ag_cors == NULL)
    665 		ag_corsest = nag;
    666 	else
    667 		ag_cors->ag_fine = nag;
    668 	CHECK_AG();
    669 }
    670 
    671 
    672 static const char *
    673 rtm_type_name(uchar_t type)
    674 {
    675 	static const char *rtm_types[] = {
    676 		"RTM_ADD",
    677 		"RTM_DELETE",
    678 		"RTM_CHANGE",
    679 		"RTM_GET",
    680 		"RTM_LOSING",
    681 		"RTM_REDIRECT",
    682 		"RTM_MISS",
    683 		"RTM_LOCK",
    684 		"RTM_OLDADD",
    685 		"RTM_OLDDEL",
    686 		"RTM_RESOLVE",
    687 		"RTM_NEWADDR",
    688 		"RTM_DELADDR",
    689 		"RTM_IFINFO",
    690 		"RTM_CHGMADDR",
    691 		"RTM_FREEMADDR"
    692 	};
    693 #define	NEW_RTM_PAT	"RTM type %#x"
    694 	static char name0[sizeof (NEW_RTM_PAT) + 2];
    695 
    696 	if (type > sizeof (rtm_types) / sizeof (rtm_types[0]) || type == 0) {
    697 		(void) snprintf(name0, sizeof (name0), NEW_RTM_PAT, type);
    698 		return (name0);
    699 	} else {
    700 		return (rtm_types[type-1]);
    701 	}
    702 #undef	NEW_RTM_PAT
    703 }
    704 
    705 
    706 static void
    707 dump_rt_msg(const char *act, struct rt_msghdr *rtm, int mlen)
    708 {
    709 	const char *mtype;
    710 	uchar_t *cp;
    711 	int i, j;
    712 	char buffer[16*3 + 1], *ibs;
    713 	struct ifa_msghdr *ifam;
    714 	struct if_msghdr *ifm;
    715 
    716 	switch (rtm->rtm_type) {
    717 	case RTM_NEWADDR:
    718 	case RTM_DELADDR:
    719 	case RTM_FREEADDR:
    720 	case RTM_CHGADDR:
    721 		mtype = "ifam";
    722 		break;
    723 	case RTM_IFINFO:
    724 		mtype = "ifm";
    725 		break;
    726 	default:
    727 		mtype = "rtm";
    728 		break;
    729 	}
    730 	trace_misc("%s %s %d bytes", act, mtype, mlen);
    731 	if (mlen > rtm->rtm_msglen) {
    732 		trace_misc("%s: extra %d bytes ignored", mtype,
    733 		    mlen - rtm->rtm_msglen);
    734 		mlen = rtm->rtm_msglen;
    735 	} else if (mlen < rtm->rtm_msglen) {
    736 		trace_misc("%s: truncated by %d bytes", mtype,
    737 		    rtm->rtm_msglen - mlen);
    738 	}
    739 	switch (rtm->rtm_type) {
    740 	case RTM_NEWADDR:
    741 	case RTM_DELADDR:
    742 	case RTM_CHGADDR:
    743 	case RTM_FREEADDR:
    744 		ifam = (struct ifa_msghdr *)rtm;
    745 		trace_misc("ifam: msglen %d version %d type %d addrs %X",
    746 		    ifam->ifam_msglen, ifam->ifam_version, ifam->ifam_type,
    747 		    ifam->ifam_addrs);
    748 		trace_misc("ifam: flags %X index %d metric %d",
    749 		    ifam->ifam_flags, ifam->ifam_index, ifam->ifam_metric);
    750 		cp = (uchar_t *)(ifam + 1);
    751 		break;
    752 	case RTM_IFINFO:
    753 		ifm = (struct if_msghdr *)rtm;
    754 		trace_misc("ifm: msglen %d version %d type %d addrs %X",
    755 		    ifm->ifm_msglen, ifm->ifm_version, ifm->ifm_type,
    756 		    ifm->ifm_addrs);
    757 		ibs = if_bit_string(ifm->ifm_flags, _B_TRUE);
    758 		if (ibs == NULL) {
    759 			trace_misc("ifm: flags %#x index %d", ifm->ifm_flags,
    760 			    ifm->ifm_index);
    761 		} else {
    762 			trace_misc("ifm: flags %s index %d", ibs,
    763 			    ifm->ifm_index);
    764 			free(ibs);
    765 		}
    766 		cp = (uchar_t *)(ifm + 1);
    767 		break;
    768 	default:
    769 		trace_misc("rtm: msglen %d version %d type %d index %d",
    770 		    rtm->rtm_msglen, rtm->rtm_version, rtm->rtm_type,
    771 		    rtm->rtm_index);
    772 		trace_misc("rtm: flags %X addrs %X pid %d seq %d",
    773 		    rtm->rtm_flags, rtm->rtm_addrs, rtm->rtm_pid, rtm->rtm_seq);
    774 		trace_misc("rtm: errno %d use %d inits %X", rtm->rtm_errno,
    775 		    rtm->rtm_use, rtm->rtm_inits);
    776 		cp = (uchar_t *)(rtm + 1);
    777 		break;
    778 	}
    779 	i = mlen - (cp - (uint8_t *)rtm);
    780 	while (i > 0) {
    781 		buffer[0] = '\0';
    782 		ibs = buffer;
    783 		for (j = 0; j < 16 && i > 0; j++, i--)
    784 			ibs += sprintf(ibs, " %02X", *cp++);
    785 		trace_misc("addr%s", buffer);
    786 	}
    787 }
    788 
    789 /*
    790  * Tell the kernel to add, delete or change a route
    791  * Pass k_state from khash in for diagnostic info.
    792  */
    793 static void
    794 rtioctl(int action,			/* RTM_DELETE, etc */
    795     in_addr_t dst,
    796     in_addr_t gate,
    797     in_addr_t mask,
    798     struct interface *ifp,
    799     uint8_t metric,
    800     int flags)
    801 {
    802 	static int rt_sock_seqno = 0;
    803 	struct {
    804 		struct rt_msghdr w_rtm;
    805 		struct sockaddr_in w_dst;
    806 		struct sockaddr_in w_gate;
    807 		uint8_t w_space[512];
    808 	} w;
    809 	struct sockaddr_in w_mask;
    810 	struct sockaddr_dl w_ifp;
    811 	uint8_t *cp;
    812 	long cc;
    813 #define	PAT " %-10s %s metric=%d flags=%#x"
    814 #define	ARGS rtm_type_name(action), rtname(dst, mask, gate), metric, flags
    815 
    816 again:
    817 	(void) memset(&w, 0, sizeof (w));
    818 	(void) memset(&w_mask, 0, sizeof (w_mask));
    819 	(void) memset(&w_ifp, 0, sizeof (w_ifp));
    820 	cp = w.w_space;
    821 	w.w_rtm.rtm_msglen = sizeof (struct rt_msghdr) +
    822 	    2 * ROUNDUP_LONG(sizeof (struct sockaddr_in));
    823 	w.w_rtm.rtm_version = RTM_VERSION;
    824 	w.w_rtm.rtm_type = action;
    825 	w.w_rtm.rtm_flags = flags;
    826 	w.w_rtm.rtm_seq = ++rt_sock_seqno;
    827 	w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
    828 	if (metric != 0 || action == RTM_CHANGE) {
    829 		w.w_rtm.rtm_rmx.rmx_hopcount = metric;
    830 		w.w_rtm.rtm_inits |= RTV_HOPCOUNT;
    831 	}
    832 	w.w_dst.sin_family = AF_INET;
    833 	w.w_dst.sin_addr.s_addr = dst;
    834 	w.w_gate.sin_family = AF_INET;
    835 	w.w_gate.sin_addr.s_addr = gate;
    836 	if (mask == HOST_MASK) {
    837 		w.w_rtm.rtm_flags |= RTF_HOST;
    838 	} else {
    839 		w.w_rtm.rtm_addrs |= RTA_NETMASK;
    840 		w_mask.sin_family = AF_INET;
    841 		w_mask.sin_addr.s_addr = htonl(mask);
    842 		(void) memmove(cp, &w_mask, sizeof (w_mask));
    843 		cp += ROUNDUP_LONG(sizeof (struct sockaddr_in));
    844 		w.w_rtm.rtm_msglen += ROUNDUP_LONG(sizeof (struct sockaddr_in));
    845 	}
    846 	if (ifp == NULL)
    847 		ifp = iflookup(gate);
    848 
    849 	if (ifp == NULL || (ifp->int_phys == NULL)) {
    850 		trace_misc("no ifp for" PAT, ARGS);
    851 	} else {
    852 		if (ifp->int_phys->phyi_index > UINT16_MAX) {
    853 			trace_misc("ifindex %d is too big for sdl_index",
    854 			    ifp->int_phys->phyi_index);
    855 		} else {
    856 			w_ifp.sdl_family = AF_LINK;
    857 			w.w_rtm.rtm_addrs |= RTA_IFP;
    858 			w_ifp.sdl_index = ifp->int_phys->phyi_index;
    859 			(void) memmove(cp, &w_ifp, sizeof (w_ifp));
    860 			w.w_rtm.rtm_msglen +=
    861 			    ROUNDUP_LONG(sizeof (struct sockaddr_dl));
    862 		}
    863 	}
    864 
    865 
    866 	if (!no_install) {
    867 		if (TRACERTS)
    868 			dump_rt_msg("write", &w.w_rtm, w.w_rtm.rtm_msglen);
    869 		cc = write(rt_sock, &w, w.w_rtm.rtm_msglen);
    870 		if (cc < 0) {
    871 			if (errno == ESRCH && (action == RTM_CHANGE ||
    872 			    action == RTM_DELETE)) {
    873 				trace_act("route disappeared before" PAT, ARGS);
    874 				if (action == RTM_CHANGE) {
    875 					action = RTM_ADD;
    876 					goto again;
    877 				}
    878 				return;
    879 			}
    880 			writelog(LOG_WARNING, "write(rt_sock)" PAT ": %s ",
    881 			    ARGS, rip_strerror(errno));
    882 			return;
    883 		} else if (cc != w.w_rtm.rtm_msglen) {
    884 			msglog("write(rt_sock) wrote %ld instead of %d for" PAT,
    885 			    cc, w.w_rtm.rtm_msglen, ARGS);
    886 			return;
    887 		}
    888 	}
    889 	if (TRACEKERNEL)
    890 		trace_misc("write kernel" PAT, ARGS);
    891 #undef PAT
    892 #undef ARGS
    893 }
    894 
    895 
/*
 * Hash table containing our image of the kernel forwarding table.
 * KHASH(a, m) names the bucket (the head pointer of a chain of struct
 * khash linked through k_next) selected by (a ^ m) modulo KHASH_SIZE.
 */
#define	KHASH_SIZE 71			/* should be prime */
#define	KHASH(a, m) khash_bins[((a) ^ (m)) % KHASH_SIZE]
static struct khash *khash_bins[KHASH_SIZE];

/* NOTE(review): presumably a limit, in seconds, applied to k_keep; confirm */
#define	K_KEEP_LIM	30	/* k_keep */
    902 
    903 static struct khash *
    904 kern_find(in_addr_t dst, in_addr_t mask, in_addr_t gate,
    905     struct interface *ifp, struct khash ***ppk)
    906 {
    907 	struct khash *k, **pk;
    908 
    909 	for (pk = &KHASH(dst, mask); (k = *pk) != NULL; pk = &k->k_next) {
    910 		if (k->k_dst == dst && k->k_mask == mask &&
    911 		    (gate == 0 || k->k_gate == gate) &&
    912 		    (ifp == NULL || k->k_ifp == ifp)) {
    913 			break;
    914 		}
    915 	}
    916 	if (ppk != NULL)
    917 		*ppk = pk;
    918 	return (k);
    919 }
    920 
    921 
    922 /*
    923  * Find out if there is an alternate route to a given destination
    924  * off of a given interface.
    925  */
    926 static struct khash *
    927 kern_alternate(in_addr_t dst, in_addr_t mask, in_addr_t gate,
    928     struct interface *ifp, struct khash ***ppk)
    929 {
    930 	struct khash *k, **pk;
    931 
    932 	for (pk = &KHASH(dst, mask); (k = *pk) != NULL; pk = &k->k_next) {
    933 		if (k->k_dst == dst && k->k_mask == mask &&
    934 		    (k->k_gate != gate) &&
    935 		    (k->k_ifp == ifp)) {
    936 			break;
    937 		}
    938 	}
    939 	if (ppk != NULL)
    940 		*ppk = pk;
    941 	return (k);
    942 }
    943 
    944 static struct khash *
    945 kern_add(in_addr_t dst, uint32_t mask, in_addr_t gate, struct interface *ifp)
    946 {
    947 	struct khash *k, **pk;
    948 
    949 	k = kern_find(dst, mask, gate, ifp, &pk);
    950 	if (k != NULL)
    951 		return (k);
    952 
    953 	k = rtmalloc(sizeof (*k), "kern_add");
    954 
    955 	(void) memset(k, 0, sizeof (*k));
    956 	k->k_dst = dst;
    957 	k->k_mask = mask;
    958 	k->k_state = KS_NEW;
    959 	k->k_keep = now.tv_sec;
    960 	k->k_gate = gate;
    961 	k->k_ifp = ifp;
    962 	*pk = k;
    963 
    964 	return (k);
    965 }
    966 
    967 /* delete all khash entries that are wired through the interface ifp */
    968 void
    969 kern_flush_ifp(struct interface *ifp)
    970 {
    971 	struct khash *k, *kprev, *knext;
    972 	int i;
    973 
    974 	for (i = 0; i < KHASH_SIZE; i++) {
    975 		kprev = NULL;
    976 		for (k = khash_bins[i]; k != NULL; k = knext) {
    977 			knext = k->k_next;
    978 			if (k->k_ifp == ifp) {
    979 				if (kprev != NULL)
    980 					kprev->k_next = k->k_next;
    981 				else
    982 					khash_bins[i] = k->k_next;
    983 				free(k);
    984 				continue;
    985 			}
    986 			kprev = k;
    987 		}
    988 	}
    989 }
    990 
    991 /*
    992  * rewire khash entries that currently go through oldifp to
    993  * go through newifp.
    994  */
    995 void
    996 kern_rewire_ifp(struct interface *oldifp, struct interface *newifp)
    997 {
    998 	struct khash *k;
    999 	int i;
   1000 
   1001 	for (i = 0; i < KHASH_SIZE; i++) {
   1002 		for (k = khash_bins[i]; k; k = k->k_next) {
   1003 			if (k->k_ifp == oldifp) {
   1004 				k->k_ifp = newifp;
   1005 				trace_misc("kern_rewire_ifp k 0x%lx "
   1006 				    "from %s to %s", k, oldifp->int_name,
   1007 				    newifp->int_name);
   1008 			}
   1009 		}
   1010 	}
   1011 }
   1012 
   1013 /*
   1014  * Check that a static route it is still in the daemon table, and not
   1015  * deleted by interfaces coming and going.  This is also the routine
   1016  * responsible for adding new static routes to the daemon table.
   1017  */
   1018 static void
   1019 kern_check_static(struct khash *k, struct interface *ifp)
   1020 {
   1021 	struct rt_entry *rt;
   1022 	struct rt_spare new;
   1023 	uint16_t rt_state = RS_STATIC;
   1024 
   1025 	(void) memset(&new, 0, sizeof (new));
   1026 	new.rts_ifp = ifp;
   1027 	new.rts_gate = k->k_gate;
   1028 	new.rts_router = (ifp != NULL) ? ifp->int_addr : loopaddr;
   1029 	new.rts_metric = k->k_metric;
   1030 	new.rts_time = now.tv_sec;
   1031 	new.rts_origin = RO_STATIC;
   1032 
   1033 	rt = rtget(k->k_dst, k->k_mask);
   1034 	if ((ifp != NULL && !IS_IFF_ROUTING(ifp->int_if_flags)) ||
   1035 	    (k->k_state & KS_PRIVATE))
   1036 		rt_state |= RS_NOPROPAGATE;
   1037 
   1038 	if (rt != NULL) {
   1039 		if ((rt->rt_state & RS_STATIC) == 0) {
   1040 			/*
   1041 			 * We are already tracking this dest/mask
   1042 			 * via RIP/RDISC. Ignore the static route,
   1043 			 * because we don't currently have a good
   1044 			 * way to compare metrics on static routes
   1045 			 * with rip metrics, and therefore cannot
   1046 			 * mix and match the two.
   1047 			 */
   1048 			return;
   1049 		}
   1050 		rt_state |= rt->rt_state;
   1051 		if (rt->rt_state != rt_state)
   1052 			rtchange(rt, rt_state, &new, 0);
   1053 	} else {
   1054 		rtadd(k->k_dst, k->k_mask, rt_state, &new);
   1055 	}
   1056 }
   1057 
   1058 
   1059 /* operate on a kernel entry */
   1060 static void
   1061 kern_ioctl(struct khash *k,
   1062     int action,			/* RTM_DELETE, etc */
   1063     int flags)
   1064 {
   1065 	if (((k->k_state & (KS_IF|KS_PASSIVE)) == KS_IF) ||
   1066 	    (k->k_state & KS_DEPRE_IF)) {
   1067 		/*
   1068 		 * Prevent execution of RTM_DELETE, RTM_ADD or
   1069 		 * RTM_CHANGE of interface routes
   1070 		 */
   1071 		trace_act("Blocking execution of %s  %s --> %s ",
   1072 		    rtm_type_name(action),
   1073 		    addrname(k->k_dst, k->k_mask, 0), naddr_ntoa(k->k_gate));
   1074 		return;
   1075 	}
   1076 
   1077 	switch (action) {
   1078 	case RTM_DELETE:
   1079 		k->k_state &= ~KS_DYNAMIC;
   1080 		if (k->k_state & KS_DELETED)
   1081 			return;
   1082 		k->k_state |= KS_DELETED;
   1083 		break;
   1084 	case RTM_ADD:
   1085 		k->k_state &= ~KS_DELETED;
   1086 		break;
   1087 	case RTM_CHANGE:
   1088 		if (k->k_state & KS_DELETED) {
   1089 			action = RTM_ADD;
   1090 			k->k_state &= ~KS_DELETED;
   1091 		}
   1092 		break;
   1093 	}
   1094 
   1095 	/*
   1096 	 * We should be doing an RTM_CHANGE for a KS_CHANGE, but
   1097 	 * RTM_CHANGE in the kernel is not currently multipath-aware and
   1098 	 * assumes that RTF_GATEWAY implies that the gateway of the route for
   1099 	 * dst has to be changed. Moreover, the only change that in.routed
   1100 	 * wants to implement is a change in the ks_metric (rmx_hopcount)
   1101 	 * which the kernel ignores anway, so we skip the RTM_CHANGE operation
   1102 	 * on the kernel
   1103 	 */
   1104 	if (action != RTM_CHANGE) {
   1105 		rtioctl(action, k->k_dst, k->k_gate, k->k_mask, k->k_ifp,
   1106 		    k->k_metric, flags);
   1107 	}
   1108 }
   1109 
   1110 
   1111 /* add a route the kernel told us */
   1112 static void
   1113 rtm_add(struct rt_msghdr *rtm,
   1114     struct rt_addrinfo *info,
   1115     time_t keep,
   1116     boolean_t interf_route,
   1117     struct interface *ifptr)
   1118 {
   1119 	struct khash *k;
   1120 	struct interface *ifp = ifptr;
   1121 	in_addr_t mask, gate = 0;
   1122 	static struct msg_limit msg_no_ifp;
   1123 
   1124 	if (rtm->rtm_flags & RTF_HOST) {
   1125 		mask = HOST_MASK;
   1126 	} else if (INFO_MASK(info) != 0) {
   1127 		mask = ntohl(S_ADDR(INFO_MASK(info)));
   1128 	} else {
   1129 		writelog(LOG_WARNING,
   1130 		    "ignore %s without mask", rtm_type_name(rtm->rtm_type));
   1131 		return;
   1132 	}
   1133 
   1134 	/*
   1135 	 * Find the interface toward the gateway.
   1136 	 */
   1137 	if (INFO_GATE(info) != NULL)
   1138 		gate = S_ADDR(INFO_GATE(info));
   1139 
   1140 	if (ifp == NULL) {
   1141 		if (INFO_GATE(info) != NULL)
   1142 			ifp = iflookup(gate);
   1143 		if (ifp == NULL) {
   1144 			msglim(&msg_no_ifp, gate,
   1145 			    "route %s --> %s nexthop is not directly connected",
   1146 			    addrname(S_ADDR(INFO_DST(info)), mask, 0),
   1147 			    naddr_ntoa(gate));
   1148 		}
   1149 	}
   1150 
   1151 	k = kern_add(S_ADDR(INFO_DST(info)), mask, gate, ifp);
   1152 
   1153 	if (k->k_state & KS_NEW)
   1154 		k->k_keep = now.tv_sec+keep;
   1155 	if (INFO_GATE(info) == 0) {
   1156 		trace_act("note %s without gateway",
   1157 		    rtm_type_name(rtm->rtm_type));
   1158 		k->k_metric = HOPCNT_INFINITY;
   1159 	} else if (INFO_GATE(info)->ss_family != AF_INET) {
   1160 		trace_act("note %s with gateway AF=%d",
   1161 		    rtm_type_name(rtm->rtm_type),
   1162 		    INFO_GATE(info)->ss_family);
   1163 		k->k_metric = HOPCNT_INFINITY;
   1164 	} else {
   1165 		k->k_gate = S_ADDR(INFO_GATE(info));
   1166 		k->k_metric = rtm->rtm_rmx.rmx_hopcount;
   1167 		if (k->k_metric < 0)
   1168 			k->k_metric = 0;
   1169 		else if (k->k_metric > HOPCNT_INFINITY-1)
   1170 			k->k_metric = HOPCNT_INFINITY-1;
   1171 	}
   1172 
   1173 	if ((k->k_state & KS_NEW) && interf_route) {
   1174 		if (k->k_gate != 0 && findifaddr(k->k_gate) == NULL)
   1175 			k->k_state |= KS_DEPRE_IF;
   1176 		else
   1177 			k->k_state |= KS_IF;
   1178 	}
   1179 
   1180 	k->k_state &= ~(KS_NEW | KS_DELETE | KS_ADD | KS_CHANGE | KS_DEL_ADD |
   1181 	    KS_STATIC | KS_GATEWAY | KS_DELETED | KS_PRIVATE | KS_CHECK);
   1182 	if (rtm->rtm_flags & RTF_GATEWAY)
   1183 		k->k_state |= KS_GATEWAY;
   1184 	if (rtm->rtm_flags & RTF_STATIC)
   1185 		k->k_state |= KS_STATIC;
   1186 	if (rtm->rtm_flags & RTF_PRIVATE)
   1187 		k->k_state |= KS_PRIVATE;
   1188 
   1189 
   1190 	if (rtm->rtm_flags & (RTF_DYNAMIC | RTF_MODIFIED)) {
   1191 		if (INFO_AUTHOR(info) != 0 &&
   1192 		    INFO_AUTHOR(info)->ss_family == AF_INET)
   1193 			ifp = iflookup(S_ADDR(INFO_AUTHOR(info)));
   1194 		else
   1195 			ifp = NULL;
   1196 		if (should_supply(ifp) && (ifp == NULL ||
   1197 		    !(ifp->int_state & IS_REDIRECT_OK))) {
   1198 			/*
   1199 			 * Routers are not supposed to listen to redirects,
   1200 			 * so delete it if it came via an unknown interface
   1201 			 * or the interface does not have special permission.
   1202 			 */
   1203 			k->k_state &= ~KS_DYNAMIC;
   1204 			k->k_state |= KS_DELETE;
   1205 			LIM_SEC(need_kern, 0);
   1206 			trace_act("mark for deletion redirected %s --> %s"
   1207 			    " via %s",
   1208 			    addrname(k->k_dst, k->k_mask, 0),
   1209 			    naddr_ntoa(k->k_gate),
   1210 			    ifp ? ifp->int_name : "unknown interface");
   1211 		} else {
   1212 			k->k_state |= KS_DYNAMIC;
   1213 			k->k_redirect_time = now.tv_sec;
   1214 			trace_act("accept redirected %s --> %s via %s",
   1215 			    addrname(k->k_dst, k->k_mask, 0),
   1216 			    naddr_ntoa(k->k_gate),
   1217 			    ifp ? ifp->int_name : "unknown interface");
   1218 		}
   1219 		return;
   1220 	}
   1221 
   1222 	/*
   1223 	 * If it is not a static route, quit until the next comparison
   1224 	 * between the kernel and daemon tables, when it will be deleted.
   1225 	 */
   1226 	if (!(k->k_state & KS_STATIC)) {
   1227 		if (!(k->k_state & (KS_IF|KS_DEPRE_IF|KS_FILE)))
   1228 			k->k_state |= KS_DELETE;
   1229 		LIM_SEC(need_kern, k->k_keep);
   1230 		return;
   1231 	}
   1232 
   1233 	/*
   1234 	 * Put static routes with real metrics into the daemon table so
   1235 	 * they can be advertised.
   1236 	 */
   1237 
   1238 	kern_check_static(k, ifp);
   1239 }
   1240 
   1241 
   1242 /* deal with packet loss */
   1243 static void
   1244 rtm_lose(struct rt_msghdr *rtm, struct rt_addrinfo *info)
   1245 {
   1246 	struct rt_spare new, *rts, *losing_rts = NULL;
   1247 	struct rt_entry *rt;
   1248 	int i, spares;
   1249 
   1250 	if (INFO_GATE(info) == NULL || INFO_GATE(info)->ss_family != AF_INET) {
   1251 		trace_act("ignore %s without gateway",
   1252 		    rtm_type_name(rtm->rtm_type));
   1253 		age(0);
   1254 		return;
   1255 	}
   1256 
   1257 	rt = rtfind(S_ADDR(INFO_DST(info)));
   1258 	if (rt != NULL) {
   1259 		spares = 0;
   1260 		for (i = 0; i < rt->rt_num_spares;  i++) {
   1261 			rts = &rt->rt_spares[i];
   1262 			if (rts->rts_gate == S_ADDR(INFO_GATE(info))) {
   1263 				losing_rts = rts;
   1264 				continue;
   1265 			}
   1266 			if (rts->rts_gate != 0 && rts->rts_ifp != &dummy_ifp)
   1267 				spares++;
   1268 		}
   1269 	}
   1270 	if (rt == NULL || losing_rts == NULL) {
   1271 		trace_act("Ignore RTM_LOSING because no route found"
   1272 		    " for %s through %s",
   1273 		    naddr_ntoa(S_ADDR(INFO_DST(info))),
   1274 		    naddr_ntoa(S_ADDR(INFO_GATE(info))));
   1275 		return;
   1276 	}
   1277 	if (spares == 0) {
   1278 		trace_act("Got RTM_LOSING, but no alternatives to gw %s."
   1279 		    " deprecating route to metric 15",
   1280 		    naddr_ntoa(S_ADDR(INFO_GATE(info))));
   1281 		new = *losing_rts;
   1282 		new.rts_metric = HOPCNT_INFINITY - 1;
   1283 		rtchange(rt, rt->rt_state, &new, 0);
   1284 		return;
   1285 	}
   1286 	trace_act("Got RTM_LOSING. Found a route with %d alternates", spares);
   1287 	if (rdisc_ok)
   1288 		rdisc_age(S_ADDR(INFO_GATE(info)));
   1289 	age(S_ADDR(INFO_GATE(info)));
   1290 }
   1291 
   1292 
   1293 /*
   1294  * Make the gateway slot of an info structure point to something
   1295  * useful.  If it is not already useful, but it specifies an interface,
   1296  * then fill in the sockaddr_in provided and point it there.
   1297  */
   1298 static int
   1299 get_info_gate(struct sockaddr_storage **ssp, struct sockaddr_in *sin)
   1300 {
   1301 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)*ssp;
   1302 	struct interface *ifp;
   1303 
   1304 	if (sdl == NULL)
   1305 		return (0);
   1306 	if ((sdl)->sdl_family == AF_INET)
   1307 		return (1);
   1308 	if ((sdl)->sdl_family != AF_LINK)
   1309 		return (0);
   1310 
   1311 	ifp = ifwithindex(sdl->sdl_index, _B_TRUE);
   1312 	if (ifp == NULL)
   1313 		return (0);
   1314 
   1315 	sin->sin_addr.s_addr = ifp->int_addr;
   1316 	sin->sin_family = AF_INET;
   1317 	/* LINTED */
   1318 	*ssp = (struct sockaddr_storage *)sin;
   1319 
   1320 	return (1);
   1321 }
   1322 
   1323 
   1324 /*
   1325  * Clean the kernel table by copying it to the daemon image.
   1326  * Eventually the daemon will delete any extra routes.
   1327  */
   1328 void
   1329 sync_kern(void)
   1330 {
   1331 	int i;
   1332 	struct khash *k;
   1333 	struct {
   1334 		struct T_optmgmt_req req;
   1335 		struct opthdr hdr;
   1336 	} req;
   1337 	union {
   1338 		struct T_optmgmt_ack ack;
   1339 		unsigned char space[64];
   1340 	} ack;
   1341 	struct opthdr *rh;
   1342 	struct strbuf cbuf, dbuf;
   1343 	int ipfd, nroutes, flags, r;
   1344 	mib2_ipRouteEntry_t routes[8];
   1345 	mib2_ipRouteEntry_t *rp;
   1346 	struct rt_msghdr rtm;
   1347 	struct rt_addrinfo info;
   1348 	struct sockaddr_in sin_dst;
   1349 	struct sockaddr_in sin_gate;
   1350 	struct sockaddr_in sin_mask;
   1351 	struct sockaddr_in sin_author;
   1352 	struct interface *ifp;
   1353 	char ifname[LIFNAMSIZ + 1];
   1354 
   1355 	for (i = 0; i < KHASH_SIZE; i++) {
   1356 		for (k = khash_bins[i]; k != NULL; k = k->k_next) {
   1357 			if (!(k->k_state & (KS_IF|KS_DEPRE_IF)))
   1358 				k->k_state |= KS_CHECK;
   1359 		}
   1360 	}
   1361 
   1362 	ipfd = open(IP_DEV_NAME, O_RDWR);
   1363 	if (ipfd == -1) {
   1364 		msglog("open " IP_DEV_NAME ": %s", rip_strerror(errno));
   1365 		goto hash_clean;
   1366 	}
   1367 
   1368 	req.req.PRIM_type = T_OPTMGMT_REQ;
   1369 	req.req.OPT_offset = (caddr_t)&req.hdr - (caddr_t)&req;
   1370 	req.req.OPT_length = sizeof (req.hdr);
   1371 	req.req.MGMT_flags = T_CURRENT;
   1372 
   1373 	req.hdr.level = MIB2_IP;
   1374 	req.hdr.name = 0;
   1375 	req.hdr.len = 0;
   1376 
   1377 	cbuf.buf = (caddr_t)&req;
   1378 	cbuf.len = sizeof (req);
   1379 
   1380 	if (putmsg(ipfd, &cbuf, NULL, 0) == -1) {
   1381 		msglog("T_OPTMGMT_REQ putmsg: %s", rip_strerror(errno));
   1382 		goto hash_clean;
   1383 	}
   1384 
   1385 	for (;;) {
   1386 		cbuf.buf = (caddr_t)&ack;
   1387 		cbuf.maxlen = sizeof (ack);
   1388 		dbuf.buf = (caddr_t)routes;
   1389 		dbuf.maxlen = sizeof (routes);
   1390 		flags = 0;
   1391 		r = getmsg(ipfd, &cbuf, &dbuf, &flags);
   1392 		if (r == -1) {
   1393 			msglog("T_OPTMGMT_REQ getmsg: %s", rip_strerror(errno));
   1394 			goto hash_clean;
   1395 		}
   1396 
   1397 		if (cbuf.len < sizeof (struct T_optmgmt_ack) ||
   1398 		    ack.ack.PRIM_type != T_OPTMGMT_ACK ||
   1399 		    ack.ack.MGMT_flags != T_SUCCESS ||
   1400 		    ack.ack.OPT_length < sizeof (struct opthdr)) {
   1401 			msglog("bad T_OPTMGMT response; len=%d prim=%d "
   1402 			    "flags=%d optlen=%d", cbuf.len, ack.ack.PRIM_type,
   1403 			    ack.ack.MGMT_flags, ack.ack.OPT_length);
   1404 			goto hash_clean;
   1405 		}
   1406 		/* LINTED */
   1407 		rh = (struct opthdr *)((caddr_t)&ack + ack.ack.OPT_offset);
   1408 		if (rh->level == 0 && rh->name == 0) {
   1409 			break;
   1410 		}
   1411 		if (rh->level != MIB2_IP || rh->name != MIB2_IP_21) {
   1412 			while (r == MOREDATA) {
   1413 				r = getmsg(ipfd, NULL, &dbuf, &flags);
   1414 			}
   1415 			continue;
   1416 		}
   1417 		break;
   1418 	}
   1419 
   1420 	(void) memset(&rtm, 0, sizeof (rtm));
   1421 	(void) memset(&info, 0, sizeof (info));
   1422 	(void) memset(&sin_dst, 0, sizeof (sin_dst));
   1423 	(void) memset(&sin_gate, 0, sizeof (sin_gate));
   1424 	(void) memset(&sin_mask, 0, sizeof (sin_mask));
   1425 	(void) memset(&sin_author, 0, sizeof (sin_author));
   1426 	sin_dst.sin_family = AF_INET;
   1427 	/* LINTED */
   1428 	info.rti_info[RTAX_DST] = (struct sockaddr_storage *)&sin_dst;
   1429 	sin_gate.sin_family = AF_INET;
   1430 	/* LINTED */
   1431 	info.rti_info[RTAX_GATEWAY] = (struct sockaddr_storage *)&sin_gate;
   1432 	sin_mask.sin_family = AF_INET;
   1433 	/* LINTED */
   1434 	info.rti_info[RTAX_NETMASK] = (struct sockaddr_storage *)&sin_mask;
   1435 	sin_dst.sin_family = AF_INET;
   1436 	/* LINTED */
   1437 	info.rti_info[RTAX_AUTHOR] = (struct sockaddr_storage *)&sin_author;
   1438 
   1439 	for (;;) {
   1440 		nroutes = dbuf.len / sizeof (mib2_ipRouteEntry_t);
   1441 		for (rp = routes; nroutes > 0; ++rp, nroutes--) {
   1442 
   1443 			/*
   1444 			 * Ignore IRE cache, broadcast, and local address
   1445 			 * entries; they're not subject to routing socket
   1446 			 * control.
   1447 			 */
   1448 			if (rp->ipRouteInfo.re_ire_type &
   1449 			    (IRE_BROADCAST | IRE_CACHE | IRE_LOCAL))
   1450 				continue;
   1451 
   1452 			/* ignore multicast and link local addresses */
   1453 			if (IN_MULTICAST(ntohl(rp->ipRouteDest)) ||
   1454 			    IN_LINKLOCAL(ntohl(rp->ipRouteDest))) {
   1455 				continue;
   1456 			}
   1457 
   1458 
   1459 #ifdef DEBUG_KERNEL_ROUTE_READ
   1460 			(void) fprintf(stderr, "route type %d, ire type %08X, "
   1461 			    "flags %08X: %s", rp->ipRouteType,
   1462 			    rp->ipRouteInfo.re_ire_type,
   1463 			    rp->ipRouteInfo.re_flags,
   1464 			    naddr_ntoa(rp->ipRouteDest));
   1465 			(void) fprintf(stderr, " %s",
   1466 			    naddr_ntoa(rp->ipRouteMask));
   1467 			(void) fprintf(stderr, " %s\n",
   1468 			    naddr_ntoa(rp->ipRouteNextHop));
   1469 #endif
   1470 
   1471 			/* Fake up the needed entries */
   1472 			rtm.rtm_flags = rp->ipRouteInfo.re_flags;
   1473 			rtm.rtm_type = RTM_GET;
   1474 			rtm.rtm_rmx.rmx_hopcount = rp->ipRouteMetric1;
   1475 
   1476 			(void) memset(ifname, 0, sizeof (ifname));
   1477 			if (rp->ipRouteIfIndex.o_length <
   1478 			    sizeof (rp->ipRouteIfIndex.o_bytes))
   1479 				rp->ipRouteIfIndex.o_bytes[
   1480 				    rp->ipRouteIfIndex.o_length] = '\0';
   1481 				(void) strncpy(ifname,
   1482 				    rp->ipRouteIfIndex.o_bytes,
   1483 				    sizeof (ifname));
   1484 
   1485 			/*
   1486 			 * First try to match up on gwkludge entries
   1487 			 * before trying to match ifp by name/nexthop.
   1488 			 */
   1489 			if ((ifp = gwkludge_iflookup(rp->ipRouteDest,
   1490 			    rp->ipRouteNextHop,
   1491 			    ntohl(rp->ipRouteMask))) == NULL) {
   1492 				ifp = lifp_iflookup(rp->ipRouteNextHop, ifname);
   1493 			}
   1494 
   1495 #ifdef DEBUG_KERNEL_ROUTE_READ
   1496 			if (ifp != NULL) {
   1497 				(void) fprintf(stderr, "   found interface"
   1498 				    " %-4s #%-3d ", ifp->int_name,
   1499 				    (ifp->int_phys != NULL) ?
   1500 				    ifp->int_phys->phyi_index : 0);
   1501 				(void) fprintf(stderr, "%-15s-->%-15s \n",
   1502 				    naddr_ntoa(ifp->int_addr),
   1503 				    addrname(((ifp->int_if_flags &
   1504 				    IFF_POINTOPOINT) ?
   1505 				    ifp->int_dstaddr : htonl(ifp->int_net)),
   1506 				    ifp->int_mask, 1));
   1507 			}
   1508 #endif
   1509 
   1510 			info.rti_addrs = RTA_DST | RTA_GATEWAY | RTA_NETMASK;
   1511 			if (rp->ipRouteInfo.re_ire_type & IRE_HOST_REDIRECT)
   1512 				info.rti_addrs |= RTA_AUTHOR;
   1513 			sin_dst.sin_addr.s_addr = rp->ipRouteDest;
   1514 			sin_gate.sin_addr.s_addr = rp->ipRouteNextHop;
   1515 			sin_mask.sin_addr.s_addr = rp->ipRouteMask;
   1516 			sin_author.sin_addr.s_addr =
   1517 			    rp->ipRouteInfo.re_src_addr;
   1518 
   1519 			/*
   1520 			 * Note static routes and interface routes, and also
   1521 			 * preload the image of the kernel table so that
   1522 			 * we can later clean it, as well as avoid making
   1523 			 * unneeded changes.  Keep the old kernel routes for a
   1524 			 * few seconds to allow a RIP or router-discovery
   1525 			 * response to be heard.
   1526 			 */
   1527 			rtm_add(&rtm, &info, MAX_WAITTIME,
   1528 			    ((rp->ipRouteInfo.re_ire_type &
   1529 			    (IRE_INTERFACE|IRE_LOOPBACK)) != 0), ifp);
   1530 		}
   1531 		if (r == 0) {
   1532 			break;
   1533 		}
   1534 		r = getmsg(ipfd, NULL, &dbuf, &flags);
   1535 	}
   1536 
   1537 hash_clean:
   1538 	if (ipfd != -1)
   1539 		(void) close(ipfd);
   1540 	for (i = 0; i < KHASH_SIZE; i++) {
   1541 		for (k = khash_bins[i]; k != NULL; k = k->k_next) {
   1542 
   1543 			/*
   1544 			 * KS_DELETED routes have been removed from the
   1545 			 * kernel, but we keep them around for reasons
   1546 			 * stated in del_static(), so we skip the check
   1547 			 * for KS_DELETED routes here.
   1548 			 */
   1549 			if ((k->k_state & (KS_CHECK|KS_DELETED)) == KS_CHECK) {
   1550 
   1551 				if (!(k->k_state & KS_DYNAMIC)) {
   1552 					writelog(LOG_WARNING,
   1553 					    "%s --> %s disappeared from kernel",
   1554 					    addrname(k->k_dst, k->k_mask, 0),
   1555 					    naddr_ntoa(k->k_gate));
   1556 				}
   1557 				del_static(k->k_dst, k->k_mask, k->k_gate,
   1558 				    k->k_ifp, 1);
   1559 
   1560 			}
   1561 		}
   1562 	}
   1563 }
   1564 
   1565 
   1566 /* Listen to announcements from the kernel */
   1567 void
   1568 read_rt(void)
   1569 {
   1570 	long cc;
   1571 	struct interface *ifp;
   1572 	struct sockaddr_in gate_sin;
   1573 	in_addr_t mask, gate;
   1574 	union {
   1575 		struct {
   1576 			struct rt_msghdr rtm;
   1577 			struct sockaddr_storage addrs[RTA_NUMBITS];
   1578 		} r;
   1579 		struct if_msghdr ifm;
   1580 	} m;
   1581 	char str[100], *strp;
   1582 	struct rt_addrinfo info;
   1583 
   1584 
   1585 	for (;;) {
   1586 		cc = read(rt_sock, &m, sizeof (m));
   1587 		if (cc <= 0) {
   1588 			if (cc < 0 && errno != EWOULDBLOCK)
   1589 				LOGERR("read(rt_sock)");
   1590 			return;
   1591 		}
   1592 
   1593 		if (TRACERTS)
   1594 			dump_rt_msg("read", &m.r.rtm, cc);
   1595 
   1596 		if (cc < m.r.rtm.rtm_msglen) {
   1597 			msglog("routing message truncated (%d < %d)",
   1598 			    cc, m.r.rtm.rtm_msglen);
   1599 		}
   1600 
   1601 		if (m.r.rtm.rtm_version != RTM_VERSION) {
   1602 			msglog("bogus routing message version %d",
   1603 			    m.r.rtm.rtm_version);
   1604 			continue;
   1605 		}
   1606 
   1607 		ifp = NULL;
   1608 
   1609 		if (m.r.rtm.rtm_type == RTM_IFINFO ||
   1610 		    m.r.rtm.rtm_type == RTM_NEWADDR ||
   1611 		    m.r.rtm.rtm_type == RTM_DELADDR) {
   1612 			strp = if_bit_string(m.ifm.ifm_flags, _B_TRUE);
   1613 			if (strp == NULL) {
   1614 				strp = str;
   1615 				(void) sprintf(str, "%#x", m.ifm.ifm_flags);
   1616 			}
   1617 			ifp = ifwithindex(m.ifm.ifm_index,
   1618 			    m.r.rtm.rtm_type != RTM_DELADDR);
   1619 			if (ifp == NULL) {
   1620 				char ifname[LIFNAMSIZ], *ifnamep;
   1621 
   1622 				ifnamep = if_indextoname(m.ifm.ifm_index,
   1623 				    ifname);
   1624 				if (ifnamep == NULL) {
   1625 					trace_act("note %s with flags %s"
   1626 					    " for unknown interface index #%d",
   1627 					    rtm_type_name(m.r.rtm.rtm_type),
   1628 					    strp, m.ifm.ifm_index);
   1629 				} else {
   1630 					trace_act("note %s with flags %s"
   1631 					    " for unknown interface %s",
   1632 					    rtm_type_name(m.r.rtm.rtm_type),
   1633 					    strp, ifnamep);
   1634 				}
   1635 			} else {
   1636 				trace_act("note %s with flags %s for %s",
   1637 				    rtm_type_name(m.r.rtm.rtm_type),
   1638 				    strp, ifp->int_name);
   1639 			}
   1640 			if (strp != str)
   1641 				free(strp);
   1642 
   1643 			/*
   1644 			 * After being informed of a change to an interface,
   1645 			 * check them all now if the check would otherwise
   1646 			 * be a long time from now, if the interface is
   1647 			 * not known, or if the interface has been turned
   1648 			 * off or on.
   1649 			 */
   1650 			if (ifscan_timer.tv_sec-now.tv_sec >=
   1651 			    CHECK_BAD_INTERVAL || ifp == NULL ||
   1652 			    ((ifp->int_if_flags ^ m.ifm.ifm_flags) &
   1653 			    IFF_UP) != 0)
   1654 				ifscan_timer.tv_sec = now.tv_sec;
   1655 			continue;
   1656 		} else if (m.r.rtm.rtm_type == RTM_CHGADDR ||
   1657 		    m.r.rtm.rtm_type == RTM_FREEADDR) {
   1658 			continue;
   1659 		} else {
   1660 			if (m.r.rtm.rtm_index != 0)
   1661 				ifp = ifwithindex(m.r.rtm.rtm_index, 1);
   1662 		}
   1663 
   1664 		(void) strlcpy(str, rtm_type_name(m.r.rtm.rtm_type),
   1665 		    sizeof (str));
   1666 		strp = &str[strlen(str)];
   1667 		if (m.r.rtm.rtm_type <= RTM_CHANGE)
   1668 			strp += snprintf(strp, sizeof (str) - (strp - str),
   1669 			    " from pid %d", (int)m.r.rtm.rtm_pid);
   1670 
   1671 		/* LINTED */
   1672 		(void) rt_xaddrs(&info, (struct sockaddr_storage *)(&m.r.rtm +
   1673 		    1), (char *)&m + cc, m.r.rtm.rtm_addrs);
   1674 
   1675 		if (INFO_DST(&info) == 0) {
   1676 			trace_act("ignore %s without dst", str);
   1677 			continue;
   1678 		}
   1679 
   1680 		if (INFO_DST(&info)->ss_family != AF_INET) {
   1681 			trace_act("ignore %s for AF %d", str,
   1682 			    INFO_DST(&info)->ss_family);
   1683 			continue;
   1684 		}
   1685 
   1686 		mask = ((INFO_MASK(&info) != 0) ?
   1687 		    ntohl(S_ADDR(INFO_MASK(&info))) :
   1688 		    (m.r.rtm.rtm_flags & RTF_HOST) ?
   1689 		    HOST_MASK : std_mask(S_ADDR(INFO_DST(&info))));
   1690 
   1691 		strp += snprintf(strp, sizeof (str) - (strp - str), ": %s",
   1692 		    addrname(S_ADDR(INFO_DST(&info)), mask, 0));
   1693 
   1694 		if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info)))) ||
   1695 		    IN_LINKLOCAL(ntohl(S_ADDR(INFO_DST(&info))))) {
   1696 			trace_act("ignore multicast/link local %s", str);
   1697 			continue;
   1698 		}
   1699 
   1700 		if (m.r.rtm.rtm_flags & RTF_LLINFO) {
   1701 			trace_act("ignore ARP %s", str);
   1702 			continue;
   1703 		}
   1704 
   1705 		if (get_info_gate(&INFO_GATE(&info), &gate_sin)) {
   1706 			gate = S_ADDR(INFO_GATE(&info));
   1707 			strp += snprintf(strp, sizeof (str) - (strp - str),
   1708 			    " --> %s", naddr_ntoa(gate));
   1709 		} else {
   1710 			gate = 0;
   1711 		}
   1712 
   1713 		if (INFO_AUTHOR(&info) != 0)
   1714 			strp += snprintf(strp, sizeof (str) - (strp - str),
   1715 			    " by authority of %s",
   1716 			    saddr_ntoa(INFO_AUTHOR(&info)));
   1717 
   1718 		switch (m.r.rtm.rtm_type) {
   1719 		case RTM_ADD:
   1720 		case RTM_CHANGE:
   1721 		case RTM_REDIRECT:
   1722 			if (m.r.rtm.rtm_errno != 0) {
   1723 				trace_act("ignore %s with \"%s\" error",
   1724 				    str, rip_strerror(m.r.rtm.rtm_errno));
   1725 			} else {
   1726 				trace_act("%s", str);
   1727 				rtm_add(&m.r.rtm, &info, 0,
   1728 				    !(m.r.rtm.rtm_flags & RTF_GATEWAY) &&
   1729 				    m.r.rtm.rtm_type != RTM_REDIRECT, ifp);
   1730 
   1731 			}
   1732 			break;
   1733 
   1734 		case RTM_DELETE:
   1735 			if (m.r.rtm.rtm_errno != 0 &&
   1736 			    m.r.rtm.rtm_errno != ESRCH) {
   1737 				trace_act("ignore %s with \"%s\" error",
   1738 				    str, rip_strerror(m.r.rtm.rtm_errno));
   1739 			} else {
   1740 				trace_act("%s", str);
   1741 				del_static(S_ADDR(INFO_DST(&info)), mask,
   1742 				    gate, ifp, 1);
   1743 			}
   1744 			break;
   1745 
   1746 		case RTM_LOSING:
   1747 			trace_act("%s", str);
   1748 			rtm_lose(&m.r.rtm, &info);
   1749 			break;
   1750 
   1751 		default:
   1752 			trace_act("ignore %s", str);
   1753 			break;
   1754 		}
   1755 	}
   1756 }
   1757 
   1758 
   1759 /*
   1760  * Disassemble a routing message.  The result is an array of pointers
   1761  * to sockaddr_storage structures stored in the info argument.
   1762  *
   1763  * ss is a pointer to the beginning of the data following the
   1764  * rt_msghdr contained in the routing socket message, which consists
   1765  * of a string of concatenated sockaddr structure of different types.
   1766  *
   1767  * Extended attributes can be appended at the end of the list.
   1768  */
   1769 static int
   1770 rt_xaddrs(struct rt_addrinfo *info,
   1771     struct sockaddr_storage *ss,
   1772     char *lim,
   1773     int addrs)
   1774 {
   1775 	int retv = 0;
   1776 	int i;
   1777 	int abit;
   1778 	int complaints;
   1779 	static int prev_complaints;
   1780 
   1781 #define	XBAD_AF		0x1
   1782 #define	XBAD_SHORT	0x2
   1783 #define	XBAD_LONG	0x4
   1784 
   1785 	(void) memset(info, 0, sizeof (*info));
   1786 	info->rti_addrs = addrs;
   1787 	complaints = 0;
   1788 	for (i = 0, abit = 1; i < RTAX_MAX && (char *)ss < lim;
   1789 	    i++, abit <<= 1) {
   1790 		if ((addrs & abit) == 0)
   1791 			continue;
   1792 		info->rti_info[i] = ss;
   1793 		/* Horrible interface here */
   1794 		switch (ss->ss_family) {
   1795 		case AF_UNIX:
   1796 			/* LINTED */
   1797 			ss = (struct sockaddr_storage *)(
   1798 			    (struct sockaddr_un *)ss + 1);
   1799 			break;
   1800 		case AF_INET:
   1801 			/* LINTED */
   1802 			ss = (struct sockaddr_storage *)(
   1803 			    (struct sockaddr_in *)ss + 1);
   1804 			break;
   1805 		case AF_LINK:
   1806 			/* LINTED */
   1807 			ss = (struct sockaddr_storage *)(
   1808 			    (struct sockaddr_dl *)ss + 1);
   1809 			break;
   1810 		case AF_INET6:
   1811 			/* LINTED */
   1812 			ss = (struct sockaddr_storage *)(
   1813 			    (struct sockaddr_in6 *)ss + 1);
   1814 			break;
   1815 		default:
   1816 			if (!(prev_complaints & XBAD_AF))
   1817 				writelog(LOG_WARNING,
   1818 				    "unknown address family %d "
   1819 				    "encountered", ss->ss_family);
   1820 			if (complaints & XBAD_AF)
   1821 				goto xaddr_done;
   1822 			/* LINTED */
   1823 			ss = (struct sockaddr_storage *)(
   1824 			    (struct sockaddr *)ss + 1);
   1825 			complaints |= XBAD_AF;
   1826 			info->rti_addrs &= abit - 1;
   1827 			addrs = info->rti_addrs;
   1828 			retv = -1;
   1829 			break;
   1830 		}
   1831 		if ((char *)ss > lim) {
   1832 			if (!(prev_complaints & XBAD_SHORT))
   1833 				msglog("sockaddr %d too short by %d "
   1834 				    "bytes", i + 1, (char *)ss - lim);
   1835 			complaints |= XBAD_SHORT;
   1836 			info->rti_info[i] = NULL;
   1837 			info->rti_addrs &= abit - 1;
   1838 			retv = -1;
   1839 			goto xaddr_done;
   1840 		}
   1841 	}
   1842 
   1843 	while (((char *)ss + sizeof (rtm_ext_t)) <= lim) {
   1844 		rtm_ext_t *tp;
   1845 		char *nxt;
   1846 
   1847 		/* LINTED: alignment */
   1848 		tp = (rtm_ext_t *)ss;
   1849 		nxt = (char *)(tp + 1) + tp->rtmex_len;
   1850 
   1851 		if (!IS_P2ALIGNED(tp->rtmex_len, sizeof (uint32_t)) ||
   1852 		    nxt > lim) {
   1853 			break;
   1854 		}
   1855 
   1856 		/* LINTED: alignment */
   1857 		ss = (struct sockaddr_storage *)nxt;
   1858 	}
   1859 
   1860 	if ((char *)ss != lim) {
   1861 		if ((char *)ss > lim) {
   1862 			if (!(prev_complaints & XBAD_SHORT))
   1863 				msglog("routing message too short by %d bytes",
   1864 				    (char *)ss - lim);
   1865 			complaints |= XBAD_SHORT;
   1866 		} else if (!(prev_complaints & XBAD_LONG)) {
   1867 			msglog("%d bytes of routing message left over",
   1868 			    lim - (char *)ss);
   1869 			complaints |= XBAD_LONG;
   1870 		}
   1871 		retv = -1;
   1872 	}
   1873 xaddr_done:
   1874 	prev_complaints = complaints;
   1875 	return (retv);
   1876 }
   1877 
   1878 /* after aggregating, note routes that belong in the kernel */
   1879 static void
   1880 kern_out(struct ag_info *ag)
   1881 {
   1882 	struct khash *k;
   1883 	struct interface *ifp;
   1884 
   1885 	ifp = ag->ag_ifp;
   1886 
   1887 	/*
   1888 	 * Do not install bad routes if they are not already present.
   1889 	 * This includes routes that had RS_NET_SYN for interfaces that
   1890 	 * recently died.
   1891 	 */
   1892 	if (ag->ag_metric == HOPCNT_INFINITY) {
   1893 		k = kern_find(htonl(ag->ag_dst_h), ag->ag_mask,
   1894 		    ag->ag_nhop, ag->ag_ifp, NULL);
   1895 		if (k == NULL)
   1896 			return;
   1897 	} else {
   1898 		k = kern_add(htonl(ag->ag_dst_h), ag->ag_mask, ag->ag_nhop,
   1899 		    ifp);
   1900 	}
   1901 
   1902 	if (k->k_state & KS_NEW) {
   1903 		/* will need to add new entry to the kernel table */
   1904 		k->k_state = KS_ADD;
   1905 		if (ag->ag_state & AGS_GATEWAY)
   1906 			k->k_state |= KS_GATEWAY;
   1907 		if (ag->ag_state & AGS_IF)
   1908 			k->k_state |= KS_IF;
   1909 		if (ag->ag_state & AGS_PASSIVE)
   1910 			k->k_state |= KS_PASSIVE;
   1911 		if (ag->ag_state & AGS_FILE)
   1912 			k->k_state |= KS_FILE;
   1913 		k->k_gate = ag->ag_nhop;
   1914 		k->k_ifp = ifp;
   1915 		k->k_metric = ag->ag_metric;
   1916 		return;
   1917 	}
   1918 
   1919 	if ((k->k_state & (KS_STATIC|KS_DEPRE_IF)) ||
   1920 	    ((k->k_state & (KS_IF|KS_PASSIVE)) == KS_IF)) {
   1921 		return;
   1922 	}
   1923 
   1924 	/* modify existing kernel entry if necessary */
   1925 	if (k->k_gate == ag->ag_nhop && k->k_ifp == ag->ag_ifp &&
   1926 	    k->k_metric != ag->ag_metric) {
   1927 			/*
   1928 			 * Must delete bad interface routes etc.
   1929 			 * to change them.
   1930 			 */
   1931 			if (k->k_metric == HOPCNT_INFINITY)
   1932 				k->k_state |= KS_DEL_ADD;
   1933 			k->k_gate = ag->ag_nhop;
   1934 			k->k_metric = ag->ag_metric;
   1935 			k->k_state |= KS_CHANGE;
   1936 	}
   1937 
   1938 	/*
   1939 	 * If the daemon thinks the route should exist, forget
   1940 	 * about any redirections.
   1941 	 * If the daemon thinks the route should exist, eventually
   1942 	 * override manual intervention by the operator.
   1943 	 */
   1944 	if ((k->k_state & (KS_DYNAMIC | KS_DELETED)) != 0) {
   1945 		k->k_state &= ~KS_DYNAMIC;
   1946 		k->k_state |= (KS_ADD | KS_DEL_ADD);
   1947 	}
   1948 
   1949 	if ((k->k_state & KS_GATEWAY) && !(ag->ag_state & AGS_GATEWAY)) {
   1950 		k->k_state &= ~KS_GATEWAY;
   1951 		k->k_state |= (KS_ADD | KS_DEL_ADD);
   1952 	} else if (!(k->k_state & KS_GATEWAY) && (ag->ag_state & AGS_GATEWAY)) {
   1953 		k->k_state |= KS_GATEWAY;
   1954 		k->k_state |= (KS_ADD | KS_DEL_ADD);
   1955 	}
   1956 
   1957 	/*
   1958 	 * Deleting-and-adding is necessary to change aspects of a route.
   1959 	 * Just delete instead of deleting and then adding a bad route.
   1960 	 * Otherwise, we want to keep the route in the kernel.
   1961 	 */
   1962 	if (k->k_metric == HOPCNT_INFINITY && (k->k_state & KS_DEL_ADD))
   1963 		k->k_state |= KS_DELETE;
   1964 	else
   1965 		k->k_state &= ~KS_DELETE;
   1966 #undef RT
   1967 }
   1968 
   1969 /*
   1970  * Update our image of the kernel forwarding table using the given
   1971  * route from our internal routing table.
   1972  */
   1973 
   1974 /*ARGSUSED1*/
   1975 static int
   1976 walk_kern(struct radix_node *rn, void *argp)
   1977 {
   1978 #define	RT ((struct rt_entry *)rn)
   1979 	uint8_t metric, pref;
   1980 	uint_t ags = 0;
   1981 	int i;
   1982 	struct rt_spare *rts;
   1983 
   1984 	/* Do not install synthetic routes */
   1985 	if (RT->rt_state & RS_NET_SYN)
   1986 		return (0);
   1987 
   1988 	/*
   1989 	 * Do not install static routes here. Only
   1990 	 * read_rt->rtm_add->kern_add should install those
   1991 	 */
   1992 	if ((RT->rt_state & RS_STATIC) &&
   1993 	    (RT->rt_spares[0].rts_origin != RO_FILE))
   1994 		return (0);
   1995 
   1996 	/* Do not clobber kernel if this is a route for a dead interface */
   1997 	if (RT->rt_state & RS_BADIF)
   1998 		return (0);
   1999 
   2000 	if (!(RT->rt_state & RS_IF)) {
   2001 		/* This is an ordinary route, not for an interface. */
   2002 
   2003 		/*
   2004 		 * aggregate, ordinary good routes without regard to
   2005 		 * their metric
   2006 		 */
   2007 		pref = 1;
   2008 		ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE);
   2009 
   2010 		/*
   2011 		 * Do not install host routes directly to hosts, to avoid
   2012 		 * interfering with ARP entries in the kernel table.
   2013 		 */
   2014 		if (RT_ISHOST(RT) && ntohl(RT->rt_dst) == RT->rt_gate)
   2015 			return (0);
   2016 
   2017 	} else {
   2018 		/*
   2019 		 * This is an interface route.
   2020 		 * Do not install routes for "external" remote interfaces.
   2021 		 */
   2022 		if (RT->rt_ifp != NULL && (RT->rt_ifp->int_state & IS_EXTERNAL))
   2023 			return (0);
   2024 
   2025 		/* Interfaces should override received routes. */
   2026 		pref = 0;
   2027 		ags |= (AGS_IF | AGS_CORS_GATE);
   2028 		if (RT->rt_ifp != NULL &&
   2029 		    !(RT->rt_ifp->int_if_flags & IFF_LOOPBACK) &&
   2030 		    (RT->rt_ifp->int_state & (IS_PASSIVE|IS_ALIAS)) ==
   2031 		    IS_PASSIVE) {
   2032 			ags |= AGS_PASSIVE;
   2033 		}
   2034 
   2035 		/*
   2036 		 * If it is not an interface, or an alias for an interface,
   2037 		 * it must be a "gateway."
   2038 		 *
   2039 		 * If it is a "remote" interface, it is also a "gateway" to
   2040 		 * the kernel if is not a alias.
   2041 		 */
   2042 		if (RT->rt_ifp == NULL || (RT->rt_ifp->int_state & IS_REMOTE)) {
   2043 
   2044 			ags |= (AGS_GATEWAY | AGS_SUPPRESS);
   2045 
   2046 			/*
   2047 			 * Do not aggregate IS_PASSIVE routes.
   2048 			 */
   2049 			if (!(RT->rt_ifp->int_state & IS_PASSIVE))
   2050 				ags |= AGS_AGGREGATE;
   2051 		}
   2052 	}
   2053 
   2054 	metric = RT->rt_metric;
   2055 	if (metric == HOPCNT_INFINITY) {
   2056 		/* If the route is dead, try hard to aggregate. */
   2057 		pref = HOPCNT_INFINITY;
   2058 		ags |= (AGS_FINE_GATE | AGS_SUPPRESS);
   2059 		ags &= ~(AGS_IF | AGS_CORS_GATE);
   2060 	}
   2061 
   2062 	/*
   2063 	 * dump all routes that have the same metric as rt_spares[0]
   2064 	 * into the kern_table, to be added to the kernel.
   2065 	 */
   2066 	for (i = 0; i < RT->rt_num_spares; i++) {
   2067 		rts = &RT->rt_spares[i];
   2068 
   2069 		/* Do not install external routes */
   2070 		if (rts->rts_flags & RTS_EXTERNAL)
   2071 			continue;
   2072 
   2073 		if (rts->rts_metric == metric) {
   2074 			ag_check(RT->rt_dst, RT->rt_mask,
   2075 			    rts->rts_router, rts->rts_ifp, rts->rts_gate,
   2076 			    metric, pref, 0, 0,
   2077 			    (rts->rts_origin & RO_FILE) ? (ags|AGS_FILE) : ags,
   2078 			    kern_out);
   2079 		}
   2080 	}
   2081 	return (0);
   2082 #undef RT
   2083 }
   2084 
   2085 
   2086 /* Update the kernel table to match the daemon table. */
   2087 static void
   2088 fix_kern(void)
   2089 {
   2090 	int i;
   2091 	struct khash *k, *pk, *knext;
   2092 
   2093 
   2094 	need_kern = age_timer;
   2095 
   2096 	/* Walk daemon table, updating the copy of the kernel table. */
   2097 	(void) rn_walktree(rhead, walk_kern, NULL);
   2098 	ag_flush(0, 0, kern_out);
   2099 
   2100 	for (i = 0; i < KHASH_SIZE; i++) {
   2101 		pk = NULL;
   2102 		for (k = khash_bins[i]; k != NULL;  k = knext) {
   2103 			knext = k->k_next;
   2104 
   2105 			/* Do not touch local interface routes */
   2106 			if ((k->k_state & KS_DEPRE_IF) ||
   2107 			    (k->k_state & (KS_IF|KS_PASSIVE)) == KS_IF) {
   2108 				pk = k;
   2109 				continue;
   2110 			}
   2111 
   2112 			/* Do not touch static routes */
   2113 			if (k->k_state & KS_STATIC) {
   2114 				kern_check_static(k, 0);
   2115 				pk = k;
   2116 				continue;
   2117 			}
   2118 
   2119 			/* check hold on routes deleted by the operator */
   2120 			if (k->k_keep > now.tv_sec) {
   2121 				/* ensure we check when the hold is over */
   2122 				LIM_SEC(need_kern, k->k_keep);
   2123 				pk = k;
   2124 				continue;
   2125 			}
   2126 
   2127 			if ((k->k_state & KS_DELETE) &&
   2128 			    !(k->k_state & KS_DYNAMIC)) {
   2129 				if ((k->k_dst == RIP_DEFAULT) &&
   2130 				    (k->k_ifp != NULL) &&
   2131 				    (kern_alternate(RIP_DEFAULT,
   2132 				    k->k_mask, k->k_gate, k->k_ifp,
   2133 				    NULL) == NULL))
   2134 					rdisc_restore(k->k_ifp);
   2135 				kern_ioctl(k, RTM_DELETE, 0);
   2136 				if (pk != NULL)
   2137 					pk->k_next = knext;
   2138 				else
   2139 					khash_bins[i] = knext;
   2140 				free(k);
   2141 				continue;
   2142 			}
   2143 
   2144 			if (k->k_state & KS_DEL_ADD)
   2145 				kern_ioctl(k, RTM_DELETE, 0);
   2146 
   2147 			if (k->k_state & KS_ADD) {
   2148 				if ((k->k_dst == RIP_DEFAULT) &&
   2149 				    (k->k_ifp != NULL))
   2150 					rdisc_suppress(k->k_ifp);
   2151 				kern_ioctl(k, RTM_ADD,
   2152 				    ((0 != (k->k_state & (KS_GATEWAY |
   2153 				    KS_DYNAMIC))) ? RTF_GATEWAY : 0));
   2154 			} else if (k->k_state & KS_CHANGE) {
   2155 				kern_ioctl(k, RTM_CHANGE,
   2156 				    ((0 != (k->k_state & (KS_GATEWAY |
   2157 				    KS_DYNAMIC))) ? RTF_GATEWAY : 0));
   2158 			}
   2159 			k->k_state &= ~(KS_ADD|KS_CHANGE|KS_DEL_ADD);
   2160 
   2161 			/*
   2162 			 * Mark this route to be deleted in the next cycle.
   2163 			 * This deletes routes that disappear from the
   2164 			 * daemon table, since the normal aging code
   2165 			 * will clear the bit for routes that have not
   2166 			 * disappeared from the daemon table.
   2167 			 */
   2168 			k->k_state |= KS_DELETE;
   2169 			pk = k;
   2170 		}
   2171 	}
   2172 }
   2173 
   2174 
   2175 /* Delete a static route in the image of the kernel table. */
   2176 void
   2177 del_static(in_addr_t dst, in_addr_t mask, in_addr_t gate,
   2178     struct interface *ifp, int gone)
   2179 {
   2180 	struct khash *k;
   2181 	struct rt_entry *rt;
   2182 
   2183 	/*
   2184 	 * Just mark it in the table to be deleted next time the kernel
   2185 	 * table is updated.
   2186 	 * If it has already been deleted, mark it as such, and set its
   2187 	 * keep-timer so that it will not be deleted again for a while.
   2188 	 * This lets the operator delete a route added by the daemon
   2189 	 * and add a replacement.
   2190 	 */
   2191 	k = kern_find(dst, mask, gate, ifp, NULL);
   2192 	if (k != NULL && (gate == 0 || k->k_gate == gate)) {
   2193 		k->k_state &= ~(KS_STATIC | KS_DYNAMIC | KS_CHECK);
   2194 		k->k_state |= KS_DELETE;
   2195 		if (gone) {
   2196 			k->k_state |= KS_DELETED;
   2197 			k->k_keep = now.tv_sec + K_KEEP_LIM;
   2198 		}
   2199 	}
   2200 
   2201 	rt = rtget(dst, mask);
   2202 	if (rt != NULL && (rt->rt_state & RS_STATIC))
   2203 		rtbad(rt, NULL);
   2204 }
   2205 
   2206 
   2207 /*
   2208  * Delete all routes generated from ICMP Redirects that use a given gateway,
   2209  * as well as old redirected routes.
   2210  */
   2211 void
   2212 del_redirects(in_addr_t bad_gate, time_t old)
   2213 {
   2214 	int i;
   2215 	struct khash *k;
   2216 	boolean_t dosupply = should_supply(NULL);
   2217 
   2218 	for (i = 0; i < KHASH_SIZE; i++) {
   2219 		for (k = khash_bins[i]; k != NULL; k = k->k_next) {
   2220 			if (!(k->k_state & KS_DYNAMIC) ||
   2221 			    (k->k_state & (KS_STATIC|KS_IF|KS_DEPRE_IF)))
   2222 				continue;
   2223 
   2224 			if (k->k_gate != bad_gate && k->k_redirect_time > old &&
   2225 			    !dosupply)
   2226 				continue;
   2227 
   2228 			k->k_state |= KS_DELETE;
   2229 			k->k_state &= ~KS_DYNAMIC;
   2230 			need_kern.tv_sec = now.tv_sec;
   2231 			trace_act("mark redirected %s --> %s for deletion",
   2232 			    addrname(k->k_dst, k->k_mask, 0),
   2233 			    naddr_ntoa(k->k_gate));
   2234 		}
   2235 	}
   2236 }
   2237 
   2238 /* Start the daemon tables. */
   2239 void
   2240 rtinit(void)
   2241 {
   2242 	int i;
   2243 	struct ag_info *ag;
   2244 
   2245 	/* Initialize the radix trees */
   2246 	rn_init();
   2247 	(void) rn_inithead((void**)&rhead, 32);
   2248 
   2249 	/* mark all of the slots in the table free */
   2250 	ag_avail = ag_slots;
   2251 	for (ag = ag_slots, i = 1; i < NUM_AG_SLOTS; i++) {
   2252 		ag->ag_fine = ag+1;
   2253 		ag++;
   2254 	}
   2255 }
   2256 
   2257 
/*
 * Scratch AF_INET socket addresses.  rtget(), rtfind(), rtadd() and
 * rtdelete() store an address or mask in sin_addr and then hand these to
 * the radix-tree routines reached through rhead.
 */
static struct sockaddr_in dst_sock = {AF_INET};
static struct sockaddr_in mask_sock = {AF_INET};
   2260 
   2261 
   2262 static void
   2263 set_need_flash(void)
   2264 {
   2265 	if (!need_flash) {
   2266 		need_flash = _B_TRUE;
   2267 		/*
   2268 		 * Do not send the flash update immediately.  Wait a little
   2269 		 * while to hear from other routers.
   2270 		 */
   2271 		no_flash.tv_sec = now.tv_sec + MIN_WAITTIME;
   2272 	}
   2273 }
   2274 
   2275 
   2276 /* Get a particular routing table entry */
   2277 struct rt_entry *
   2278 rtget(in_addr_t dst, in_addr_t mask)
   2279 {
   2280 	struct rt_entry *rt;
   2281 
   2282 	dst_sock.sin_addr.s_addr = dst;
   2283 	mask_sock.sin_addr.s_addr = htonl(mask);
   2284 	rt = (struct rt_entry *)rhead->rnh_lookup(&dst_sock, &mask_sock, rhead);
   2285 	if (rt == NULL || rt->rt_dst != dst || rt->rt_mask != mask)
   2286 		return (NULL);
   2287 
   2288 	return (rt);
   2289 }
   2290 
   2291 
   2292 /* Find a route to dst as the kernel would. */
   2293 struct rt_entry *
   2294 rtfind(in_addr_t dst)
   2295 {
   2296 	dst_sock.sin_addr.s_addr = dst;
   2297 	return ((struct rt_entry *)rhead->rnh_matchaddr(&dst_sock, rhead));
   2298 }
   2299 
   2300 /* add a route to the table */
   2301 void
   2302 rtadd(in_addr_t	dst,
   2303     in_addr_t	mask,
   2304     uint16_t	state,			/* rt_state for the entry */
   2305     struct	rt_spare *new)
   2306 {
   2307 	struct rt_entry *rt;
   2308 	in_addr_t smask;
   2309 	int i;
   2310 	struct rt_spare *rts;
   2311 
   2312 	/* This is the only function that increments total_routes. */
   2313 	if (total_routes == MAX_ROUTES) {
   2314 		msglog("have maximum (%d) routes", total_routes);
   2315 		return;
   2316 	}
   2317 
   2318 	rt = rtmalloc(sizeof (*rt), "rtadd");
   2319 	(void) memset(rt, 0, sizeof (*rt));
   2320 	rt->rt_spares = rtmalloc(SPARE_INC  * sizeof (struct rt_spare),
   2321 	    "rtadd");
   2322 	rt->rt_num_spares = SPARE_INC;
   2323 	(void) memset(rt->rt_spares, 0, SPARE_INC  * sizeof (struct rt_spare));
   2324 	for (rts = rt->rt_spares, i = rt->rt_num_spares; i != 0; i--, rts++)
   2325 		rts->rts_metric = HOPCNT_INFINITY;
   2326 
   2327 	rt->rt_nodes->rn_key = (uint8_t *)&rt->rt_dst_sock;
   2328 	rt->rt_dst = dst;
   2329 	rt->rt_dst_sock.sin_family = AF_INET;
   2330 	if (mask != HOST_MASK) {
   2331 		smask = std_mask(dst);
   2332 		if ((smask & ~mask) == 0 && mask > smask)
   2333 			state |= RS_SUBNET;
   2334 	}
   2335 	mask_sock.sin_addr.s_addr = htonl(mask);
   2336 	rt->rt_mask = mask;
   2337 	rt->rt_spares[0] = *new;
   2338 	rt->rt_state = state;
   2339 	rt->rt_time = now.tv_sec;
   2340 	rt->rt_poison_metric = HOPCNT_INFINITY;
   2341 	rt->rt_seqno = update_seqno;
   2342 
   2343 	if (TRACEACTIONS)
   2344 		trace_add_del("Add", rt);
   2345 
   2346 	need_kern.tv_sec = now.tv_sec;
   2347 	set_need_flash();
   2348 
   2349 	if (NULL == rhead->rnh_addaddr(&rt->rt_dst_sock, &mask_sock, rhead,
   2350 	    rt->rt_nodes)) {
   2351 		msglog("rnh_addaddr() failed for %s mask=%s",
   2352 		    naddr_ntoa(dst), naddr_ntoa(htonl(mask)));
   2353 		free(rt);
   2354 	}
   2355 
   2356 	total_routes++;
   2357 }
   2358 
   2359 
   2360 /* notice a changed route */
   2361 void
   2362 rtchange(struct rt_entry *rt,
   2363     uint16_t	state,			/* new state bits */
   2364     struct rt_spare *new,
   2365     char	*label)
   2366 {
   2367 	if (rt->rt_metric != new->rts_metric) {
   2368 		/*
   2369 		 * Fix the kernel immediately if it seems the route
   2370 		 * has gone bad, since there may be a working route that
   2371 		 * aggregates this route.
   2372 		 */
   2373 		if (new->rts_metric == HOPCNT_INFINITY) {
   2374 			need_kern.tv_sec = now.tv_sec;
   2375 			if (new->rts_time >= now.tv_sec - EXPIRE_TIME)
   2376 				new->rts_time = now.tv_sec - EXPIRE_TIME;
   2377 		}
   2378 		rt->rt_seqno = update_seqno;
   2379 		set_need_flash();
   2380 	}
   2381 
   2382 	if (rt->rt_gate != new->rts_gate) {
   2383 		need_kern.tv_sec = now.tv_sec;
   2384 		rt->rt_seqno = update_seqno;
   2385 		set_need_flash();
   2386 	}
   2387 
   2388 	state |= (rt->rt_state & RS_SUBNET);
   2389 
   2390 	/* Keep various things from deciding ageless routes are stale. */
   2391 	if (!AGE_RT(state, rt->rt_spares[0].rts_origin, new->rts_ifp))
   2392 		new->rts_time = now.tv_sec;
   2393 
   2394 	if (TRACEACTIONS)
   2395 		trace_change(rt, state, new,
   2396 		    label ? label : "Chg   ");
   2397 
   2398 	rt->rt_state = state;
   2399 	/*
   2400 	 * If the interface state of the new primary route is good,
   2401 	 * turn off RS_BADIF flag
   2402 	 */
   2403 	if ((rt->rt_state & RS_BADIF) &&
   2404 	    IS_IFF_UP(new->rts_ifp->int_if_flags) &&
   2405 	    !(new->rts_ifp->int_state & (IS_BROKE | IS_SICK)))
   2406 		rt->rt_state &= ~(RS_BADIF);
   2407 
   2408 	rt->rt_spares[0] = *new;
   2409 }
   2410 
   2411 
   2412 /* check for a better route among the spares */
   2413 static struct rt_spare *
   2414 rts_better(struct rt_entry *rt)
   2415 {
   2416 	struct rt_spare *rts, *rts1;
   2417 	int i;
   2418 
   2419 	/* find the best alternative among the spares */
   2420 	rts = rt->rt_spares+1;
   2421 	for (i = rt->rt_num_spares, rts1 = rts+1; i > 2; i--, rts1++) {
   2422 		if (BETTER_LINK(rt, rts1, rts))
   2423 			rts = rts1;
   2424 	}
   2425 
   2426 	return (rts);
   2427 }
   2428 
   2429 
   2430 /* switch to a backup route */
   2431 void
   2432 rtswitch(struct rt_entry *rt,
   2433     struct rt_spare *rts)
   2434 {
   2435 	struct rt_spare swap;
   2436 	char label[10];
   2437 
   2438 	/* Do not change permanent routes */
   2439 	if (0 != (rt->rt_state & (RS_MHOME | RS_STATIC |
   2440 	    RS_NET_SYN | RS_IF)))
   2441 		return;
   2442 
   2443 	/* find the best alternative among the spares */
   2444 	if (rts == NULL)
   2445 		rts = rts_better(rt);
   2446 
   2447 	/* Do not bother if it is not worthwhile. */
   2448 	if (!BETTER_LINK(rt, rts, rt->rt_spares))
   2449 		return;
   2450 
   2451 	swap = rt->rt_spares[0];
   2452 	(void) snprintf(label, sizeof (label), "Use #%d",
   2453 	    (int)(rts - rt->rt_spares));
   2454 	rtchange(rt, rt->rt_state & ~(RS_NET_SYN), rts, label);
   2455 
   2456 	if (swap.rts_metric == HOPCNT_INFINITY) {
   2457 		*rts = rts_empty;
   2458 	} else {
   2459 		*rts = swap;
   2460 	}
   2461 
   2462 }
   2463 
   2464 
   2465 void
   2466 rtdelete(struct rt_entry *rt)
   2467 {
   2468 	struct rt_entry *deleted_rt;
   2469 	struct rt_spare *rts;
   2470 	int i;
   2471 	in_addr_t gate = rt->rt_gate; /* for debugging */
   2472 
   2473 	if (TRACEACTIONS)
   2474 		trace_add_del("Del", rt);
   2475 
   2476 	for (i = 0; i < rt->rt_num_spares; i++) {
   2477 		rts = &rt->rt_spares[i];
   2478 		rts_delete(rt, rts);
   2479 	}
   2480 
   2481 	dst_sock.sin_addr.s_addr = rt->rt_dst;
   2482 	mask_sock.sin_addr.s_addr = htonl(rt->rt_mask);
   2483 	if (rt != (deleted_rt =
   2484 	    ((struct rt_entry *)rhead->rnh_deladdr(&dst_sock, &mask_sock,
   2485 	    rhead)))) {
   2486 		msglog("rnh_deladdr(%s) failed; found rt 0x%lx",
   2487 		    rtname(rt->rt_dst, rt->rt_mask, gate), deleted_rt);
   2488 		if (deleted_rt != NULL)
   2489 			free(deleted_rt);
   2490 	}
   2491 	total_routes--;
   2492 	free(rt->rt_spares);
   2493 	free(rt);
   2494 
   2495 	if (dst_sock.sin_addr.s_addr == RIP_DEFAULT) {
   2496 		/*
   2497 		 * we just deleted the default route. Trigger rdisc_sort
   2498 		 * so that we can recover from any rdisc information that
   2499 		 * is valid
   2500 		 */
   2501 		rdisc_timer.tv_sec = 0;
   2502 	}
   2503 }
   2504 
   2505 void
   2506 rts_delete(struct rt_entry *rt, struct rt_spare *rts)
   2507 {
   2508 	struct khash *k;
   2509 
   2510 	trace_upslot(rt, rts, &rts_empty);
   2511 	k = kern_find(rt->rt_dst, rt->rt_mask,
   2512 	    rts->rts_gate, rts->rts_ifp, NULL);
   2513 	if (k != NULL &&
   2514 	    !(k->k_state & KS_DEPRE_IF) &&
   2515 	    ((k->k_state & (KS_IF|KS_PASSIVE)) != KS_IF)) {
   2516 		k->k_state |= KS_DELETE;
   2517 		need_kern.tv_sec = now.tv_sec;
   2518 	}
   2519 
   2520 	*rts = rts_empty;
   2521 }
   2522 
   2523 /*
   2524  * Get rid of a bad route, and try to switch to a replacement.
   2525  * If the route has gone bad because of a bad interface,
   2526  * the information about the dead interface is available in badifp
   2527  * for the purpose of sanity checks, if_flags checks etc.
   2528  */
   2529 static void
   2530 rtbad(struct rt_entry *rt, struct interface *badifp)
   2531 {
   2532 	struct rt_spare new;
   2533 	uint16_t rt_state;
   2534 
   2535 
   2536 	if (badifp == NULL || (rt->rt_spares[0].rts_ifp == badifp)) {
   2537 		/* Poison the route */
   2538 		new = rt->rt_spares[0];
   2539 		new.rts_metric = HOPCNT_INFINITY;
   2540 		rt_state = rt->rt_state & ~(RS_IF | RS_LOCAL | RS_STATIC);
   2541 	}
   2542 
   2543 	if (badifp != NULL) {
   2544 		/*
   2545 		 * Dont mark the rtentry bad unless the ifp for the primary
   2546 		 * route is the bad ifp
   2547 		 */
   2548 		if (rt->rt_spares[0].rts_ifp != badifp)
   2549 			return;
   2550 		/*
   2551 		 * badifp has just gone bad. We want to keep this
   2552 		 * rt_entry around so that we tell our rip-neighbors
   2553 		 * about the bad route, but we can't do anything
   2554 		 * to the kernel itself, so mark it as RS_BADIF
   2555 		 */
   2556 		trace_misc("rtbad:Setting RS_BADIF (%s)", badifp->int_name);
   2557 		rt_state |= RS_BADIF;
   2558 		new.rts_ifp = &dummy_ifp;
   2559 	}
   2560 	rtchange(rt, rt_state, &new, 0);
   2561 	rtswitch(rt, 0);
   2562 }
   2563 
   2564 
   2565 /*
   2566  * Junk a RS_NET_SYN or RS_LOCAL route,
   2567  *	unless it is needed by another interface.
   2568  */
   2569 void
   2570 rtbad_sub(struct rt_entry *rt, struct interface *badifp)
   2571 {
   2572 	struct interface *ifp, *ifp1;
   2573 	struct intnet *intnetp;
   2574 	uint_t state;
   2575 
   2576 
   2577 	ifp1 = NULL;
   2578 	state = 0;
   2579 
   2580 	if (rt->rt_state & RS_LOCAL) {
   2581 		/*
   2582 		 * Is this the route through loopback for the interface?
   2583 		 * If so, see if it is used by any other interfaces, such
   2584 		 * as a point-to-point interface with the same local address.
   2585 		 */
   2586 		for (ifp = ifnet; ifp != NULL; ifp = ifp->int_next) {
   2587 			/* Retain it if another interface needs it. */
   2588 			if (ifp->int_addr == rt->rt_ifp->int_addr) {
   2589 				state |= RS_LOCAL;
   2590 				ifp1 = ifp;
   2591 				break;
   2592 			}
   2593 		}
   2594 
   2595 	}
   2596 
   2597 	if (!(state & RS_LOCAL)) {
   2598 		/*
   2599 		 * Retain RIPv1 logical network route if there is another
   2600 		 * interface that justifies it.
   2601 		 */
   2602 		if (rt->rt_state & RS_NET_SYN) {
   2603 			for (ifp = ifnet; ifp != NULL; ifp = ifp->int_next) {
   2604 				if ((ifp->int_state & IS_NEED_NET_SYN) &&
   2605 				    rt->rt_mask == ifp->int_std_mask &&
   2606 				    rt->rt_dst == ifp->int_std_addr) {
   2607 					state |= RS_NET_SYN;
   2608 					ifp1 = ifp;
   2609 					break;
   2610 				}
   2611 			}
   2612 		}
   2613 
   2614 		/* or if there is an authority route that needs it. */
   2615 		for (intnetp = intnets; intnetp != NULL;
   2616 		    intnetp = intnetp->intnet_next) {
   2617 			if (intnetp->intnet_addr == rt->rt_dst &&
   2618 			    intnetp->intnet_mask == rt->rt_mask) {
   2619 				state |= (RS_NET_SYN | RS_NET_INT);
   2620 				break;
   2621 			}
   2622 		}
   2623 	}
   2624 
   2625 	if (ifp1 != NULL || (state & RS_NET_SYN)) {
   2626 		struct rt_spare new = rt->rt_spares[0];
   2627 		new.rts_ifp = ifp1;
   2628 		rtchange(rt, ((rt->rt_state & ~(RS_NET_SYN|RS_LOCAL)) | state),
   2629 		    &new, 0);
   2630 	} else {
   2631 		rtbad(rt, badifp);
   2632 	}
   2633 }
   2634 
   2635 /*
   2636  * Called while walking the table looking for sick interfaces
   2637  * or after a time change.
   2638  */
   2639 int
   2640 walk_bad(struct radix_node *rn,
   2641     void *argp)
   2642 {
   2643 #define	RT ((struct rt_entry *)rn)
   2644 	struct rt_spare *rts;
   2645 	int i, j = -1;
   2646 
   2647 	/* fix any spare routes through the interface */
   2648 	for (i = 1; i < RT->rt_num_spares; i++) {
   2649 		rts = &((struct rt_entry *)rn)->rt_spares[i];
   2650 
   2651 		if (rts->rts_metric < HOPCNT_INFINITY &&
   2652 		    (rts->rts_ifp == NULL ||
   2653 		    (rts->rts_ifp->int_state & IS_BROKE)))
   2654 			rts_delete(RT, rts);
   2655 		else {
   2656 			if (rts->rts_origin != RO_NONE)
   2657 				j = i;
   2658 		}
   2659 	}
   2660 
   2661 	/*
   2662 	 * Deal with the main route
   2663 	 * finished if it has been handled before or if its interface is ok
   2664 	 */
   2665 	if (RT->rt_ifp == NULL || !(RT->rt_ifp->int_state & IS_BROKE))
   2666 		return (0);
   2667 
   2668 	/* Bad routes for other than interfaces are easy. */
   2669 	if (!(RT->rt_state & (RS_IF | RS_NET_SYN | RS_LOCAL))) {
   2670 		if (j > 0) {
   2671 			RT->rt_spares[0].rts_metric = HOPCNT_INFINITY;
   2672 			rtswitch(RT, NULL);
   2673 		} else {
   2674 			rtbad(RT, (struct interface *)argp);
   2675 		}
   2676 		return (0);
   2677 	}
   2678 
   2679 	rtbad_sub(RT, (struct interface *)argp);
   2680 	return (0);
   2681 #undef RT
   2682 }
   2683 
   2684 /*
   2685  * Called while walking the table to replace a duplicate interface
   2686  * with a backup.
   2687  */
   2688 int
   2689 walk_rewire(struct radix_node *rn, void *argp)
   2690 {
   2691 	struct rt_entry *RT = (struct rt_entry *)rn;
   2692 	struct rewire_data *wire = (struct rewire_data *)argp;
   2693 	struct rt_spare *rts;
   2694 	int i;
   2695 
   2696 	/* fix any spare routes through the interface */
   2697 	rts = RT->rt_spares;
   2698 	for (i = RT->rt_num_spares; i > 0; i--, rts++) {
   2699 		if (rts->rts_ifp == wire->if_old) {
   2700 			rts->rts_ifp = wire->if_new;
   2701 			if ((RT->rt_dst == RIP_DEFAULT) &&
   2702 			    (wire->if_old->int_state & IS_SUPPRESS_RDISC))
   2703 				rdisc_suppress(rts->rts_ifp);
   2704 			if ((rts->rts_metric += wire->metric_delta) >
   2705 			    HOPCNT_INFINITY)
   2706 				rts->rts_metric = HOPCNT_INFINITY;
   2707 
   2708 			/*
   2709 			 * If the main route is getting a worse metric,
   2710 			 * then it may be time to switch to a backup.
   2711 			 */
   2712 			if (i == RT->rt_num_spares && wire->metric_delta > 0) {
   2713 				rtswitch(RT, NULL);
   2714 			}
   2715 		}
   2716 	}
   2717 
   2718 	return (0);
   2719 }
   2720 
   2721 /* Check the age of an individual route. */
   2722 static int
   2723 walk_age(struct radix_node *rn, void *argp)
   2724 {
   2725 #define	RT ((struct rt_entry *)rn)
   2726 	struct interface *ifp;
   2727 	struct rt_spare *rts;
   2728 	int i;
   2729 	in_addr_t age_bad_gate = *(in_addr_t *)argp;
   2730 
   2731 
   2732 	/*
   2733 	 * age all of the spare routes, including the primary route
   2734 	 * currently in use
   2735 	 */
   2736 	rts = RT->rt_spares;
   2737 	for (i = RT->rt_num_spares; i != 0; i--, rts++) {
   2738 
   2739 		ifp = rts->rts_ifp;
   2740 		if (i == RT->rt_num_spares) {
   2741 			if (!AGE_RT(RT->rt_state, rts->rts_origin, ifp)) {
   2742 				/*
   2743 				 * Keep various things from deciding ageless
   2744 				 * routes are stale
   2745 				 */
   2746 				rts->rts_time = now.tv_sec;
   2747 				continue;
   2748 			}
   2749 
   2750 			/* forget RIP routes after RIP has been turned off. */
   2751 			if (rip_sock < 0) {
   2752 				rts->rts_time = now_stale + 1;
   2753 			}
   2754 		}
   2755 
   2756 		/* age failing routes */
   2757 		if (age_bad_gate == rts->rts_gate &&
   2758 		    rts->rts_time >= now_stale) {
   2759 			rts->rts_time -= SUPPLY_INTERVAL;
   2760 		}
   2761 
   2762 		/* trash the spare routes when they go bad */
   2763 		if (rts->rts_origin == RO_RIP &&
   2764 		    ((rip_sock < 0) ||
   2765 		    (rts->rts_metric < HOPCNT_INFINITY &&
   2766 		    now_garbage > rts->rts_time)) &&
   2767 		    i != RT->rt_num_spares) {
   2768 			rts_delete(RT, rts);
   2769 		}
   2770 	}
   2771 
   2772 
   2773 	/* finished if the active route is still fresh */
   2774 	if (now_stale <= RT->rt_time)
   2775 		return (0);
   2776 
   2777 	/* try to switch to an alternative */
   2778 	rtswitch(RT, NULL);
   2779 
   2780 	/* Delete a dead route after it has been publically mourned. */
   2781 	if (now_garbage > RT->rt_time) {
   2782 		rtdelete(RT);
   2783 		return (0);
   2784 	}
   2785 
   2786 	/* Start poisoning a bad route before deleting it. */
   2787 	if (now.tv_sec - RT->rt_time > EXPIRE_TIME) {
   2788 		struct rt_spare new = RT->rt_spares[0];
   2789 
   2790 		new.rts_metric = HOPCNT_INFINITY;
   2791 		rtchange(RT, RT->rt_state, &new, 0);
   2792 	}
   2793 	return (0);
   2794 }
   2795 
   2796 
   2797 /* Watch for dead routes and interfaces. */
   2798 void
   2799 age(in_addr_t bad_gate)
   2800 {
   2801 	struct interface *ifp;
   2802 	int need_query = 0;
   2803 
   2804 	/*
   2805 	 * If not listening to RIP, there is no need to age the routes in
   2806 	 * the table.
   2807 	 */
   2808 	age_timer.tv_sec = (now.tv_sec
   2809 	    + ((rip_sock < 0) ? NEVER : SUPPLY_INTERVAL));
   2810 
   2811 	/*
   2812 	 * Check for dead IS_REMOTE interfaces by timing their
   2813 	 * transmissions.
   2814 	 */
   2815 	for (ifp = ifnet; ifp; ifp = ifp->int_next) {
   2816 		if (!(ifp->int_state & IS_REMOTE))
   2817 			continue;
   2818 
   2819 		/* ignore unreachable remote interfaces */
   2820 		if (!check_remote(ifp))
   2821 			continue;
   2822 
   2823 		/* Restore remote interface that has become reachable */
   2824 		if (ifp->int_state & IS_BROKE)
   2825 			if_ok(ifp, "remote ", _B_FALSE);
   2826 
   2827 		if (ifp->int_act_time != NEVER &&
   2828 		    now.tv_sec - ifp->int_act_time > EXPIRE_TIME) {
   2829 			writelog(LOG_NOTICE,
   2830 			    "remote interface %s to %s timed out after"
   2831 			    " %ld:%ld",
   2832 			    ifp->int_name,
   2833 			    naddr_ntoa(ifp->int_dstaddr),
   2834 			    (now.tv_sec - ifp->int_act_time)/60,
   2835 			    (now.tv_sec - ifp->int_act_time)%60);
   2836 			if_sick(ifp, _B_FALSE);
   2837 		}
   2838 
   2839 		/*
   2840 		 * If we have not heard from the other router
   2841 		 * recently, ask it.
   2842 		 */
   2843 		if (now.tv_sec >= ifp->int_query_time) {
   2844 			ifp->int_query_time = NEVER;
   2845 			need_query = 1;
   2846 		}
   2847 	}
   2848 
   2849 	/* Age routes. */
   2850 	(void) rn_walktree(rhead, walk_age, &bad_gate);
   2851 
   2852 	/*
   2853 	 * delete old redirected routes to keep the kernel table small
   2854 	 * and prevent blackholes
   2855 	 */
   2856 	del_redirects(bad_gate, now.tv_sec-STALE_TIME);
   2857 
   2858 	/* Update the kernel routing table. */
   2859 	fix_kern();
   2860 
   2861 	/* poke reticent remote gateways */
   2862 	if (need_query)
   2863 		rip_query();
   2864 }
   2865 
   2866 void
   2867 kern_dump(void)
   2868 {
   2869 	int i;
   2870 	struct khash *k;
   2871 
   2872 	for (i = 0; i < KHASH_SIZE; i++) {
   2873 		for (k = khash_bins[i]; k != NULL; k = k->k_next)
   2874 			trace_khash(k);
   2875 	}
   2876 }
   2877 
   2878 
   2879 static struct interface *
   2880 gwkludge_iflookup(in_addr_t dstaddr, in_addr_t addr, in_addr_t mask)
   2881 {
   2882 	uint32_t int_state;
   2883 	struct interface *ifp;
   2884 
   2885 	for (ifp = ifnet; ifp != NULL; ifp = ifp->int_next) {
   2886 		int_state = ifp->int_state;
   2887 
   2888 		if (!(int_state & IS_REMOTE))
   2889 			continue;
   2890 
   2891 		if (ifp->int_dstaddr == dstaddr && ifp->int_addr == addr &&
   2892 		    ifp->int_mask == mask)
   2893 			return (ifp);
   2894 	}
   2895 	return (NULL);
   2896 }
   2897 
   2898 /*
   2899  * Lookup logical interface structure given the gateway address.
   2900  * Returns null if no interfaces match the given name.
   2901  */
   2902 static struct interface *
   2903 lifp_iflookup(in_addr_t addr, const char *name)
   2904 {
   2905 	struct physical_interface *phyi;
   2906 	struct interface *ifp;
   2907 	struct interface *best = NULL;
   2908 
   2909 	if ((phyi = phys_byname(name)) == NULL)
   2910 		return (NULL);
   2911 
   2912 	for (ifp = phyi->phyi_interface; ifp != NULL;
   2913 	    ifp = ifp->int_ilist.hl_next) {
   2914 
   2915 #ifdef DEBUG_KERNEL_ROUTE_READ
   2916 		(void) fprintf(stderr, " checking interface"
   2917 		    " %-4s %-4s %-15s-->%-15s \n",
   2918 		    phyi->phyi_name, ifp->int_name,
   2919 		    naddr_ntoa(ifp->int_addr),
   2920 		    addrname(((ifp->int_if_flags & IFF_POINTOPOINT) ?
   2921 		    ifp->int_dstaddr : htonl(ifp->int_net)),
   2922 		    ifp->int_mask, 1));
   2923 #endif
   2924 		/* Exact match found */
   2925 		if (addr_on_ifp(addr, ifp, &best))
   2926 			return (ifp);
   2927 	}
   2928 	/* No exact match found but return any best match found */
   2929 	return (best);
   2930 }
   2931