Home | History | Annotate | Download | only in ip
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <inet/ip_arp.h>
     28 #include <inet/ip_ndp.h>
     29 #include <net/if_arp.h>
     30 #include <netinet/if_ether.h>
     31 #include <sys/strsubr.h>
     32 #include <inet/ip6.h>
     33 #include <inet/ip.h>
     34 #include <inet/ip_ire.h>
     35 #include <inet/ip_if.h>
     36 #include <sys/dlpi.h>
     37 #include <sys/sunddi.h>
     38 #include <sys/strsun.h>
     39 #include <sys/sdt.h>
     40 #include <inet/mi.h>
     41 #include <inet/arp.h>
     42 #include <inet/ipdrop.h>
     43 #include <sys/sockio.h>
     44 #include <inet/ip_impl.h>
     45 #include <sys/policy.h>
     46 
     47 #define	ARL_LL_ADDR_OFFSET(arl)	(((arl)->arl_sap_length) < 0 ? \
     48 	(sizeof (dl_unitdata_req_t)) : \
     49 	((sizeof (dl_unitdata_req_t)) + (ABS((arl)->arl_sap_length))))
     50 
/*
 * MAC-specific intelligence.  Shouldn't be needed, but the DL_INFO_ACK
 * doesn't quite do it for us.
 *
 * One entry per DLPI MAC type.  arp_m_lookup() searches arp_m_tbl[] using
 * arp_mac_type as the key.
 */
typedef struct arp_m_s {
	t_uscalar_t	arp_mac_type;		/* DLPI MAC type (lookup key) */
	uint32_t	arp_mac_arp_hw_type;	/* ARPHRD_* value for this MAC */
	t_scalar_t	arp_mac_sap_length;	/* fallback arl_sap_length */
	uint32_t	arp_mac_hw_addr_length;	/* fallback hw address length */
} arp_m_t;
     61 
/*
 * Forward declarations for routines private to this file.  All of them have
 * internal linkage, including arp_rput(), whose definition further down
 * omits the `static' keyword but inherits it from this declaration.
 */
static int arp_close(queue_t *, int);
static void arp_rput(queue_t *, mblk_t *);
static void arp_wput(queue_t *, mblk_t *);
static arp_m_t	*arp_m_lookup(t_uscalar_t mac_type);
static void arp_notify(ipaddr_t, mblk_t *, uint32_t, ip_recv_attr_t *,
	ncec_t *);
static int arp_output(ill_t *, uint32_t, const uchar_t *, const uchar_t *,
	const uchar_t *, const uchar_t *, uchar_t *);
static int  arp_modclose(arl_t *);
static void  arp_mod_close_tail(arl_t *);
static mblk_t *arl_unbind(arl_t *);
static void arp_process_packet(ill_t *, mblk_t *);
static void arp_excl(ipsq_t *, queue_t *, mblk_t *, void *);
static void arp_drop_packet(const char *str, mblk_t *, ill_t *);
static int arp_open(queue_t *, dev_t *, int, int, cred_t *);
static int ip_sioctl_ifunitsel_arp(queue_t *, int *);
static int ip_sioctl_slifname_arp(queue_t *, void *);
static void arp_dlpi_send(arl_t *, mblk_t *);
static void arl_defaults_common(arl_t *, mblk_t *);
static int arp_modopen(queue_t *, dev_t *, int, int, cred_t *);
static void arp_ifname_notify(arl_t *);
static void arp_rput_dlpi_writer(ipsq_t *, queue_t *, mblk_t *, void *);
static arl_t *ill_to_arl(ill_t *);
     85 
/*
 * DL_PRIM() reads the DLPI primitive code from the start of an mblk.  It
 * does not check that the mblk is long enough to hold one.
 */
#define	DL_PRIM(mp)	(((union DL_primitives *)(mp)->b_rptr)->dl_primitive)
/*
 * IS_DLPI_DATA() is true for an M_PROTO mblk that is at least as long as a
 * dl_unitdata_ind_t and whose primitive is DL_UNITDATA_IND.  The length is
 * tested before the primitive is read.  Note that `mp' is evaluated more
 * than once.
 */
#define	IS_DLPI_DATA(mp)						\
	((DB_TYPE(mp) == M_PROTO) &&					\
	MBLKL(mp) >= sizeof (dl_unitdata_ind_t) &&			\
	(DL_PRIM(mp) == DL_UNITDATA_IND))
     91 
/*
 * Result codes returned by ip_nce_resolve_all() when it classifies the
 * sender information carried in a received ARP packet.
 */
#define	AR_NOTFOUND	1	/* No matching ace found in cache */
#define	AR_MERGED	2	/* Matching ace updated (RFC 826 Merge_flag) */
#define	AR_LOOPBACK	3	/* Our own arp packet was received */
#define	AR_BOGON	4	/* Another host has our IP addr. */
#define	AR_FAILED	5	/* Duplicate Address Detection has failed */
#define	AR_CHANGED	6	/* Address has changed; tell IP (and merged) */
     98 
/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * a tunable that turns off defending our own addresses -- confirm at its
 * users.
 */
boolean_t arp_no_defense;

/*
 * STREAMS glue for the "arp" module.  Both the read and the write side share
 * arp_mod_info and the same open/close routines; neither side has a service
 * procedure.  See module_info(9S) and qinit(9S) for the field order.
 */
struct module_info arp_mod_info = {
	IP_MOD_ID, "arp", 1, INFPSZ, 65536, 1024
};
/* Read side: messages arriving from below are handled by arp_rput(). */
static struct qinit rinit_arp = {
	(pfi_t)arp_rput, NULL, arp_open, arp_close, NULL, &arp_mod_info
};
/* Write side: messages arriving from above are handled by arp_wput(). */
static struct qinit winit_arp = {
	(pfi_t)arp_wput, NULL, arp_open, arp_close, NULL,
	&arp_mod_info
};
struct streamtab arpinfo = {
	&rinit_arp, &winit_arp
};
/*
 * Size of the fixed part of an ARP header; the sender and target
 * hardware/protocol addresses start this many bytes in.
 */
#define	ARH_FIXED_LEN	8
/* NOTE(review): not used in the visible part of the file; see its users. */
#define	AR_LL_HDR_SLACK	32
    116 
    117 /*
    118  * pfhooks for ARP.
    119  */
    120 #define	ARP_HOOK_IN(_hook, _event, _ilp, _hdr, _fm, _m, ipst)		\
    121 									\
    122 	if ((_hook).he_interested) {                       		\
    123 		hook_pkt_event_t info;                          	\
    124 									\
    125 		info.hpe_protocol = ipst->ips_arp_net_data;		\
    126 		info.hpe_ifp = _ilp;                       		\
    127 		info.hpe_ofp = 0;                       		\
    128 		info.hpe_hdr = _hdr;                            	\
    129 		info.hpe_mp = &(_fm);                           	\
    130 		info.hpe_mb = _m;                               	\
    131 		if (hook_run(ipst->ips_arp_net_data->netd_hooks,	\
    132 		    _event, (hook_data_t)&info) != 0) {			\
    133 			if (_fm != NULL) {                      	\
    134 				freemsg(_fm);                   	\
    135 				_fm = NULL;                     	\
    136 			}                                       	\
    137 			_hdr = NULL;                            	\
    138 			_m = NULL;                              	\
    139 		} else {                                        	\
    140 			_hdr = info.hpe_hdr;                    	\
    141 			_m = info.hpe_mb;                       	\
    142 		}                                               	\
    143 	}
    144 
    145 #define	ARP_HOOK_OUT(_hook, _event, _olp, _hdr, _fm, _m, ipst)		\
    146 									\
    147 	if ((_hook).he_interested) {                       		\
    148 		hook_pkt_event_t info;                          	\
    149 									\
    150 		info.hpe_protocol = ipst->ips_arp_net_data;		\
    151 		info.hpe_ifp = 0;                       		\
    152 		info.hpe_ofp = _olp;                       		\
    153 		info.hpe_hdr = _hdr;                            	\
    154 		info.hpe_mp = &(_fm);                           	\
    155 		info.hpe_mb = _m;                               	\
    156 		if (hook_run(ipst->ips_arp_net_data->netd_hooks,	\
    157 		    _event, (hook_data_t)&info) != 0) {			\
    158 			if (_fm != NULL) {                      	\
    159 				freemsg(_fm);                   	\
    160 				_fm = NULL;                     	\
    161 			}                                       	\
    162 			_hdr = NULL;                            	\
    163 			_m = NULL;                              	\
    164 		} else {                                        	\
    165 			_hdr = info.hpe_hdr;                    	\
    166 			_m = info.hpe_mb;                       	\
    167 		}                                               	\
    168 	}
    169 
/*
 * Per-MAC-type defaults, searched linearly by arp_m_lookup().
 * Columns: DLPI MAC type, ARP hardware type, SAP length, hardware address
 * length.  The DL_OTHER row is the fallback used by arp_hw_type() and
 * arp_ll_set_defaults() for MAC types that are not listed, so it must not be
 * removed.
 */
static arp_m_t	arp_m_tbl[] = {
	{ DL_CSMACD,	ARPHRD_ETHER,	-2,	6},	/* 802.3 */
	{ DL_TPB,	ARPHRD_IEEE802,	-2,	6},	/* 802.4 */
	{ DL_TPR,	ARPHRD_IEEE802,	-2,	6},	/* 802.5 */
	{ DL_METRO,	ARPHRD_IEEE802,	-2,	6},	/* 802.6 */
	{ DL_ETHER,	ARPHRD_ETHER,	-2,	6},	/* Ethernet */
	{ DL_FDDI,	ARPHRD_ETHER,	-2,	6},	/* FDDI */
	{ DL_IB,	ARPHRD_IB,	-2,	20},	/* Infiniband */
	{ DL_OTHER,	ARPHRD_ETHER,	-2,	6}	/* unknown */
};
    180 
    181 static void
    182 arl_refhold_locked(arl_t *arl)
    183 {
    184 	ASSERT(MUTEX_HELD(&arl->arl_lock));
    185 	arl->arl_refcnt++;
    186 	ASSERT(arl->arl_refcnt != 0);
    187 }
    188 
    189 static void
    190 arl_refrele(arl_t *arl)
    191 {
    192 	mutex_enter(&arl->arl_lock);
    193 	ASSERT(arl->arl_refcnt != 0);
    194 	arl->arl_refcnt--;
    195 	if (arl->arl_refcnt > 1) {
    196 		mutex_exit(&arl->arl_lock);
    197 		return;
    198 	}
    199 
    200 	/* ill_close or arp_unbind_complete may be waiting */
    201 	cv_broadcast(&arl->arl_cv);
    202 	mutex_exit(&arl->arl_lock);
    203 }
    204 
    205 /*
    206  * wake up any pending ip ioctls.
    207  */
    208 static void
    209 arp_cmd_done(ill_t *ill, int err, t_uscalar_t lastprim)
    210 {
    211 	if (lastprim == DL_UNBIND_REQ && ill->ill_replumbing)
    212 		arp_replumb_done(ill, 0);
    213 	else
    214 		arp_bringup_done(ill, err);
    215 }
    216 
/*
 * Classify the sender information (protocol address src_paddr, hardware
 * address src_haddr of length hlen) taken from a received ARP packet with
 * operation `op', and merge it into the neighbor cache where appropriate.
 *
 * The ncec for src_paddr is looked up with ncec_lookup_illgrp_v4() and handed
 * back through *sncec.  Returns one of:
 *	AR_NOTFOUND	no ncec exists for src_paddr (*sncec is NULL)
 *	AR_LOOPBACK	the entry is one of our own addresses and the hardware
 *			address matches it
 *	AR_FAILED	the entry was still unverified; it has been deleted
 *			and released here, and *sncec is reset to NULL
 *	AR_BOGON	we are authoritative for the address but the packet
 *			carries a different hardware address
 *	AR_MERGED	the entry was refreshed
 *	AR_CHANGED	the entry was refreshed and its hardware address
 *			changed
 *
 * NOTE(review): except for AR_FAILED, the ncec returned in *sncec is not
 * released here; presumably the lookup returns it held and the caller must
 * ncec_refrele() it -- confirm against the caller.
 */
static int
ip_nce_resolve_all(ill_t *ill, uchar_t *src_haddr, uint32_t hlen,
    const in_addr_t *src_paddr, ncec_t **sncec, int op)
{
	int retv;
	ncec_t *ncec;
	boolean_t ll_changed;
	uchar_t *lladdr = NULL;
	int new_state;

	ASSERT(ill != NULL);

	ncec = ncec_lookup_illgrp_v4(ill, src_paddr);
	*sncec = ncec;

	if (ncec == NULL) {
		retv = AR_NOTFOUND;
		goto done;
	}

	mutex_enter(&ncec->ncec_lock);
	/*
	 * IP addr and hardware address match what we already
	 * have, then this is a broadcast packet emitted by one of our
	 * interfaces, reflected by the switch and received on another
	 * interface.  We return AR_LOOPBACK.
	 */
	lladdr = ncec->ncec_lladdr;
	if (NCE_MYADDR(ncec) && hlen == ncec->ncec_ill->ill_phys_addr_length &&
	    bcmp(lladdr, src_haddr, hlen) == 0) {
		mutex_exit(&ncec->ncec_lock);
		retv = AR_LOOPBACK;
		goto done;
	}
	/*
	 * If the entry is unverified, then we've just verified that
	 * someone else already owns this address, because this is a
	 * message with the same protocol address but different
	 * hardware address.
	 */
	if (ncec->ncec_flags & NCE_F_UNVERIFIED) {
		/* ncec_lock is dropped before the entry is deleted. */
		mutex_exit(&ncec->ncec_lock);
		ncec_delete(ncec);
		ncec_refrele(ncec);
		*sncec = NULL;
		retv = AR_FAILED;
		goto done;
	}

	/*
	 * If the IP address matches ours and we're authoritative for
	 * this entry, then some other node is using our IP addr, so
	 * return AR_BOGON.  Also reset the transmit count to zero so
	 * that, if we're currently in initial announcement mode, we
	 * switch back to the lazier defense mode.  Knowing that
	 * there's at least one duplicate out there, we ought not
	 * blindly announce.
	 *
	 * NCE_F_AUTHORITY is set in one of two ways:
	 * 1. /sbin/arp told us so, via the "permanent" flag.
	 * 2. This is one of my addresses.
	 */
	if (ncec->ncec_flags & NCE_F_AUTHORITY) {
		ncec->ncec_unsolicit_count = 0;
		mutex_exit(&ncec->ncec_lock);
		retv = AR_BOGON;
		goto done;
	}

	/*
	 * No address conflict was detected, and we are getting
	 * ready to update the ncec's hwaddr. The nce MUST NOT be on an
	 * under interface, because all dynamic nce's are created on the
	 * native interface (in the non-IPMP case) or on the IPMP
	 * meta-interface (in the IPMP case)
	 */
	ASSERT(!IS_UNDER_IPMP(ncec->ncec_ill));

	/*
	 * update ncec with src_haddr, hlen.
	 *
	 * We are trying to resolve this ncec_addr/src_paddr and we
	 * got a REQUEST/RESPONSE from the ncec_addr/src_paddr.
	 * So the new_state is at least "STALE". If, in addition,
	 * this is a solicited, unicast ARP_RESPONSE, we can transition
	 * to REACHABLE.
	 */
	new_state = ND_STALE;
	ip1dbg(("got info for ncec %p from addr %x\n",
	    (void *)ncec, *src_paddr));
	retv = AR_MERGED;
	if (ncec->ncec_state == ND_INCOMPLETE ||
	    ncec->ncec_state == ND_INITIAL) {
		/* No usable address was recorded yet, so always install it. */
		ll_changed = B_TRUE;
	} else {
		ll_changed = nce_cmp_ll_addr(ncec, src_haddr, hlen);
		if (!ll_changed)
			new_state = ND_UNCHANGED;
		else
			retv = AR_CHANGED;
	}
	/*
	 * We don't have the equivalent of the IPv6 'S' flag indicating
	 * a solicited response, so we assume that if we are in
	 * INCOMPLETE, or got back an unchanged lladdr in PROBE state,
	 * and this is an ARP_RESPONSE, it must be a
	 * solicited response allowing us to transition to REACHABLE.
	 */
	if (op == ARP_RESPONSE) {
		switch (ncec->ncec_state) {
		case ND_PROBE:
			new_state = (ll_changed ? ND_STALE : ND_REACHABLE);
			break;
		case ND_INCOMPLETE:
			new_state = ND_REACHABLE;
			break;
		}
	}
	/*
	 * Call nce_update() to refresh fastpath information on any
	 * dependent nce_t entries.  A NULL address tells it that the
	 * hardware address is unchanged.
	 */
	nce_update(ncec, new_state, (ll_changed ? src_haddr : NULL));
	mutex_exit(&ncec->ncec_lock);
	/* Called only after ncec_lock has been dropped. */
	nce_resolv_ok(ncec);
done:
	return (retv);
}
    345 
    346 /* Find an entry for a particular MAC type in the arp_m_tbl. */
    347 static arp_m_t	*
    348 arp_m_lookup(t_uscalar_t mac_type)
    349 {
    350 	arp_m_t	*arm;
    351 
    352 	for (arm = arp_m_tbl; arm < A_END(arp_m_tbl); arm++) {
    353 		if (arm->arp_mac_type == mac_type)
    354 			return (arm);
    355 	}
    356 	return (NULL);
    357 }
    358 
    359 static uint32_t
    360 arp_hw_type(t_uscalar_t mactype)
    361 {
    362 	arp_m_t *arm;
    363 
    364 	if ((arm = arp_m_lookup(mactype)) == NULL)
    365 		arm = arp_m_lookup(DL_OTHER);
    366 	return (arm->arp_mac_arp_hw_type);
    367 }
    368 
    369 /*
    370  * Called when an DLPI control message has been acked; send down the next
    371  * queued message (if any).
    372  * The DLPI messages of interest being bind, attach and unbind since
    373  * these are the only ones sent by ARP via arp_dlpi_send.
    374  */
    375 static void
    376 arp_dlpi_done(arl_t *arl, ill_t *ill)
    377 {
    378 	mblk_t *mp;
    379 	int err;
    380 	t_uscalar_t prim;
    381 
    382 	mutex_enter(&arl->arl_lock);
    383 	prim = arl->arl_dlpi_pending;
    384 
    385 	if ((mp = arl->arl_dlpi_deferred) == NULL) {
    386 		arl->arl_dlpi_pending = DL_PRIM_INVAL;
    387 		if (arl->arl_state_flags & ARL_LL_DOWN)
    388 			err = ENETDOWN;
    389 		else
    390 			err = 0;
    391 		mutex_exit(&arl->arl_lock);
    392 
    393 		mutex_enter(&ill->ill_lock);
    394 		ill->ill_arl_dlpi_pending = 0;
    395 		mutex_exit(&ill->ill_lock);
    396 		arp_cmd_done(ill, err, prim);
    397 		return;
    398 	}
    399 
    400 	arl->arl_dlpi_deferred = mp->b_next;
    401 	mp->b_next = NULL;
    402 
    403 	ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
    404 
    405 	arl->arl_dlpi_pending = DL_PRIM(mp);
    406 	mutex_exit(&arl->arl_lock);
    407 
    408 	mutex_enter(&ill->ill_lock);
    409 	ill->ill_arl_dlpi_pending = 1;
    410 	mutex_exit(&ill->ill_lock);
    411 
    412 	putnext(arl->arl_wq, mp);
    413 }
    414 
    415 /*
    416  * This routine is called during module initialization when the DL_INFO_ACK
    417  * comes back from the device.	We set up defaults for all the device dependent
    418  * doo-dads we are going to need.  This will leave us ready to roll if we are
    419  * attempting auto-configuration.  Alternatively, these defaults can be
    420  * overridden by initialization procedures possessing higher intelligence.
    421  *
    422  * Caller will free the mp.
    423  */
    424 static void
    425 arp_ll_set_defaults(arl_t *arl, mblk_t *mp)
    426 {
    427 	arp_m_t		*arm;
    428 	dl_info_ack_t	*dlia = (dl_info_ack_t *)mp->b_rptr;
    429 
    430 	if ((arm = arp_m_lookup(dlia->dl_mac_type)) == NULL)
    431 		arm = arp_m_lookup(DL_OTHER);
    432 	ASSERT(arm != NULL);
    433 
    434 	/*
    435 	 * We initialize based on parameters in the (currently) not too
    436 	 * exhaustive arp_m_tbl.
    437 	 */
    438 	if (dlia->dl_version == DL_VERSION_2) {
    439 		arl->arl_sap_length = dlia->dl_sap_length;
    440 		arl->arl_phys_addr_length = dlia->dl_brdcst_addr_length;
    441 		if (dlia->dl_provider_style == DL_STYLE2)
    442 			arl->arl_needs_attach = 1;
    443 	} else {
    444 		arl->arl_sap_length = arm->arp_mac_sap_length;
    445 		arl->arl_phys_addr_length = arm->arp_mac_hw_addr_length;
    446 	}
    447 	/*
    448 	 * Note: the arp_hw_type in the arp header may be derived from
    449 	 * the ill_mac_type and arp_m_lookup().
    450 	 */
    451 	arl->arl_sap = ETHERTYPE_ARP;
    452 	arl_defaults_common(arl, mp);
    453 }
    454 
    455 static void
    456 arp_wput(queue_t *q, mblk_t *mp)
    457 {
    458 	int err = EINVAL;
    459 	struct iocblk *ioc;
    460 	mblk_t *mp1;
    461 
    462 	switch (DB_TYPE(mp)) {
    463 	case M_IOCTL:
    464 		ASSERT(q->q_next != NULL);
    465 		ioc = (struct iocblk *)mp->b_rptr;
    466 		if (ioc->ioc_cmd != SIOCSLIFNAME &&
    467 		    ioc->ioc_cmd != IF_UNITSEL) {
    468 			DTRACE_PROBE4(arl__dlpi, char *, "arp_wput",
    469 			    char *, "<some ioctl>", char *, "-",
    470 			    arl_t *, (arl_t *)q->q_ptr);
    471 			putnext(q, mp);
    472 			return;
    473 		}
    474 		if ((mp1 = mp->b_cont) == 0)
    475 			err = EINVAL;
    476 		else if (ioc->ioc_cmd == SIOCSLIFNAME)
    477 			err = ip_sioctl_slifname_arp(q, mp1->b_rptr);
    478 		else if (ioc->ioc_cmd == IF_UNITSEL)
    479 			err = ip_sioctl_ifunitsel_arp(q, (int *)mp1->b_rptr);
    480 		if (err == 0)
    481 			miocack(q, mp, 0, 0);
    482 		else
    483 			miocnak(q, mp, 0, err);
    484 		return;
    485 	default:
    486 		DTRACE_PROBE4(arl__dlpi, char *, "arp_wput default",
    487 		    char *, "default mblk", char *, "-",
    488 		    arl_t *, (arl_t *)q->q_ptr);
    489 		putnext(q, mp);
    490 		return;
    491 	}
    492 }
    493 
    494 /*
    495  * similar to ill_dlpi_pending(): verify that the received DLPI response
    496  * matches the one that is pending for the arl.
    497  */
    498 static boolean_t
    499 arl_dlpi_pending(arl_t *arl, t_uscalar_t prim)
    500 {
    501 	t_uscalar_t pending;
    502 
    503 	mutex_enter(&arl->arl_lock);
    504 	if (arl->arl_dlpi_pending == prim) {
    505 		mutex_exit(&arl->arl_lock);
    506 		return (B_TRUE);
    507 	}
    508 
    509 	if (arl->arl_state_flags & ARL_CONDEMNED) {
    510 		mutex_exit(&arl->arl_lock);
    511 		return (B_FALSE);
    512 	}
    513 	pending = arl->arl_dlpi_pending;
    514 	mutex_exit(&arl->arl_lock);
    515 
    516 	if (pending == DL_PRIM_INVAL) {
    517 		ip0dbg(("arl_dlpi_pending unsolicited ack for %s on %s",
    518 		    dl_primstr(prim), arl->arl_name));
    519 	} else {
    520 		ip0dbg(("arl_dlpi_pending ack for %s on %s expect %s",
    521 		    dl_primstr(prim), arl->arl_name, dl_primstr(pending)));
    522 	}
    523 	return (B_FALSE);
    524 }
    525 
/*
 * DLPI messages, other than DL_UNITDATA_IND are handled here.
 *
 * The message is mapped to the request primitive it answers.  Primitives
 * that are not acks handled by ARP (and messages too short to carry a
 * primitive) are passed upstream.  Acks that do not match the arl's pending
 * request are freed.  Matching acks are handed to arp_rput_dlpi_writer()
 * through qwriter_ip(), except for the initial DL_INFO_ACK, which is
 * consumed here.
 *
 * NOTE(review): only sizeof (dl_primitive) bytes are validated before the
 * error_ack/ok_ack fields are read; this relies on the provider sending
 * well-formed acks.
 */
static void
arp_rput_dlpi(queue_t *q, mblk_t *mp)
{
	arl_t		*arl = (arl_t *)q->q_ptr;
	union DL_primitives *dlp;
	t_uscalar_t	prim;
	t_uscalar_t	reqprim = DL_PRIM_INVAL;
	ill_t		*ill;

	/* Too short to contain a primitive: not ours to interpret. */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (dlp->dl_primitive)) {
		putnext(q, mp);
		return;
	}
	dlp = (union DL_primitives *)mp->b_rptr;
	prim = dlp->dl_primitive;

	/*
	 * If we received an ACK but didn't send a request for it, then it
	 * can't be part of any pending operation; discard up-front.
	 */
	switch (prim) {
	case DL_ERROR_ACK:
		/*
		 * ce is confused about how DLPI works, so we have to interpret
		 * an "error" on DL_NOTIFY_ACK (which we never could have sent)
		 * as really meaning an error on DL_NOTIFY_REQ.
		 *
		 * Note that supporting DL_NOTIFY_REQ is optional, so printing
		 * out an error message on the console isn't warranted except
		 * for debug.
		 */
		if (dlp->error_ack.dl_error_primitive == DL_NOTIFY_ACK ||
		    dlp->error_ack.dl_error_primitive == DL_NOTIFY_REQ) {
			reqprim = DL_NOTIFY_REQ;
		} else {
			reqprim = dlp->error_ack.dl_error_primitive;
		}
		break;
	case DL_INFO_ACK:
		reqprim = DL_INFO_REQ;
		break;
	case DL_OK_ACK:
		reqprim = dlp->ok_ack.dl_correct_primitive;
		break;
	case DL_BIND_ACK:
		reqprim = DL_BIND_REQ;
		break;
	default:
		DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl,
		    union DL_primitives *, dlp);
		putnext(q, mp);
		return;
	}
	/* Drop acks that do not answer the request currently pending. */
	if (reqprim == DL_PRIM_INVAL || !arl_dlpi_pending(arl, reqprim)) {
		freemsg(mp);
		return;
	}
	DTRACE_PROBE4(arl__dlpi, char *, "arp_rput_dlpi received",
	    char *, dl_primstr(prim), char *, dl_primstr(reqprim),
	    arl_t *, arl);

	ASSERT(prim != DL_NOTIFY_IND);

	/* May be NULL; a non-NULL ill is released by qwriter_ip() below. */
	ill = arl_to_ill(arl);

	switch (reqprim) {
	case DL_INFO_REQ:
		/*
		 * ill has not been set up yet for this case. This is the
		 * DL_INFO_ACK for the first DL_INFO_REQ sent from
		 * arp_modopen(). There should be no other arl_dlpi_deferred
		 * messages pending. We initialize the arl here.
		 */
		ASSERT(!arl->arl_dlpi_style_set);
		ASSERT(arl->arl_dlpi_pending == DL_INFO_REQ);
		ASSERT(arl->arl_dlpi_deferred == NULL);
		arl->arl_dlpi_pending = DL_PRIM_INVAL;
		arp_ll_set_defaults(arl, mp);
		freemsg(mp);
		return;
	case DL_UNBIND_REQ:
		mutex_enter(&arl->arl_lock);
		arl->arl_state_flags &= ~ARL_DL_UNBIND_IN_PROGRESS;
		/*
		 * This is not an error, so we don't set ARL_LL_DOWN
		 */
		arl->arl_state_flags &= ~ARL_LL_UP;
		arl->arl_state_flags |= ARL_LL_UNBOUND;
		if (arl->arl_state_flags & ARL_CONDEMNED) {
			/*
			 * if this is part of the unplumb the arl may
			 * vaporize any moment after we cv_signal the
			 * arl_cv so we reset arl_dlpi_pending here.
			 * All other cases (including replumb) will
			 * have the arl_dlpi_pending reset in
			 * arp_dlpi_done.
			 */
			arl->arl_dlpi_pending = DL_PRIM_INVAL;
		}
		cv_signal(&arl->arl_cv);
		mutex_exit(&arl->arl_lock);
		break;
	}
	if (ill != NULL) {
		/*
		 * ill ref obtained by arl_to_ill()  will be released
		 * by qwriter_ip()
		 */
		qwriter_ip(ill, ill->ill_wq, mp, arp_rput_dlpi_writer,
		    CUR_OP, B_TRUE);
		return;
	}
	/* No ill to run the writer against: nothing more to do. */
	freemsg(mp);
}
    641 
/*
 * Handling of DLPI messages that require exclusive access to the ipsq.
 *
 * Invoked through qwriter_ip() from arp_rput_dlpi() with q set to the ill's
 * write queue.  Updates the arl link state according to the ack, then calls
 * arp_dlpi_done() to send the next deferred DLPI message or complete the
 * command.  Every path that obtained an arl drops it with arl_refrele()
 * before returning (presumably ill_to_arl() returns the arl held -- confirm).
 * The message is freed here unless it is passed on with putnext().
 */
/* ARGSUSED */
static void
arp_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
{
	union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
	ill_t		*ill = (ill_t *)q->q_ptr;
	arl_t		*arl = ill_to_arl(ill);

	if (arl == NULL) {
		/*
		 * happens as a result of arp_modclose triggering unbind.
		 * arp_rput_dlpi will cv_signal the arl_cv and the modclose
		 * will complete, but when it does ipsq_exit, the waiting
		 * qwriter_ip gets into the ipsq but will find the arl null.
		 * There should be no deferred messages in this case, so
		 * just complete and exit.
		 */
		arp_cmd_done(ill, 0, DL_UNBIND_REQ);
		freemsg(mp);
		return;
	}
	switch (dlp->dl_primitive) {
	case DL_ERROR_ACK:
		switch (dlp->error_ack.dl_error_primitive) {
		case DL_UNBIND_REQ:
			/* Failed unbind: unbound, and marked down as well. */
			mutex_enter(&arl->arl_lock);
			arl->arl_state_flags &= ~ARL_DL_UNBIND_IN_PROGRESS;
			arl->arl_state_flags &= ~ARL_LL_UP;
			arl->arl_state_flags |= ARL_LL_UNBOUND;
			arl->arl_state_flags |= ARL_LL_DOWN;
			cv_signal(&arl->arl_cv);
			mutex_exit(&arl->arl_lock);
			break;
		case DL_BIND_REQ:
			/* Failed bind: the link stays down and unbound. */
			mutex_enter(&arl->arl_lock);
			arl->arl_state_flags &= ~ARL_LL_UP;
			arl->arl_state_flags |= ARL_LL_DOWN;
			arl->arl_state_flags |= ARL_LL_UNBOUND;
			cv_signal(&arl->arl_cv);
			mutex_exit(&arl->arl_lock);
			break;
		case DL_ATTACH_REQ:
			break;
		default:
			/* If it's anything else, we didn't send it. */
			arl_refrele(arl);
			putnext(q, mp);
			return;
		}
		break;
	case DL_OK_ACK:
		DTRACE_PROBE4(arl__dlpi, char *, "arp_rput_dlpi_writer ok",
		    char *, dl_primstr(dlp->ok_ack.dl_correct_primitive),
		    char *, dl_primstr(dlp->ok_ack.dl_correct_primitive),
		    arl_t *, arl);
		mutex_enter(&arl->arl_lock);
		switch (dlp->ok_ack.dl_correct_primitive) {
		case DL_UNBIND_REQ:
		case DL_ATTACH_REQ:
			/* No state change needed here for these acks. */
			break;
		default:
			ip0dbg(("Dropping unrecognized DL_OK_ACK for %s",
			    dl_primstr(dlp->ok_ack.dl_correct_primitive)));
			mutex_exit(&arl->arl_lock);
			arl_refrele(arl);
			freemsg(mp);
			return;
		}
		mutex_exit(&arl->arl_lock);
		break;
	case DL_BIND_ACK:
		DTRACE_PROBE2(rput_dl_bind, arl_t *, arl,
		    dl_bind_ack_t *, &dlp->bind_ack);

		/* Successful bind: the link is now up. */
		mutex_enter(&arl->arl_lock);
		ASSERT(arl->arl_state_flags & ARL_LL_BIND_PENDING);
		arl->arl_state_flags &=
		    ~(ARL_LL_BIND_PENDING|ARL_LL_DOWN|ARL_LL_UNBOUND);
		arl->arl_state_flags |= ARL_LL_UP;
		mutex_exit(&arl->arl_lock);
		break;
	case DL_UDERROR_IND:
		/*
		 * arp_rput_dlpi() does not forward this primitive here, so
		 * this case is defensive; pass the message along.
		 */
		DTRACE_PROBE2(rput_dl_uderror, arl_t *, arl,
		    dl_uderror_ind_t *, &dlp->uderror_ind);
		arl_refrele(arl);
		putnext(q, mp);
		return;
	default:
		DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl,
		    union DL_primitives *, dlp);
		arl_refrele(arl);
		putnext(q, mp);
		return;
	}
	/* The ack has been processed; move on to the next DLPI message. */
	arp_dlpi_done(arl, ill);
	arl_refrele(arl);
	freemsg(mp);
}
    743 
    744 void
    745 arp_rput(queue_t *q, mblk_t *mp)
    746 {
    747 	arl_t		*arl = q->q_ptr;
    748 	boolean_t	need_refrele = B_FALSE;
    749 
    750 	mutex_enter(&arl->arl_lock);
    751 	if (((arl->arl_state_flags &
    752 	    (ARL_CONDEMNED | ARL_LL_REPLUMBING)) != 0)) {
    753 		/*
    754 		 * Only allow high priority DLPI messages during unplumb or
    755 		 * replumb, and we don't take an arl_refcnt for that case.
    756 		 */
    757 		if (DB_TYPE(mp) != M_PCPROTO) {
    758 			mutex_exit(&arl->arl_lock);
    759 			freemsg(mp);
    760 			return;
    761 		}
    762 	} else {
    763 		arl_refhold_locked(arl);
    764 		need_refrele = B_TRUE;
    765 	}
    766 	mutex_exit(&arl->arl_lock);
    767 
    768 	switch (DB_TYPE(mp)) {
    769 	case M_PCPROTO:
    770 	case M_PROTO: {
    771 		ill_t *ill;
    772 
    773 		/*
    774 		 * could be one of
    775 		 * (i)   real message from the wire, (DLPI_DATA)
    776 		 * (ii)  DLPI message
    777 		 * Take a ref on the ill associated with this arl to
    778 		 * prevent the ill from being unplumbed until this thread
    779 		 * is done.
    780 		 */
    781 		if (IS_DLPI_DATA(mp)) {
    782 			ill = arl_to_ill(arl);
    783 			if (ill == NULL) {
    784 				arp_drop_packet("No ill", mp, ill);
    785 				break;
    786 			}
    787 			arp_process_packet(ill, mp);
    788 			ill_refrele(ill);
    789 			break;
    790 		}
    791 		/* Miscellaneous DLPI messages get shuffled off. */
    792 		arp_rput_dlpi(q, mp);
    793 		break;
    794 	}
    795 	case M_ERROR:
    796 	case M_HANGUP:
    797 		if (mp->b_rptr < mp->b_wptr)
    798 			arl->arl_error = (int)(*mp->b_rptr & 0xFF);
    799 		if (arl->arl_error == 0)
    800 			arl->arl_error = ENXIO;
    801 		freemsg(mp);
    802 		break;
    803 	default:
    804 		ip1dbg(("arp_rput other db type %x\n", DB_TYPE(mp)));
    805 		putnext(q, mp);
    806 		break;
    807 	}
    808 	if (need_refrele)
    809 		arl_refrele(arl);
    810 }
    811 
    812 static void
    813 arp_process_packet(ill_t *ill, mblk_t *mp)
    814 {
    815 	mblk_t 		*mp1;
    816 	arh_t		*arh;
    817 	in_addr_t	src_paddr, dst_paddr;
    818 	uint32_t	hlen, plen;
    819 	boolean_t	is_probe;
    820 	int		op;
    821 	ncec_t		*dst_ncec, *src_ncec = NULL;
    822 	uchar_t		*src_haddr, *arhp, *dst_haddr, *dp, *sp;
    823 	int		err;
    824 	ip_stack_t	*ipst;
    825 	boolean_t	need_ill_refrele = B_FALSE;
    826 	nce_t		*nce;
    827 	uchar_t		*src_lladdr;
    828 	dl_unitdata_ind_t *dlui;
    829 	ip_recv_attr_t	iras;
    830 
    831 	ASSERT(ill != NULL);
    832 	if (ill->ill_flags & ILLF_NOARP) {
    833 		arp_drop_packet("Interface does not support ARP", mp, ill);
    834 		return;
    835 	}
    836 	ipst = ill->ill_ipst;
    837 	/*
    838 	 * What we should have at this point is a DL_UNITDATA_IND message
    839 	 * followed by an ARP packet.  We do some initial checks and then
    840 	 * get to work.
    841 	 */
    842 	dlui = (dl_unitdata_ind_t *)mp->b_rptr;
    843 	if (dlui->dl_group_address == 1) {
    844 		/*
    845 		 * multicast or broadcast  packet. Only accept on the ipmp
    846 		 * nominated interface for multicasts ('cast_ill').
    847 		 * If we have no cast_ill we are liberal and accept everything.
    848 		 */
    849 		if (IS_UNDER_IPMP(ill)) {
    850 			/* For an under ill_grp can change under lock */
    851 			rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    852 			if (!ill->ill_nom_cast && ill->ill_grp != NULL &&
    853 			    ill->ill_grp->ig_cast_ill != NULL) {
    854 				rw_exit(&ipst->ips_ill_g_lock);
    855 				arp_drop_packet("Interface is not nominated "
    856 				    "for multicast sends and receives",
    857 				    mp, ill);
    858 				return;
    859 			}
    860 			rw_exit(&ipst->ips_ill_g_lock);
    861 		}
    862 	}
    863 	mp1 = mp->b_cont;
    864 	if (mp1 == NULL) {
    865 		arp_drop_packet("Missing ARP packet", mp, ill);
    866 		return;
    867 	}
    868 	if (mp1->b_cont != NULL) {
    869 		/* No fooling around with funny messages. */
    870 		if (!pullupmsg(mp1, -1)) {
    871 			arp_drop_packet("Funny message: pullup failed",
    872 			    mp, ill);
    873 			return;
    874 		}
    875 	}
    876 	arh = (arh_t *)mp1->b_rptr;
    877 	hlen = arh->arh_hlen;
    878 	plen = arh->arh_plen;
    879 	if (MBLKL(mp1) < ARH_FIXED_LEN + 2 * hlen + 2 * plen) {
    880 		arp_drop_packet("mblk len too small", mp, ill);
    881 		return;
    882 	}
    883 	/*
    884 	 * hlen 0 is used for RFC 1868 UnARP.
    885 	 *
    886 	 * Note that the rest of the code checks that hlen is what we expect
    887 	 * for this hardware address type, so might as well discard packets
    888 	 * here that don't match.
    889 	 */
    890 	if ((hlen > 0 && hlen != ill->ill_phys_addr_length) || plen == 0) {
    891 		DTRACE_PROBE2(rput_bogus, ill_t *, ill, mblk_t *, mp1);
    892 		arp_drop_packet("Bogus hlen or plen", mp, ill);
    893 		return;
    894 	}
    895 	/*
    896 	 * Historically, Solaris has been lenient about hardware type numbers.
    897 	 * We should check here, but don't.
    898 	 */
    899 	DTRACE_PROBE3(arp__physical__in__start, ill_t *, ill, arh_t *, arh,
    900 	    mblk_t *, mp);
    901 	/*
    902 	 * If ill is in an ipmp group, it will be the under ill. If we want
    903 	 * to report the packet as coming up the IPMP interface, we should
    904 	 * convert it to the ipmp ill.
    905 	 */
    906 	ARP_HOOK_IN(ipst->ips_arp_physical_in_event, ipst->ips_arp_physical_in,
    907 	    ill->ill_phyint->phyint_ifindex, arh, mp, mp1, ipst);
    908 	DTRACE_PROBE1(arp__physical__in__end, mblk_t *, mp);
    909 	if (mp == NULL)
    910 		return;
    911 	arhp = (uchar_t *)arh + ARH_FIXED_LEN;
    912 	src_haddr = arhp;			/* ar$sha */
    913 	arhp += hlen;
    914 	bcopy(arhp, &src_paddr, IP_ADDR_LEN);	/* ar$spa */
    915 	sp = arhp;
    916 	arhp += IP_ADDR_LEN;
    917 	dst_haddr = arhp;			/* ar$dha */
    918 	arhp += hlen;
    919 	bcopy(arhp, &dst_paddr, IP_ADDR_LEN);	/* ar$tpa */
    920 	dp = arhp;
    921 	op = BE16_TO_U16(arh->arh_operation);
    922 
    923 	DTRACE_PROBE2(ip__arp__input, (in_addr_t), src_paddr,
    924 	    (in_addr_t), dst_paddr);
    925 
    926 	/* Determine if this is just a probe */
    927 	is_probe = (src_paddr == INADDR_ANY);
    928 
    929 	/*
    930 	 * ira_ill is the only field used down the arp_notify path.
    931 	 */
    932 	bzero(&iras, sizeof (iras));
    933 	iras.ira_ill = iras.ira_rill = ill;
    934 	/*
    935 	 * RFC 826: first check if the <protocol, sender protocol address> is
    936 	 * in the cache, if there is a sender protocol address.  Note that this
    937 	 * step also handles resolutions based on source.
    938 	 */
    939 	/* Note: after here we need to freeb(mp) and freemsg(mp1) separately */
    940 	mp->b_cont = NULL;
    941 	if (is_probe) {
    942 		err = AR_NOTFOUND;
    943 	} else {
    944 		if (plen != 4) {
    945 			arp_drop_packet("bad protocol len", mp, ill);
    946 			return;
    947 		}
    948 		err = ip_nce_resolve_all(ill, src_haddr, hlen, &src_paddr,
    949 		    &src_ncec, op);
    950 		switch (err) {
    951 		case AR_BOGON:
    952 			ASSERT(src_ncec != NULL);
    953 			arp_notify(src_paddr, mp1, AR_CN_BOGON,
    954 			    &iras, src_ncec);
    955 			break;
    956 		case AR_FAILED:
    957 			arp_notify(src_paddr, mp1, AR_CN_FAILED, &iras,
    958 			    src_ncec);
    959 			break;
    960 		case AR_LOOPBACK:
    961 			DTRACE_PROBE2(rput_loopback, ill_t *, ill, arh_t *,
    962 			    arh);
    963 			freemsg(mp1);
    964 			break;
    965 		default:
    966 			goto update;
    967 		}
    968 		freemsg(mp);
    969 		if (src_ncec != NULL)
    970 			ncec_refrele(src_ncec);
    971 		return;
    972 	}
    973 update:
    974 	/*
    975 	 * Now look up the destination address.  By RFC 826, we ignore the
    976 	 * packet at this step if the target isn't one of our addresses (i.e.,
    977 	 * one we have been asked to PUBLISH).  This is true even if the
    978 	 * target is something we're trying to resolve and the packet
    979 	 * is a response.
    980 	 */
    981 	dst_ncec = ncec_lookup_illgrp_v4(ill, &dst_paddr);
    982 	if (dst_ncec == NULL || !NCE_PUBLISH(dst_ncec)) {
    983 		/*
    984 		 * Let the client know if the source mapping has changed, even
    985 		 * if the destination provides no useful information for the
    986 		 * client.
    987 		 */
    988 		if (err == AR_CHANGED) {
    989 			arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras,
    990 			    NULL);
    991 			freemsg(mp);
    992 		} else {
    993 			freemsg(mp);
    994 			arp_drop_packet("Target is not interesting", mp1, ill);
    995 		}
    996 		if (dst_ncec != NULL)
    997 			ncec_refrele(dst_ncec);
    998 		if (src_ncec != NULL)
    999 			ncec_refrele(src_ncec);
   1000 		return;
   1001 	}
   1002 
   1003 	if (dst_ncec->ncec_flags & NCE_F_UNVERIFIED) {
   1004 		/*
   1005 		 * Check for a reflection.  Some misbehaving bridges will
   1006 		 * reflect our own transmitted packets back to us.
   1007 		 */
   1008 		ASSERT(NCE_PUBLISH(dst_ncec));
   1009 		if (hlen != dst_ncec->ncec_ill->ill_phys_addr_length) {
   1010 			ncec_refrele(dst_ncec);
   1011 			if (src_ncec != NULL)
   1012 				ncec_refrele(src_ncec);
   1013 			freemsg(mp);
   1014 			arp_drop_packet("bad arh_len", mp1, ill);
   1015 			return;
   1016 		}
   1017 		if (!nce_cmp_ll_addr(dst_ncec, src_haddr, hlen)) {
   1018 			DTRACE_PROBE3(rput_probe_reflected, ill_t *, ill,
   1019 			    arh_t *, arh, ncec_t *, dst_ncec);
   1020 			ncec_refrele(dst_ncec);
   1021 			if (src_ncec != NULL)
   1022 				ncec_refrele(src_ncec);
   1023 			freemsg(mp);
   1024 			arp_drop_packet("Reflected probe", mp1, ill);
   1025 			return;
   1026 		}
   1027 		/*
   1028 		 * Responses targeting our HW address that are not responses to
   1029 		 * our DAD probe must be ignored as they are related to requests
   1030 		 * sent before DAD was restarted.
   1031 		 */
   1032 		if (op == ARP_RESPONSE &&
   1033 		    (nce_cmp_ll_addr(dst_ncec, dst_haddr, hlen) == 0)) {
   1034 			ncec_refrele(dst_ncec);
   1035 			if (src_ncec != NULL)
   1036 				ncec_refrele(src_ncec);
   1037 			freemsg(mp);
   1038 			arp_drop_packet(
   1039 			    "Response to request that was sent before DAD",
   1040 			    mp1, ill);
   1041 			return;
   1042 		}
   1043 		/*
   1044 		 * Responses targeted to HW addresses which are not ours but
   1045 		 * sent to our unverified proto address are also conflicts.
   1046 		 * These may be reported by a proxy rather than the interface
   1047 		 * with the conflicting address, dst_paddr is in conflict
   1048 		 * rather than src_paddr. To ensure IP can locate the correct
   1049 		 * ipif to take down, it is necessary to copy dst_paddr to
   1050 		 * the src_paddr field before sending it to IP. The same is
   1051 		 * required for probes, where src_paddr will be INADDR_ANY.
   1052 		 */
   1053 		if (is_probe || op == ARP_RESPONSE) {
   1054 			bcopy(dp, sp, plen);
   1055 			arp_notify(src_paddr, mp1, AR_CN_FAILED, &iras,
   1056 			    NULL);
   1057 			ncec_delete(dst_ncec);
   1058 		} else if (err == AR_CHANGED) {
   1059 			arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras,
   1060 			    NULL);
   1061 		} else {
   1062 			DTRACE_PROBE3(rput_request_unverified,
   1063 			    ill_t *, ill, arh_t *, arh, ncec_t *, dst_ncec);
   1064 			arp_drop_packet("Unverified request", mp1, ill);
   1065 		}
   1066 		freemsg(mp);
   1067 		ncec_refrele(dst_ncec);
   1068 		if (src_ncec != NULL)
   1069 			ncec_refrele(src_ncec);
   1070 		return;
   1071 	}
   1072 	/*
   1073 	 * If it's a request, then we reply to this, and if we think the
   1074 	 * sender's unknown, then we create an entry to avoid unnecessary ARPs.
   1075 	 * The design assumption is that someone ARPing us is likely to send us
   1076 	 * a packet soon, and that we'll want to reply to it.
   1077 	 */
   1078 	if (op == ARP_REQUEST) {
   1079 		const uchar_t *nce_hwaddr;
   1080 		struct in_addr nce_paddr;
   1081 		clock_t now;
   1082 		ill_t *under_ill = ill;
   1083 		boolean_t send_unicast = B_TRUE;
   1084 
   1085 		ASSERT(NCE_PUBLISH(dst_ncec));
   1086 
   1087 		if ((dst_ncec->ncec_flags & (NCE_F_BCAST|NCE_F_MCAST)) != 0) {
   1088 			/*
   1089 			 * Ignore senders who are deliberately or accidentally
   1090 			 * confused.
   1091 			 */
   1092 			goto bail;
   1093 		}
   1094 
   1095 		if (!is_probe && err == AR_NOTFOUND) {
   1096 			ASSERT(src_ncec == NULL);
   1097 
   1098 			if (IS_UNDER_IPMP(under_ill)) {
   1099 				/*
   1100 				 * create the ncec for the sender on ipmp_ill.
   1101 				 * We pass in the ipmp_ill itself to avoid
   1102 				 * creating an nce_t on the under_ill.
   1103 				 */
   1104 				ill = ipmp_ill_hold_ipmp_ill(under_ill);
   1105 				if (ill == NULL)
   1106 					ill = under_ill;
   1107 				else
   1108 					need_ill_refrele = B_TRUE;
   1109 			}
   1110 
   1111 			err = nce_lookup_then_add_v4(ill, src_haddr, hlen,
   1112 			    &src_paddr, 0, ND_STALE, &nce);
   1113 
   1114 			switch (err) {
   1115 			case 0:
   1116 			case EEXIST:
   1117 				ip1dbg(("added ncec %p in state %d ill %s\n",
   1118 				    (void *)src_ncec, src_ncec->ncec_state,
   1119 				    ill->ill_name));
   1120 				src_ncec = nce->nce_common;
   1121 				break;
   1122 			default:
   1123 				/*
   1124 				 * Either no memory, or the outgoing interface
   1125 				 * is in the process of down/unplumb. In the
   1126 				 * latter case, we will fail the send anyway,
   1127 				 * and in the former case, we should try to send
   1128 				 * the ARP response.
   1129 				 */
   1130 				src_lladdr = src_haddr;
   1131 				goto send_response;
   1132 			}
   1133 			ncec_refhold(src_ncec);
   1134 			nce_refrele(nce);
   1135 			/* set up cleanup interval on ncec */
   1136 		}
   1137 
   1138 		/*
   1139 		 * This implements periodic address defense based on a modified
   1140 		 * version of the RFC 3927 requirements.  Instead of sending a
   1141 		 * broadcasted reply every time, as demanded by the RFC, we
   1142 		 * send at most one broadcast reply per arp_broadcast_interval.
   1143 		 */
   1144 		now = ddi_get_lbolt();
   1145 		if ((now - dst_ncec->ncec_last_time_defended) >
   1146 		    MSEC_TO_TICK(ipst->ips_ipv4_dad_announce_interval)) {
   1147 			dst_ncec->ncec_last_time_defended = now;
   1148 			/*
   1149 			 * If this is one of the long-suffering entries,
   1150 			 * pull it out now.  It no longer needs separate
   1151 			 * defense, because we're now doing that with this
   1152 			 * broadcasted reply.
   1153 			 */
   1154 			dst_ncec->ncec_flags &= ~NCE_F_DELAYED;
   1155 			send_unicast = B_FALSE;
   1156 		}
   1157 		if (src_ncec != NULL && send_unicast) {
   1158 			src_lladdr = src_ncec->ncec_lladdr;
   1159 		} else {
   1160 			src_lladdr = under_ill->ill_bcast_mp->b_rptr +
   1161 			    NCE_LL_ADDR_OFFSET(under_ill);
   1162 		}
   1163 send_response:
   1164 		nce_hwaddr = dst_ncec->ncec_lladdr;
   1165 		IN6_V4MAPPED_TO_INADDR(&dst_ncec->ncec_addr, &nce_paddr);
   1166 
   1167 		(void) arp_output(under_ill, ARP_RESPONSE,
   1168 		    nce_hwaddr, (uchar_t *)&nce_paddr, src_haddr,
   1169 		    (uchar_t *)&src_paddr, src_lladdr);
   1170 	}
   1171 bail:
   1172 	if (dst_ncec != NULL) {
   1173 		ncec_refrele(dst_ncec);
   1174 	}
   1175 	if (src_ncec != NULL) {
   1176 		ncec_refrele(src_ncec);
   1177 	}
   1178 	if (err == AR_CHANGED) {
   1179 		mp->b_cont = NULL;
   1180 		arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras, NULL);
   1181 		mp1 = NULL;
   1182 	}
   1183 	if (need_ill_refrele)
   1184 		ill_refrele(ill);
   1185 done:
   1186 	freemsg(mp);
   1187 	freemsg(mp1);
   1188 }
   1189 
   1190 /*
   1191  * Basic initialization of the arl_t and the arl_common structure shared with
   1192  * the ill_t that is done after SLIFNAME/IF_UNITSEL.
   1193  */
   1194 static int
   1195 arl_ill_init(arl_t *arl, char *ill_name)
   1196 {
   1197 	ill_t *ill;
   1198 	arl_ill_common_t *ai;
   1199 
   1200 	ill = ill_lookup_on_name(ill_name, B_FALSE, B_FALSE, B_FALSE,
   1201 	    arl->arl_ipst);
   1202 
   1203 	if (ill == NULL)
   1204 		return (ENXIO);
   1205 
   1206 	/*
   1207 	 * By the time we set up the arl, we expect the ETHERTYPE_IP
   1208 	 * stream to be fully bound and attached. So we copy/verify
   1209 	 * relevant information as possible from/against the ill.
   1210 	 *
   1211 	 * The following should have been set up in arp_ll_set_defaults()
   1212 	 * after the first DL_INFO_ACK was received.
   1213 	 */
   1214 	ASSERT(arl->arl_phys_addr_length == ill->ill_phys_addr_length);
   1215 	ASSERT(arl->arl_sap == ETHERTYPE_ARP);
   1216 	ASSERT(arl->arl_mactype == ill->ill_mactype);
   1217 	ASSERT(arl->arl_sap_length == ill->ill_sap_length);
   1218 
   1219 	ai =  kmem_zalloc(sizeof (*ai), KM_SLEEP);
   1220 	mutex_enter(&ill->ill_lock);
   1221 	/* First ensure that the ill is not CONDEMNED.  */
   1222 	if (ill->ill_state_flags & ILL_CONDEMNED) {
   1223 		mutex_exit(&ill->ill_lock);
   1224 		ill_refrele(ill);
   1225 		kmem_free(ai, sizeof (*ai));
   1226 		return (ENXIO);
   1227 	}
   1228 	if (ill->ill_common != NULL || arl->arl_common != NULL) {
   1229 		mutex_exit(&ill->ill_lock);
   1230 		ip0dbg(("%s: PPA already exists", ill->ill_name));
   1231 		ill_refrele(ill);
   1232 		kmem_free(ai, sizeof (*ai));
   1233 		return (EEXIST);
   1234 	}
   1235 	mutex_init(&ai->ai_lock, NULL, MUTEX_DEFAULT, NULL);
   1236 	ai->ai_arl = arl;
   1237 	ai->ai_ill = ill;
   1238 	ill->ill_common = ai;
   1239 	arl->arl_common = ai;
   1240 	mutex_exit(&ill->ill_lock);
   1241 	(void) strlcpy(arl->arl_name, ill->ill_name, LIFNAMSIZ);
   1242 	arl->arl_name_length = ill->ill_name_length;
   1243 	ill_refrele(ill);
   1244 	arp_ifname_notify(arl);
   1245 	return (0);
   1246 }
   1247 
   1248 /* Allocate and do common initializations for DLPI messages. */
   1249 static mblk_t *
   1250 ip_ar_dlpi_comm(t_uscalar_t prim, size_t size)
   1251 {
   1252 	mblk_t  *mp;
   1253 
   1254 	if ((mp = allocb(size, BPRI_HI)) == NULL)
   1255 		return (NULL);
   1256 
   1257 	/*
   1258 	 * DLPIv2 says that DL_INFO_REQ and DL_TOKEN_REQ (the latter
   1259 	 * of which we don't seem to use) are sent with M_PCPROTO, and
   1260 	 * that other DLPI are M_PROTO.
   1261 	 */
   1262 	DB_TYPE(mp) = (prim == DL_INFO_REQ) ? M_PCPROTO : M_PROTO;
   1263 
   1264 	mp->b_wptr = mp->b_rptr + size;
   1265 	bzero(mp->b_rptr, size);
   1266 	DL_PRIM(mp) = prim;
   1267 	return (mp);
   1268 }
   1269 
   1270 
   1271 int
   1272 ip_sioctl_ifunitsel_arp(queue_t *q, int *ppa)
   1273 {
   1274 	arl_t *arl;
   1275 	char *cp, ill_name[LIFNAMSIZ];
   1276 
   1277 	if (q->q_next == NULL)
   1278 		return (EINVAL);
   1279 
   1280 	do {
   1281 		q = q->q_next;
   1282 	} while (q->q_next != NULL);
   1283 	cp = q->q_qinfo->qi_minfo->mi_idname;
   1284 
   1285 	arl = (arl_t *)q->q_ptr;
   1286 	(void) snprintf(ill_name, sizeof (ill_name), "%s%d", cp, *ppa);
   1287 	arl->arl_ppa = *ppa;
   1288 	return (arl_ill_init(arl, ill_name));
   1289 }
   1290 
   1291 int
   1292 ip_sioctl_slifname_arp(queue_t *q, void *lifreq)
   1293 {
   1294 	arl_t *arl;
   1295 	struct lifreq *lifr = lifreq;
   1296 
   1297 	/* ioctl not valid when IP opened as a device */
   1298 	if (q->q_next == NULL)
   1299 		return (EINVAL);
   1300 
   1301 	arl = (arl_t *)q->q_ptr;
   1302 	arl->arl_ppa = lifr->lifr_ppa;
   1303 	return (arl_ill_init(arl, lifr->lifr_name));
   1304 }
   1305 
   1306 arl_t *
   1307 ill_to_arl(ill_t *ill)
   1308 {
   1309 	arl_ill_common_t *ai = ill->ill_common;
   1310 	arl_t *arl = NULL;
   1311 
   1312 	if (ai == NULL)
   1313 		return (NULL);
   1314 	/*
   1315 	 * Find the arl_t that corresponds to this ill_t from the shared
   1316 	 * ill_common structure. We can safely access the ai here as it
   1317 	 * will only be freed in arp_modclose() after we have become
   1318 	 * single-threaded.
   1319 	 */
   1320 	mutex_enter(&ai->ai_lock);
   1321 	if ((arl = ai->ai_arl) != NULL) {
   1322 		mutex_enter(&arl->arl_lock);
   1323 		if (!(arl->arl_state_flags & ARL_CONDEMNED)) {
   1324 			arl_refhold_locked(arl);
   1325 			mutex_exit(&arl->arl_lock);
   1326 		} else {
   1327 			mutex_exit(&arl->arl_lock);
   1328 			arl = NULL;
   1329 		}
   1330 	}
   1331 	mutex_exit(&ai->ai_lock);
   1332 	return (arl);
   1333 }
   1334 
   1335 ill_t *
   1336 arl_to_ill(arl_t *arl)
   1337 {
   1338 	arl_ill_common_t *ai = arl->arl_common;
   1339 	ill_t *ill = NULL;
   1340 
   1341 	if (ai == NULL) {
   1342 		/*
   1343 		 * happens when the arp stream is just being opened, and
   1344 		 * arl_ill_init has not been executed yet.
   1345 		 */
   1346 		return (NULL);
   1347 	}
   1348 	/*
   1349 	 * Find the ill_t that corresponds to this arl_t from the shared
   1350 	 * arl_common structure. We can safely access the ai here as it
   1351 	 * will only be freed in arp_modclose() after we have become
   1352 	 * single-threaded.
   1353 	 */
   1354 	mutex_enter(&ai->ai_lock);
   1355 	if ((ill = ai->ai_ill) != NULL) {
   1356 		mutex_enter(&ill->ill_lock);
   1357 		if (!ILL_IS_CONDEMNED(ill)) {
   1358 			ill_refhold_locked(ill);
   1359 			mutex_exit(&ill->ill_lock);
   1360 		} else {
   1361 			mutex_exit(&ill->ill_lock);
   1362 			ill = NULL;
   1363 		}
   1364 	}
   1365 	mutex_exit(&ai->ai_lock);
   1366 	return (ill);
   1367 }
   1368 
   1369 int
   1370 arp_ll_up(ill_t *ill)
   1371 {
   1372 	mblk_t	*attach_mp = NULL;
   1373 	mblk_t	*bind_mp = NULL;
   1374 	mblk_t	*unbind_mp = NULL;
   1375 	arl_t 	*arl;
   1376 
   1377 	ASSERT(IAM_WRITER_ILL(ill));
   1378 	arl = ill_to_arl(ill);
   1379 
   1380 	DTRACE_PROBE2(ill__downup, char *, "arp_ll_up", ill_t *, ill);
   1381 	if (arl == NULL)
   1382 		return (ENXIO);
   1383 	DTRACE_PROBE2(arl__downup, char *, "arp_ll_up", arl_t *, arl);
   1384 	if ((arl->arl_state_flags & ARL_LL_UP) != 0) {
   1385 		arl_refrele(arl);
   1386 		return (0);
   1387 	}
   1388 	if (arl->arl_needs_attach) { /* DL_STYLE2 */
   1389 		attach_mp =
   1390 		    ip_ar_dlpi_comm(DL_ATTACH_REQ, sizeof (dl_attach_req_t));
   1391 		if (attach_mp == NULL)
   1392 			goto bad;
   1393 		((dl_attach_req_t *)attach_mp->b_rptr)->dl_ppa = arl->arl_ppa;
   1394 	}
   1395 
   1396 	/* Allocate and initialize a bind message. */
   1397 	bind_mp = ip_ar_dlpi_comm(DL_BIND_REQ, sizeof (dl_bind_req_t));
   1398 	if (bind_mp == NULL)
   1399 		goto bad;
   1400 	((dl_bind_req_t *)bind_mp->b_rptr)->dl_sap = ETHERTYPE_ARP;
   1401 	((dl_bind_req_t *)bind_mp->b_rptr)->dl_service_mode = DL_CLDLS;
   1402 
   1403 	unbind_mp = ip_ar_dlpi_comm(DL_UNBIND_REQ, sizeof (dl_unbind_req_t));
   1404 	if (unbind_mp == NULL)
   1405 		goto bad;
   1406 	if (arl->arl_needs_attach) {
   1407 		arp_dlpi_send(arl, attach_mp);
   1408 	}
   1409 	arl->arl_unbind_mp = unbind_mp;
   1410 
   1411 	arl->arl_state_flags |= ARL_LL_BIND_PENDING;
   1412 	arp_dlpi_send(arl, bind_mp);
   1413 	arl_refrele(arl);
   1414 	return (EINPROGRESS);
   1415 
   1416 bad:
   1417 	freemsg(attach_mp);
   1418 	freemsg(bind_mp);
   1419 	freemsg(unbind_mp);
   1420 	arl_refrele(arl);
   1421 	return (ENOMEM);
   1422 }
   1423 
   1424 /*
   1425  * consumes/frees mp
   1426  */
   1427 static void
   1428 arp_notify(in_addr_t src, mblk_t *mp, uint32_t arcn_code,
   1429     ip_recv_attr_t *ira, ncec_t *ncec)
   1430 {
   1431 	char		hbuf[MAC_STR_LEN];
   1432 	char		sbuf[INET_ADDRSTRLEN];
   1433 	ill_t		*ill = ira->ira_ill;
   1434 	ip_stack_t	*ipst = ill->ill_ipst;
   1435 	arh_t		*arh = (arh_t *)mp->b_rptr;
   1436 
   1437 	switch (arcn_code) {
   1438 	case AR_CN_BOGON:
   1439 		/*
   1440 		 * Someone is sending ARP packets with a source protocol
   1441 		 * address that we have published and for which we believe our
   1442 		 * entry is authoritative and verified to be unique on
   1443 		 * the network.
   1444 		 *
   1445 		 * arp_process_packet() sends AR_CN_FAILED for the case when
   1446 		 * a DAD probe is received and the hardware address of a
   1447 		 * non-authoritative entry has changed. Thus, AR_CN_BOGON
   1448 		 * indicates a real conflict, and we have to do resolution.
   1449 		 *
   1450 		 * We back away quickly from the address if it's from DHCP or
   1451 		 * otherwise temporary and hasn't been used recently (or at
   1452 		 * all).  We'd like to include "deprecated" addresses here as
   1453 		 * well (as there's no real reason to defend something we're
   1454 		 * discarding), but IPMP "reuses" this flag to mean something
   1455 		 * other than the standard meaning.
   1456 		 */
   1457 		if (ip_nce_conflict(mp, ira, ncec)) {
   1458 			(void) mac_colon_addr((uint8_t *)(arh + 1),
   1459 			    arh->arh_hlen, hbuf, sizeof (hbuf));
   1460 			(void) ip_dot_addr(src, sbuf);
   1461 			cmn_err(CE_WARN,
   1462 			    "proxy ARP problem?  Node '%s' is using %s on %s",
   1463 			    hbuf, sbuf, ill->ill_name);
   1464 			if (!arp_no_defense)
   1465 				(void) arp_announce(ncec);
   1466 			/*
   1467 			 * ncec_last_time_defended has been adjusted in
   1468 			 * ip_nce_conflict.
   1469 			 */
   1470 		} else {
   1471 			ncec_delete(ncec);
   1472 		}
   1473 		freemsg(mp);
   1474 		break;
   1475 	case AR_CN_ANNOUNCE: {
   1476 		nce_hw_map_t hwm;
   1477 		/*
   1478 		 * ARP gives us a copy of any packet where it thinks
   1479 		 * the address has changed, so that we can update our
   1480 		 * caches.  We're responsible for caching known answers
   1481 		 * in the current design.  We check whether the
   1482 		 * hardware address really has changed in all of our
   1483 		 * entries that have cached this mapping, and if so, we
   1484 		 * blow them away.  This way we will immediately pick
   1485 		 * up the rare case of a host changing hardware
   1486 		 * address.
   1487 		 */
   1488 		if (src == 0) {
   1489 			freemsg(mp);
   1490 			break;
   1491 		}
   1492 		hwm.hwm_addr = src;
   1493 		hwm.hwm_hwlen = arh->arh_hlen;
   1494 		hwm.hwm_hwaddr = (uchar_t *)(arh + 1);
   1495 		hwm.hwm_flags = 0;
   1496 		ncec_walk_common(ipst->ips_ndp4, NULL,
   1497 		    (pfi_t)nce_update_hw_changed, &hwm, B_TRUE);
   1498 		freemsg(mp);
   1499 		break;
   1500 	}
   1501 	case AR_CN_FAILED:
   1502 		if (arp_no_defense) {
   1503 			(void) mac_colon_addr((uint8_t *)(arh + 1),
   1504 			    arh->arh_hlen, hbuf, sizeof (hbuf));
   1505 			(void) ip_dot_addr(src, sbuf);
   1506 
   1507 			cmn_err(CE_WARN,
   1508 			    "node %s is using our IP address %s on %s",
   1509 			    hbuf, sbuf, ill->ill_name);
   1510 			freemsg(mp);
   1511 			break;
   1512 		}
   1513 		/*
   1514 		 * mp will be freed by arp_excl.
   1515 		 */
   1516 		ill_refhold(ill);
   1517 		qwriter_ip(ill, ill->ill_rq, mp, arp_excl, NEW_OP, B_FALSE);
   1518 		return;
   1519 	default:
   1520 		ASSERT(0);
   1521 		freemsg(mp);
   1522 		break;
   1523 	}
   1524 }
   1525 
   1526 /*
   1527  * arp_output is called to transmit an ARP Request or Response. The mapping
   1528  * to RFC 826 variables is:
   1529  *   haddr1 == ar$sha
   1530  *   paddr1 == ar$spa
   1531  *   haddr2 == ar$tha
   1532  *   paddr2 == ar$tpa
   1533  * The ARP frame is sent to the ether_dst in dst_lladdr.
   1534  */
   1535 static int
   1536 arp_output(ill_t *ill, uint32_t operation,
   1537     const uchar_t *haddr1, const uchar_t *paddr1, const uchar_t *haddr2,
   1538     const uchar_t *paddr2, uchar_t *dst_lladdr)
   1539 {
   1540 	arh_t	*arh;
   1541 	uint8_t	*cp;
   1542 	uint_t	hlen;
   1543 	uint32_t plen = IPV4_ADDR_LEN; /* ar$pln from RFC 826 */
   1544 	uint32_t proto = IP_ARP_PROTO_TYPE;
   1545 	mblk_t *mp;
   1546 	arl_t *arl;
   1547 
   1548 	ASSERT(dst_lladdr != NULL);
   1549 	hlen = ill->ill_phys_addr_length; /* ar$hln from RFC 826 */
   1550 	mp = ill_dlur_gen(dst_lladdr, hlen, ETHERTYPE_ARP, ill->ill_sap_length);
   1551 
   1552 	if (mp == NULL)
   1553 		return (ENOMEM);
   1554 
   1555 	/* IFF_NOARP flag is set or link down: do not send arp messages */
   1556 	if ((ill->ill_flags & ILLF_NOARP) || !ill->ill_dl_up) {
   1557 		freemsg(mp);
   1558 		return (ENXIO);
   1559 	}
   1560 
   1561 	mp->b_cont = allocb(AR_LL_HDR_SLACK + ARH_FIXED_LEN + (hlen * 4) +
   1562 	    plen + plen, BPRI_MED);
   1563 	if (mp->b_cont == NULL) {
   1564 		freeb(mp);
   1565 		return (ENOMEM);
   1566 	}
   1567 
   1568 	/* Fill in the ARP header. */
   1569 	cp = mp->b_cont->b_rptr + (AR_LL_HDR_SLACK + hlen + hlen);
   1570 	mp->b_cont->b_rptr = cp;
   1571 	arh = (arh_t *)cp;
   1572 	U16_TO_BE16(arp_hw_type(ill->ill_mactype), arh->arh_hardware);
   1573 	U16_TO_BE16(proto, arh->arh_proto);
   1574 	arh->arh_hlen = (uint8_t)hlen;
   1575 	arh->arh_plen = (uint8_t)plen;
   1576 	U16_TO_BE16(operation, arh->arh_operation);
   1577 	cp += ARH_FIXED_LEN;
   1578 	bcopy(haddr1, cp, hlen);
   1579 	cp += hlen;
   1580 	if (paddr1 == NULL)
   1581 		bzero(cp, plen);
   1582 	else
   1583 		bcopy(paddr1, cp, plen);
   1584 	cp += plen;
   1585 	if (haddr2 == NULL)
   1586 		bzero(cp, hlen);
   1587 	else
   1588 		bcopy(haddr2, cp, hlen);
   1589 	cp += hlen;
   1590 	bcopy(paddr2, cp, plen);
   1591 	cp += plen;
   1592 	mp->b_cont->b_wptr = cp;
   1593 
   1594 	DTRACE_PROBE3(arp__physical__out__start,
   1595 	    ill_t *, ill, arh_t *, arh, mblk_t *, mp);
   1596 	ARP_HOOK_OUT(ill->ill_ipst->ips_arp_physical_out_event,
   1597 	    ill->ill_ipst->ips_arp_physical_out,
   1598 	    ill->ill_phyint->phyint_ifindex, arh, mp, mp->b_cont,
   1599 	    ill->ill_ipst);
   1600 	DTRACE_PROBE1(arp__physical__out__end, mblk_t *, mp);
   1601 	if (mp == NULL)
   1602 		return (0);
   1603 
   1604 	/* Ship it out. */
   1605 	arl = ill_to_arl(ill);
   1606 	if (arl == NULL) {
   1607 		freemsg(mp);
   1608 		return (0);
   1609 	}
   1610 	if (canputnext(arl->arl_wq))
   1611 		putnext(arl->arl_wq, mp);
   1612 	else
   1613 		freemsg(mp);
   1614 	arl_refrele(arl);
   1615 	return (0);
   1616 }
   1617 
   1618 /*
   1619  * Process resolve requests.
   1620  * If we are not yet reachable then we check and decrease ncec_rcnt; otherwise
   1621  * we leave it alone (the caller will check and manage ncec_pcnt in those
   1622  * cases.)
   1623  */
   1624 int
   1625 arp_request(ncec_t *ncec, in_addr_t sender, ill_t *ill)
   1626 {
   1627 	int err;
   1628 	const uchar_t *target_hwaddr;
   1629 	struct in_addr nce_paddr;
   1630 	uchar_t *dst_lladdr;
   1631 	boolean_t use_rcnt = !NCE_ISREACHABLE(ncec);
   1632 
   1633 	ASSERT(MUTEX_HELD(&ncec->ncec_lock));
   1634 	ASSERT(!IS_IPMP(ill));
   1635 
   1636 	if (use_rcnt && ncec->ncec_rcnt == 0) {
   1637 		/* not allowed any more retransmits. */
   1638 		return (0);
   1639 	}
   1640 
   1641 	if ((ill->ill_flags & ILLF_NOARP) != 0)
   1642 		return (0);
   1643 
   1644 	IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &nce_paddr);
   1645 
   1646 	target_hwaddr =
   1647 	    ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
   1648 
   1649 	if (NCE_ISREACHABLE(ncec)) {
   1650 		dst_lladdr =  ncec->ncec_lladdr;
   1651 	} else {
   1652 		dst_lladdr =  ill->ill_bcast_mp->b_rptr +
   1653 		    NCE_LL_ADDR_OFFSET(ill);
   1654 	}
   1655 
   1656 	mutex_exit(&ncec->ncec_lock);
   1657 	err = arp_output(ill, ARP_REQUEST,
   1658 	    ill->ill_phys_addr, (uchar_t *)&sender, target_hwaddr,
   1659 	    (uchar_t *)&nce_paddr, dst_lladdr);
   1660 	mutex_enter(&ncec->ncec_lock);
   1661 
   1662 	if (err != 0) {
   1663 		/*
   1664 		 * Some transient error such as ENOMEM or a down link was
   1665 		 * encountered. If the link has been taken down permanently,
   1666 		 * the ncec will eventually be cleaned up (ipif_down_tail()
   1667 		 * will call ipif_nce_down() and flush the ncec), to terminate
   1668 		 * recurring attempts to send ARP requests. In all other cases,
   1669 		 * allow the caller another chance at success next time.
   1670 		 */
   1671 		return (ncec->ncec_ill->ill_reachable_retrans_time);
   1672 	}
   1673 
   1674 	if (use_rcnt)
   1675 		ncec->ncec_rcnt--;
   1676 
   1677 	return (ncec->ncec_ill->ill_reachable_retrans_time);
   1678 }
   1679 
   1680 /* return B_TRUE if dropped */
   1681 boolean_t
   1682 arp_announce(ncec_t *ncec)
   1683 {
   1684 	ill_t *ill;
   1685 	int err;
   1686 	uchar_t *sphys_addr, *bcast_addr;
   1687 	struct in_addr ncec_addr;
   1688 	boolean_t need_refrele = B_FALSE;
   1689 
   1690 	ASSERT((ncec->ncec_flags & NCE_F_BCAST) == 0);
   1691 	ASSERT((ncec->ncec_flags & NCE_F_MCAST) == 0);
   1692 
   1693 	if (IS_IPMP(ncec->ncec_ill)) {
   1694 		/* sent on the cast_ill */
   1695 		ill = ipmp_ill_get_xmit_ill(ncec->ncec_ill, B_FALSE);
   1696 		if (ill == NULL)
   1697 			return (B_TRUE);
   1698 		need_refrele = B_TRUE;
   1699 	} else {
   1700 		ill = ncec->ncec_ill;
   1701 	}
   1702 
   1703 	/*
   1704 	 * broadcast an announce to ill_bcast address.
   1705 	 */
   1706 	IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &ncec_addr);
   1707 
   1708 	sphys_addr = ncec->ncec_lladdr;
   1709 	bcast_addr = ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
   1710 
   1711 	err = arp_output(ill, ARP_REQUEST,
   1712 	    sphys_addr, (uchar_t *)&ncec_addr, bcast_addr,
   1713 	    (uchar_t *)&ncec_addr, bcast_addr);
   1714 
   1715 	if (need_refrele)
   1716 		ill_refrele(ill);
   1717 	return (err != 0);
   1718 }
   1719 
   1720 /* return B_TRUE if dropped */
   1721 boolean_t
   1722 arp_probe(ncec_t *ncec)
   1723 {
   1724 	ill_t *ill;
   1725 	int err;
   1726 	struct in_addr ncec_addr;
   1727 	uchar_t *sphys_addr, *dst_lladdr;
   1728 
   1729 	if (IS_IPMP(ncec->ncec_ill)) {
   1730 		ill = ipmp_ill_get_xmit_ill(ncec->ncec_ill, B_FALSE);
   1731 		if (ill == NULL)
   1732 			return (B_TRUE);
   1733 	} else {
   1734 		ill = ncec->ncec_ill;
   1735 	}
   1736 
   1737 	IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &ncec_addr);
   1738 
   1739 	sphys_addr = ncec->ncec_lladdr;
   1740 	dst_lladdr = ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
   1741 	err = arp_output(ill, ARP_REQUEST,
   1742 	    sphys_addr, NULL, NULL, (uchar_t *)&ncec_addr, dst_lladdr);
   1743 
   1744 	if (IS_IPMP(ncec->ncec_ill))
   1745 		ill_refrele(ill);
   1746 	return (err != 0);
   1747 }
   1748 
   1749 static mblk_t *
   1750 arl_unbind(arl_t *arl)
   1751 {
   1752 	mblk_t *mp;
   1753 
   1754 	if ((mp = arl->arl_unbind_mp) != NULL) {
   1755 		arl->arl_unbind_mp = NULL;
   1756 		arl->arl_state_flags |= ARL_DL_UNBIND_IN_PROGRESS;
   1757 	}
   1758 	return (mp);
   1759 }
   1760 
   1761 int
   1762 arp_ll_down(ill_t *ill)
   1763 {
   1764 	arl_t 	*arl;
   1765 	mblk_t *unbind_mp;
   1766 	int err = 0;
   1767 	boolean_t replumb = (ill->ill_replumbing == 1);
   1768 
   1769 	DTRACE_PROBE2(ill__downup, char *, "arp_ll_down", ill_t *, ill);
   1770 	if ((arl = ill_to_arl(ill)) == NULL)
   1771 		return (ENXIO);
   1772 	DTRACE_PROBE2(arl__downup, char *, "arp_ll_down", arl_t *, arl);
   1773 	mutex_enter(&arl->arl_lock);
   1774 	unbind_mp = arl_unbind(arl);
   1775 	if (unbind_mp != NULL) {
   1776 		ASSERT(arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS);
   1777 		DTRACE_PROBE2(arp__unbinding, mblk_t *, unbind_mp,
   1778 		    arl_t *, arl);
   1779 		err = EINPROGRESS;
   1780 		if (replumb)
   1781 			arl->arl_state_flags |= ARL_LL_REPLUMBING;
   1782 	}
   1783 	mutex_exit(&arl->arl_lock);
   1784 	if (unbind_mp != NULL)
   1785 		arp_dlpi_send(arl, unbind_mp);
   1786 	arl_refrele(arl);
   1787 	return (err);
   1788 }
   1789 
   1790 /* ARGSUSED */
   1791 int
   1792 arp_close(queue_t *q, int flags)
   1793 {
   1794 	if (WR(q)->q_next != NULL) {
   1795 		/* This is a module close */
   1796 		return (arp_modclose(q->q_ptr));
   1797 	}
   1798 	qprocsoff(q);
   1799 	q->q_ptr = WR(q)->q_ptr = NULL;
   1800 	return (0);
   1801 }
   1802 
   1803 static int
   1804 arp_modclose(arl_t *arl)
   1805 {
   1806 	arl_ill_common_t *ai = arl->arl_common;
   1807 	ill_t		*ill;
   1808 	queue_t		*q = arl->arl_rq;
   1809 	mblk_t		*mp, *nextmp;
   1810 	ipsq_t		*ipsq = NULL;
   1811 
   1812 	ill = arl_to_ill(arl);
   1813 	if (ill != NULL) {
   1814 		if (!ill_waiter_inc(ill)) {
   1815 			ill_refrele(ill);
   1816 		} else {
   1817 			ill_refrele(ill);
   1818 			if (ipsq_enter(ill, B_FALSE, NEW_OP))
   1819 				ipsq = ill->ill_phyint->phyint_ipsq;
   1820 			ill_waiter_dcr(ill);
   1821 		}
   1822 		if (ipsq == NULL) {
   1823 			/*
   1824 			 * could not enter the ipsq because ill is already
   1825 			 * marked CONDEMNED.
   1826 			 */
   1827 			ill = NULL;
   1828 		}
   1829 	}
   1830 	if (ai != NULL && ipsq == NULL) {
   1831 		/*
   1832 		 * Either we did not get an ill because it was marked CONDEMNED
   1833 		 * or we could not enter the ipsq because it was unplumbing.
   1834 		 * In both cases, wait for the ill to complete ip_modclose().
   1835 		 *
   1836 		 * If the arp_modclose happened even before SLIFNAME, the ai
   1837 		 * itself would be NULL, in which case we can complete the close
   1838 		 * without waiting.
   1839 		 */
   1840 		mutex_enter(&ai->ai_lock);
   1841 		while (ai->ai_ill != NULL)
   1842 			cv_wait(&ai->ai_ill_unplumb_done, &ai->ai_lock);
   1843 		mutex_exit(&ai->ai_lock);
   1844 	}
   1845 	ASSERT(ill == NULL || IAM_WRITER_ILL(ill));
   1846 
   1847 	mutex_enter(&arl->arl_lock);
   1848 	/*
   1849 	 * If the ill had completed unplumbing before arp_modclose(), there
   1850 	 * would be no ill (and therefore, no ipsq) to serialize arp_modclose()
   1851 	 * so that we need to explicitly check for ARL_CONDEMNED and back off
   1852 	 * if it is set.
   1853 	 */
   1854 	if ((arl->arl_state_flags & ARL_CONDEMNED) != 0) {
   1855 		mutex_exit(&arl->arl_lock);
   1856 		ASSERT(ipsq == NULL);
   1857 		return (0);
   1858 	}
   1859 	arl->arl_state_flags |= ARL_CONDEMNED;
   1860 
   1861 	/*
   1862 	 * send out all pending dlpi messages, don't wait for the ack (which
   1863 	 * will be ignored in arp_rput when CONDEMNED is set)
   1864 	 *
   1865 	 * We have to check for pending DL_UNBIND_REQ because, in the case
   1866 	 * that ip_modclose() executed before arp_modclose(), the call to
   1867 	 * ill_delete_tail->ipif_arp_down() would have triggered a
   1868 	 * DL_UNBIND_REQ. When arp_modclose() executes ipsq_enter() will fail
   1869 	 * (since ip_modclose() is in the ipsq) but the DL_UNBIND_ACK may not
   1870 	 * have been processed yet. In this scenario, we cannot reset
   1871 	 * arl_dlpi_pending, because the setting/clearing of arl_state_flags
   1872 	 * related to unbind, and the associated cv_waits must be allowed to
   1873 	 * continue.
   1874 	 */
   1875 	if (arl->arl_dlpi_pending != DL_UNBIND_REQ)
   1876 		arl->arl_dlpi_pending = DL_PRIM_INVAL;
   1877 	mp = arl->arl_dlpi_deferred;
   1878 	arl->arl_dlpi_deferred = NULL;
   1879 	mutex_exit(&arl->arl_lock);
   1880 
   1881 	for (; mp != NULL; mp = nextmp) {
   1882 		nextmp = mp->b_next;
   1883 		mp->b_next = NULL;
   1884 		putnext(arl->arl_wq, mp);
   1885 	}
   1886 
   1887 	/* Wait for data paths to quiesce */
   1888 	mutex_enter(&arl->arl_lock);
   1889 	while (arl->arl_refcnt != 0)
   1890 		cv_wait(&arl->arl_cv, &arl->arl_lock);
   1891 
   1892 	/*
   1893 	 * unbind, so that nothing else can come up from driver.
   1894 	 */
   1895 	mp = arl_unbind(arl);
   1896 	mutex_exit(&arl->arl_lock);
   1897 	if (mp != NULL)
   1898 		arp_dlpi_send(arl, mp);
   1899 	mutex_enter(&arl->arl_lock);
   1900 
   1901 	/* wait for unbind ack  */
   1902 	while (arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS)
   1903 		cv_wait(&arl->arl_cv, &arl->arl_lock);
   1904 	mutex_exit(&arl->arl_lock);
   1905 
   1906 	qprocsoff(q);
   1907 
   1908 	if (ill != NULL) {
   1909 		mutex_enter(&ill->ill_lock);
   1910 		ill->ill_arl_dlpi_pending = 0;
   1911 		mutex_exit(&ill->ill_lock);
   1912 	}
   1913 
   1914 	if (ai != NULL) {
   1915 		mutex_enter(&ai->ai_lock);
   1916 		ai->ai_arl = NULL;
   1917 		if (ai->ai_ill == NULL) {
   1918 			mutex_destroy(&ai->ai_lock);
   1919 			kmem_free(ai, sizeof (*ai));
   1920 		} else {
   1921 			mutex_exit(&ai->ai_lock);
   1922 		}
   1923 	}
   1924 
   1925 	/* free up the rest */
   1926 	arp_mod_close_tail(arl);
   1927 
   1928 	q->q_ptr = WR(q)->q_ptr = NULL;
   1929 
   1930 	if (ipsq != NULL)
   1931 		ipsq_exit(ipsq);
   1932 
   1933 	return (0);
   1934 }
   1935 
   1936 static void
   1937 arp_mod_close_tail(arl_t *arl)
   1938 {
   1939 	ip_stack_t	*ipst = arl->arl_ipst;
   1940 	mblk_t		**mpp;
   1941 
   1942 	netstack_hold(ipst->ips_netstack);
   1943 
   1944 	mutex_enter(&ipst->ips_ip_mi_lock);
   1945 	mi_close_unlink(&ipst->ips_arp_g_head, (IDP)arl);
   1946 	mutex_exit(&ipst->ips_ip_mi_lock);
   1947 
   1948 	/*
   1949 	 * credp could be null if the open didn't succeed and ip_modopen
   1950 	 * itself calls ip_close.
   1951 	 */
   1952 	if (arl->arl_credp != NULL)
   1953 		crfree(arl->arl_credp);
   1954 
   1955 	/* Free all retained control messages. */
   1956 	mpp = &arl->arl_first_mp_to_free;
   1957 	do {
   1958 		while (mpp[0]) {
   1959 			mblk_t  *mp;
   1960 			mblk_t  *mp1;
   1961 
   1962 			mp = mpp[0];
   1963 			mpp[0] = mp->b_next;
   1964 			for (mp1 = mp; mp1 != NULL; mp1 = mp1->b_cont) {
   1965 				mp1->b_next = NULL;
   1966 				mp1->b_prev = NULL;
   1967 			}
   1968 			freemsg(mp);
   1969 		}
   1970 	} while (mpp++ != &arl->arl_last_mp_to_free);
   1971 
   1972 	netstack_rele(ipst->ips_netstack);
   1973 	mi_free(arl->arl_name);
   1974 	mi_close_free((IDP)arl);
   1975 }
   1976 
   1977 /*
   1978  * DAD failed. Tear down ipifs with the specified srce address. Note that
   1979  * tearing down the ipif also meas deleting the ncec through ipif_down,
   1980  * so it is not possible to use nce_timer for recovery. Instead we start
   1981  * a timer on the ipif. Caller has to free the mp.
   1982  */
   1983 void
   1984 arp_failure(mblk_t *mp, ip_recv_attr_t *ira)
   1985 {
   1986 	ill_t *ill = ira->ira_ill;
   1987 
   1988 	if ((mp = copymsg(mp)) != NULL) {
   1989 		ill_refhold(ill);
   1990 		qwriter_ip(ill, ill->ill_rq, mp, arp_excl, NEW_OP, B_FALSE);
   1991 	}
   1992 }
   1993 
   1994 /*
   1995  * This is for exclusive changes due to ARP.  Tear down an interface due
   1996  * to AR_CN_FAILED and AR_CN_BOGON.
   1997  */
   1998 /* ARGSUSED */
   1999 static void
   2000 arp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
   2001 {
   2002 	ill_t	*ill = rq->q_ptr;
   2003 	arh_t *arh;
   2004 	ipaddr_t src;
   2005 	ipif_t	*ipif;
   2006 	ip_stack_t *ipst = ill->ill_ipst;
   2007 	uchar_t	*haddr;
   2008 	uint_t	haddrlen;
   2009 
   2010 	/* first try src = ar$spa */
   2011 	arh = (arh_t *)mp->b_rptr;
   2012 	bcopy((char *)&arh[1] + arh->arh_hlen, &src, IP_ADDR_LEN);
   2013 
   2014 	haddrlen = arh->arh_hlen;
   2015 	haddr = (uint8_t *)(arh + 1);
   2016 
   2017 	if (haddrlen == ill->ill_phys_addr_length) {
   2018 		/*
   2019 		 * Ignore conflicts generated by misbehaving switches that
   2020 		 * just reflect our own messages back to us.  For IPMP, we may
   2021 		 * see reflections across any ill in the illgrp.
   2022 		 */
   2023 		/* For an under ill_grp can change under lock */
   2024 		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
   2025 		if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 ||
   2026 		    IS_UNDER_IPMP(ill) && ill->ill_grp != NULL &&
   2027 		    ipmp_illgrp_find_ill(ill->ill_grp, haddr,
   2028 		    haddrlen) != NULL) {
   2029 			rw_exit(&ipst->ips_ill_g_lock);
   2030 			goto ignore_conflict;
   2031 		}
   2032 		rw_exit(&ipst->ips_ill_g_lock);
   2033 	}
   2034 
   2035 	/*
   2036 	 * Look up the appropriate ipif.
   2037 	 */
   2038 	ipif = ipif_lookup_addr(src, ill, ALL_ZONES, ipst);
   2039 	if (ipif == NULL)
   2040 		goto ignore_conflict;
   2041 
   2042 	/* Reload the ill to match the ipif */
   2043 	ill = ipif->ipif_ill;
   2044 
   2045 	/* If it's already duplicate or ineligible, then don't do anything. */
   2046 	if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) {
   2047 		ipif_refrele(ipif);
   2048 		goto ignore_conflict;
   2049 	}
   2050 
   2051 	/*
   2052 	 * If we failed on a recovery probe, then restart the timer to
   2053 	 * try again later.
   2054 	 */
   2055 	if (!ipif->ipif_was_dup) {
   2056 		char hbuf[MAC_STR_LEN];
   2057 		char sbuf[INET_ADDRSTRLEN];
   2058 		char ibuf[LIFNAMSIZ];
   2059 
   2060 		(void) mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf));
   2061 		(void) ip_dot_addr(src, sbuf);
   2062 		ipif_get_name(ipif, ibuf, sizeof (ibuf));
   2063 
   2064 		cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);"
   2065 		    " disabled", ibuf, sbuf, hbuf);
   2066 	}
   2067 	mutex_enter(&ill->ill_lock);
   2068 	ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE));
   2069 	ipif->ipif_flags |= IPIF_DUPLICATE;
   2070 	ill->ill_ipif_dup_count++;
   2071 	mutex_exit(&ill->ill_lock);
   2072 	(void) ipif_down(ipif, NULL, NULL);
   2073 	(void) ipif_down_tail(ipif);
   2074 	mutex_enter(&ill->ill_lock);
   2075 	if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) &&
   2076 	    ill->ill_net_type == IRE_IF_RESOLVER &&
   2077 	    !(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
   2078 	    ipst->ips_ip_dup_recovery > 0) {
   2079 		ASSERT(ipif->ipif_recovery_id == 0);
   2080 		ipif->ipif_recovery_id = timeout(ipif_dup_recovery,
   2081 		    ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
   2082 	}
   2083 	mutex_exit(&ill->ill_lock);
   2084 	ipif_refrele(ipif);
   2085 
   2086 ignore_conflict:
   2087 	freemsg(mp);
   2088 }
   2089 
   2090 /*
   2091  * This is a place for a dtrace hook.
   2092  * Note that mp can be either the DL_UNITDATA_IND with a b_cont payload,
   2093  * or just the ARP packet payload as an M_DATA.
   2094  */
   2095 /* ARGSUSED */
   2096 static void
   2097 arp_drop_packet(const char *str, mblk_t *mp, ill_t *ill)
   2098 {
   2099 	freemsg(mp);
   2100 }
   2101 
   2102 static boolean_t
   2103 arp_over_driver(queue_t *q)
   2104 {
   2105 	queue_t *qnext = STREAM(q)->sd_wrq->q_next;
   2106 
   2107 	/*
   2108 	 * check if first module below stream head is IP or UDP.
   2109 	 */
   2110 	ASSERT(qnext != NULL);
   2111 	if (strcmp(Q2NAME(qnext), "ip") != 0 &&
   2112 	    strcmp(Q2NAME(qnext), "udp") != 0) {
   2113 		/*
   2114 		 * module below is not ip or udp, so arp has been pushed
   2115 		 * on the driver.
   2116 		 */
   2117 		return (B_TRUE);
   2118 	}
   2119 	return (B_FALSE);
   2120 }
   2121 
   2122 static int
   2123 arp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
   2124 {
   2125 	int err;
   2126 
   2127 	ASSERT(sflag & MODOPEN);
   2128 	if (!arp_over_driver(q)) {
   2129 		q->q_qinfo = dummymodinfo.st_rdinit;
   2130 		WR(q)->q_qinfo = dummymodinfo.st_wrinit;
   2131 		return ((*dummymodinfo.st_rdinit->qi_qopen)(q, devp, flag,
   2132 		    sflag, credp));
   2133 	}
   2134 	err = arp_modopen(q, devp, flag, sflag, credp);
   2135 	return (err);
   2136 }
   2137 
   2138 /*
   2139  * In most cases we must be a writer on the IP stream before coming to
   2140  * arp_dlpi_send(), to serialize DLPI sends to the driver. The exceptions
   2141  * when we are not a writer are very early duing initialization (in
   2142  * arl_init, before the arl has done a SLIFNAME, so that we don't yet know
   2143  * the associated ill) or during arp_mod_close, when we could not enter the
   2144  * ipsq because the ill has already unplumbed.
   2145  */
   2146 static void
   2147 arp_dlpi_send(arl_t *arl, mblk_t *mp)
   2148 {
   2149 	mblk_t **mpp;
   2150 	t_uscalar_t prim;
   2151 	arl_ill_common_t *ai;
   2152 
   2153 	ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
   2154 
   2155 #ifdef DEBUG
   2156 	ai = arl->arl_common;
   2157 	if (ai != NULL) {
   2158 		mutex_enter(&ai->ai_lock);
   2159 		if (ai->ai_ill != NULL)
   2160 			ASSERT(IAM_WRITER_ILL(ai->ai_ill));
   2161 		mutex_exit(&ai->ai_lock);
   2162 	}
   2163 #endif /* DEBUG */
   2164 
   2165 	mutex_enter(&arl->arl_lock);
   2166 	if (arl->arl_dlpi_pending != DL_PRIM_INVAL) {
   2167 		/* Must queue message. Tail insertion */
   2168 		mpp = &arl->arl_dlpi_deferred;
   2169 		while (*mpp != NULL)
   2170 			mpp = &((*mpp)->b_next);
   2171 
   2172 		*mpp = mp;
   2173 		mutex_exit(&arl->arl_lock);
   2174 		return;
   2175 	}
   2176 	mutex_exit(&arl->arl_lock);
   2177 	if ((prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive)
   2178 	    == DL_BIND_REQ) {
   2179 		ASSERT((arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS) == 0);
   2180 	}
   2181 	/*
   2182 	 * No need to take the arl_lock to examine ARL_CONDEMNED at this point
   2183 	 * because the only thread that can see ARL_CONDEMNED here is the
   2184 	 * closing arp_modclose() thread which sets the flag after becoming a
   2185 	 * writer on the ipsq. Threads from IP must have finished and
   2186 	 * cannot be active now.
   2187 	 */
   2188 	if (!(arl->arl_state_flags & ARL_CONDEMNED) ||
   2189 	    (prim == DL_UNBIND_REQ)) {
   2190 		if (prim != DL_NOTIFY_CONF) {
   2191 			ill_t *ill = arl_to_ill(arl);
   2192 
   2193 			arl->arl_dlpi_pending = prim;
   2194 			if (ill != NULL) {
   2195 				mutex_enter(&ill->ill_lock);
   2196 				ill->ill_arl_dlpi_pending = 1;
   2197 				mutex_exit(&ill->ill_lock);
   2198 				ill_refrele(ill);
   2199 			}
   2200 		}
   2201 	}
   2202 	DTRACE_PROBE4(arl__dlpi, char *, "arp_dlpi_send",
   2203 	    char *, dl_primstr(prim), char *, "-",  arl_t *, arl);
   2204 	putnext(arl->arl_wq, mp);
   2205 }
   2206 
   2207 static void
   2208 arl_defaults_common(arl_t *arl, mblk_t *mp)
   2209 {
   2210 	dl_info_ack_t	*dlia = (dl_info_ack_t *)mp->b_rptr;
   2211 	/*
   2212 	 * Till the ill is fully up  the ill is not globally visible.
   2213 	 * So no need for a lock.
   2214 	 */
   2215 	arl->arl_mactype = dlia->dl_mac_type;
   2216 	arl->arl_sap_length = dlia->dl_sap_length;
   2217 
   2218 	if (!arl->arl_dlpi_style_set) {
   2219 		if (dlia->dl_provider_style == DL_STYLE2)
   2220 			arl->arl_needs_attach = 1;
   2221 		mutex_enter(&arl->arl_lock);
   2222 		ASSERT(arl->arl_dlpi_style_set == 0);
   2223 		arl->arl_dlpi_style_set = 1;
   2224 		arl->arl_state_flags &= ~ARL_LL_SUBNET_PENDING;
   2225 		cv_broadcast(&arl->arl_cv);
   2226 		mutex_exit(&arl->arl_lock);
   2227 	}
   2228 }
   2229 
   2230 int
   2231 arl_init(queue_t *q, arl_t *arl)
   2232 {
   2233 	mblk_t *info_mp;
   2234 	dl_info_req_t   *dlir;
   2235 
   2236 	/* subset of ill_init */
   2237 	mutex_init(&arl->arl_lock, NULL, MUTEX_DEFAULT, 0);
   2238 
   2239 	arl->arl_rq = q;
   2240 	arl->arl_wq = WR(q);
   2241 
   2242 	info_mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)),
   2243 	    BPRI_HI);
   2244 	if (info_mp == NULL)
   2245 		return (ENOMEM);
   2246 	/*
   2247 	 * allocate sufficient space to contain device name.
   2248 	 */
   2249 	arl->arl_name = (char *)(mi_zalloc(2 * LIFNAMSIZ));
   2250 	arl->arl_ppa = UINT_MAX;
   2251 	arl->arl_state_flags |= (ARL_LL_SUBNET_PENDING | ARL_LL_UNBOUND);
   2252 
   2253 	/* Send down the Info Request to the driver. */
   2254 	info_mp->b_datap->db_type = M_PCPROTO;
   2255 	dlir = (dl_info_req_t *)info_mp->b_rptr;
   2256 	info_mp->b_wptr = (uchar_t *)&dlir[1];
   2257 	dlir->dl_primitive = DL_INFO_REQ;
   2258 	arl->arl_dlpi_pending = DL_PRIM_INVAL;
   2259 	qprocson(q);
   2260 
   2261 	arp_dlpi_send(arl, info_mp);
   2262 	return (0);
   2263 }
   2264 
   2265 int
   2266 arl_wait_for_info_ack(arl_t *arl)
   2267 {
   2268 	int err;
   2269 
   2270 	mutex_enter(&arl->arl_lock);
   2271 	while (arl->arl_state_flags & ARL_LL_SUBNET_PENDING) {
   2272 		/*
   2273 		 * Return value of 0 indicates a pending signal.
   2274 		 */
   2275 		err = cv_wait_sig(&arl->arl_cv, &arl->arl_lock);
   2276 		if (err == 0) {
   2277 			mutex_exit(&arl->arl_lock);
   2278 			return (EINTR);
   2279 		}
   2280 	}
   2281 	mutex_exit(&arl->arl_lock);
   2282 	/*
   2283 	 * ip_rput_other could have set an error  in ill_error on
   2284 	 * receipt of M_ERROR.
   2285 	 */
   2286 	return (arl->arl_error);
   2287 }
   2288 
   2289 void
   2290 arl_set_muxid(ill_t *ill, int muxid)
   2291 {
   2292 	arl_t *arl;
   2293 
   2294 	arl = ill_to_arl(ill);
   2295 	if (arl != NULL) {
   2296 		arl->arl_muxid = muxid;
   2297 		arl_refrele(arl);
   2298 	}
   2299 }
   2300 
   2301 int
   2302 arl_get_muxid(ill_t *ill)
   2303 {
   2304 	arl_t *arl;
   2305 	int muxid = 0;
   2306 
   2307 	arl = ill_to_arl(ill);
   2308 	if (arl != NULL) {
   2309 		muxid = arl->arl_muxid;
   2310 		arl_refrele(arl);
   2311 	}
   2312 	return (muxid);
   2313 }
   2314 
   2315 static int
   2316 arp_modopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
   2317 {
   2318 	int	err;
   2319 	zoneid_t zoneid;
   2320 	netstack_t *ns;
   2321 	ip_stack_t *ipst;
   2322 	arl_t	*arl = NULL;
   2323 
   2324 	/*
   2325 	 * Prevent unprivileged processes from pushing IP so that
   2326 	 * they can't send raw IP.
   2327 	 */
   2328 	if (secpolicy_net_rawaccess(credp) != 0)
   2329 		return (EPERM);
   2330 
   2331 	ns = netstack_find_by_cred(credp);
   2332 	ASSERT(ns != NULL);
   2333 	ipst = ns->netstack_ip;
   2334 	ASSERT(ipst != NULL);
   2335 
   2336 	/*
   2337 	 * For exclusive stacks we set the zoneid to zero
   2338 	 * to make IP operate as if in the global zone.
   2339 	 */
   2340 	if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
   2341 		zoneid = GLOBAL_ZONEID;
   2342 	else
   2343 		zoneid = crgetzoneid(credp);
   2344 
   2345 	arl = (arl_t *)mi_open_alloc_sleep(sizeof (arl_t));
   2346 	q->q_ptr = WR(q)->q_ptr = arl;
   2347 	arl->arl_ipst = ipst;
   2348 	arl->arl_zoneid = zoneid;
   2349 	err = arl_init(q, arl);
   2350 
   2351 	if (err != 0) {
   2352 		mi_free(arl->arl_name);
   2353 		mi_free(arl);
   2354 		netstack_rele(ipst->ips_netstack);
   2355 		q->q_ptr = NULL;
   2356 		WR(q)->q_ptr = NULL;
   2357 		return (err);
   2358 	}
   2359 
   2360 	/*
   2361 	 * Wait for the DL_INFO_ACK if a DL_INFO_REQ was sent.
   2362 	 */
   2363 	err = arl_wait_for_info_ack(arl);
   2364 	if (err == 0)
   2365 		arl->arl_credp = credp;
   2366 	else
   2367 		goto fail;
   2368 
   2369 	crhold(credp);
   2370 
   2371 	mutex_enter(&ipst->ips_ip_mi_lock);
   2372 	err = mi_open_link(&ipst->ips_arp_g_head, (IDP)q->q_ptr, devp, flag,
   2373 	    sflag, credp);
   2374 	mutex_exit(&ipst->ips_ip_mi_lock);
   2375 fail:
   2376 	if (err) {
   2377 		(void) arp_close(q, 0);
   2378 		return (err);
   2379 	}
   2380 	return (0);
   2381 }
   2382 
   2383 /*
   2384  * Notify any downstream modules (esp softmac and hitbox) of the name
   2385  * of this interface using an M_CTL.
   2386  */
   2387 static void
   2388 arp_ifname_notify(arl_t *arl)
   2389 {
   2390 	mblk_t *mp1, *mp2;
   2391 	struct iocblk *iocp;
   2392 	struct lifreq *lifr;
   2393 
   2394 	if ((mp1 = mkiocb(SIOCSLIFNAME)) == NULL)
   2395 		return;
   2396 	if ((mp2 = allocb(sizeof (struct lifreq), BPRI_HI)) == NULL) {
   2397 		freemsg(mp1);
   2398 		return;
   2399 	}
   2400 
   2401 	lifr = (struct lifreq *)mp2->b_rptr;
   2402 	mp2->b_wptr += sizeof (struct lifreq);
   2403 	bzero(lifr, sizeof (struct lifreq));
   2404 
   2405 	(void) strncpy(lifr->lifr_name, arl->arl_name, LIFNAMSIZ);
   2406 	lifr->lifr_ppa = arl->arl_ppa;
   2407 	lifr->lifr_flags = ILLF_IPV4;
   2408 
   2409 	/* Use M_CTL to avoid confusing anyone else who might be listening. */
   2410 	DB_TYPE(mp1) = M_CTL;
   2411 	mp1->b_cont = mp2;
   2412 	iocp = (struct iocblk *)mp1->b_rptr;
   2413 	iocp->ioc_count = msgsize(mp1->b_cont);
   2414 	DTRACE_PROBE4(arl__dlpi, char *, "arp_ifname_notify",
   2415 	    char *, "SIOCSLIFNAME", char *, "-",  arl_t *, arl);
   2416 	putnext(arl->arl_wq, mp1);
   2417 }
   2418 
   2419 void
   2420 arp_send_replumb_conf(ill_t *ill)
   2421 {
   2422 	mblk_t *mp;
   2423 	arl_t *arl = ill_to_arl(ill);
   2424 
   2425 	if (arl == NULL)
   2426 		return;
   2427 	/*
   2428 	 * arl_got_replumb and arl_got_unbind to be cleared after we complete
   2429 	 * arp_cmd_done.
   2430 	 */
   2431 	mp = mexchange(NULL, NULL, sizeof (dl_notify_conf_t), M_PROTO,
   2432 	    DL_NOTIFY_CONF);
   2433 	((dl_notify_conf_t *)(mp->b_rptr))->dl_notification =
   2434 	    DL_NOTE_REPLUMB_DONE;
   2435 	arp_dlpi_send(arl, mp);
   2436 	mutex_enter(&arl->arl_lock);
   2437 	arl->arl_state_flags &= ~ARL_LL_REPLUMBING;
   2438 	mutex_exit(&arl->arl_lock);
   2439 	arl_refrele(arl);
   2440 }
   2441 
   2442 /*
   2443  * The unplumb code paths call arp_unbind_complete() to make sure that it is
   2444  * safe to tear down the ill. We wait for DL_UNBIND_ACK to complete, and also
   2445  * for the arl_refcnt to fall to one so that, when we return from
   2446  * arp_unbind_complete(), we know for certain that there are no threads in
   2447  * arp_rput() that might access the arl_ill.
   2448  */
   2449 void
   2450 arp_unbind_complete(ill_t *ill)
   2451 {
   2452 	arl_t *arl = ill_to_arl(ill);
   2453 
   2454 	if (arl == NULL)
   2455 		return;
   2456 	mutex_enter(&arl->arl_lock);
   2457 	/*
   2458 	 * wait for unbind ack and arl_refcnt to drop to 1. Note that the
   2459 	 * quiescent arl_refcnt for this function is 1 (and not 0) because
   2460 	 * ill_to_arl() will itself return after taking a ref on the arl_t.
   2461 	 */
   2462 	while (arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS)
   2463 		cv_wait(&arl->arl_cv, &arl->arl_lock);
   2464 	while (arl->arl_refcnt != 1)
   2465 		cv_wait(&arl->arl_cv, &arl->arl_lock);
   2466 	mutex_exit(&arl->arl_lock);
   2467 	arl_refrele(arl);
   2468 }
   2469