Home | History | Annotate | Download | only in dls
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * Data-Link Services Module
     28  */
     29 
     30 #include	<sys/sysmacros.h>
     31 #include	<sys/strsubr.h>
     32 #include	<sys/strsun.h>
     33 #include	<sys/vlan.h>
     34 #include	<sys/dld_impl.h>
     35 #include	<sys/sdt.h>
     36 #include	<sys/atomic.h>
     37 
/* Forward declaration; the definition appears later in this file. */
static void		dls_bpf_newzone(dls_link_t *dlp, zoneid_t zid);

/* kmem cache from which dls_link_t structures are allocated. */
static kmem_cache_t	*i_dls_link_cachep;
/* Global hash of dls_link_t structures, keyed by link name string. */
mod_hash_t		*i_dls_link_hash;
/* Number of dls_link_t structures currently inserted in i_dls_link_hash. */
static uint_t		i_dls_link_count;

/* Hash table sizes (number of buckets). */
#define		LINK_HASHSZ	67	/* prime */
#define		IMPL_HASHSZ	67	/* prime */
     46 
/*
 * Construct a mod_hash key from a DLSAP value. The sap is shifted left by
 * VLAN_ID_SIZE bits and the low-order bits are left as zero; no VLAN
 * identifier is folded into the key by this macro.
 */
#define	MAKE_KEY(_sap)						\
	((mod_hash_key_t)(uintptr_t)((_sap) << VLAN_ID_SIZE))
     52 
     53 #define	DLS_STRIP_PADDING(pktsize, p) {			\
     54 	if (pktsize != 0) {				\
     55 		ssize_t delta = pktsize - msgdsize(p);	\
     56 							\
     57 		if (delta < 0)				\
     58 			(void) adjmsg(p, delta);	\
     59 	}						\
     60 }
     61 
     62 /*
     63  * Private functions.
     64  */
     65 
     66 /*ARGSUSED*/
     67 static int
     68 i_dls_link_constructor(void *buf, void *arg, int kmflag)
     69 {
     70 	dls_link_t	*dlp = buf;
     71 	char		name[MAXNAMELEN];
     72 
     73 	bzero(buf, sizeof (dls_link_t));
     74 
     75 	(void) snprintf(name, MAXNAMELEN, "dls_link_t_%p_hash", buf);
     76 	dlp->dl_str_hash = mod_hash_create_idhash(name, IMPL_HASHSZ,
     77 	    mod_hash_null_valdtor);
     78 
     79 	return (0);
     80 }
     81 
     82 /*ARGSUSED*/
     83 static void
     84 i_dls_link_destructor(void *buf, void *arg)
     85 {
     86 	dls_link_t	*dlp = buf;
     87 
     88 	ASSERT(dlp->dl_ref == 0);
     89 	ASSERT(dlp->dl_mh == NULL);
     90 	ASSERT(dlp->dl_mah == NULL);
     91 	ASSERT(dlp->dl_unknowns == 0);
     92 
     93 	mod_hash_destroy_idhash(dlp->dl_str_hash);
     94 	dlp->dl_str_hash = NULL;
     95 
     96 }
     97 
/*
 * DLS_PREPARE_PKT(mh, mp, mhip, err)
 *
 * - Parse the MAC header information of the given packet into mhip using
 *   mac_vlan_header_info(); its return value is stored in err and nothing
 *   else is done when it is non-zero.
 * - Strip the padding and skip over the header. Note that because some
 *   DLS consumers only check the db_ref count of the first mblk, we
 *   pull up the message into a single mblk when the first mblk is shorter
 *   than the MAC header. Because the original message is freed as the
 *   result of the pullup, mac_vlan_header_info() is called again to update
 *   the mhi_saddr and mhi_daddr pointers in the mhip. Further, the
 *   mac_vlan_header_info() function ensures that the size of the pulled
 *   message is greater than the MAC header size, therefore we can directly
 *   advance b_rptr to point at the payload.
 * - If the pullup fails, err is set to EINVAL and mp is left unchanged
 *   (still linked to its b_next).
 *
 * mp is an lvalue: on a successful pullup it is replaced by the new message,
 * whose b_next is set to the original successor. Any predecessor that points
 * at the old mp must be re-linked by the caller.
 *
 * We choose to use a macro for performance reasons.
 */
#define	DLS_PREPARE_PKT(mh, mp, mhip, err) {				\
	mblk_t *nextp = (mp)->b_next;					\
	if (((err) = mac_vlan_header_info((mh), (mp), (mhip))) == 0) {	\
		DLS_STRIP_PADDING((mhip)->mhi_pktsize, (mp));		\
		if (MBLKL((mp)) < (mhip)->mhi_hdrsize) {		\
			mblk_t *newmp;					\
			if ((newmp = msgpullup((mp), -1)) == NULL) {	\
				(err) = EINVAL;				\
			} else {					\
				(mp)->b_next = NULL;			\
				freemsg((mp));				\
				(mp) = newmp;				\
				VERIFY(mac_vlan_header_info((mh),	\
				    (mp), (mhip)) == 0);		\
				(mp)->b_next = nextp;			\
				(mp)->b_rptr += (mhip)->mhi_hdrsize;	\
			}						\
		} else {						\
			(mp)->b_rptr += (mhip)->mhi_hdrsize;		\
		}							\
	}								\
}
    133 
    134 /*
    135  * Truncate the chain starting at mp such that all packets in the chain
    136  * have identical source and destination addresses, saps, and tag types
    137  * (see below).  It returns a pointer to the mblk following the chain,
    138  * NULL if there is no further packet following the processed chain.
    139  * The countp argument is set to the number of valid packets in the chain.
    140  * Note that the whole MAC header (including the VLAN tag if any) in each
    141  * packet will be stripped.
    142  */
    143 static mblk_t *
    144 i_dls_link_subchain(dls_link_t *dlp, mblk_t *mp, const mac_header_info_t *mhip,
    145     uint_t *countp)
    146 {
    147 	mblk_t		*prevp;
    148 	uint_t		npacket = 1;
    149 	size_t		addr_size = dlp->dl_mip->mi_addr_length;
    150 	uint16_t	vid = VLAN_ID(mhip->mhi_tci);
    151 	uint16_t	pri = VLAN_PRI(mhip->mhi_tci);
    152 
    153 	/*
    154 	 * Compare with subsequent headers until we find one that has
    155 	 * differing header information. After checking each packet
    156 	 * strip padding and skip over the header.
    157 	 */
    158 	for (prevp = mp; (mp = mp->b_next) != NULL; prevp = mp) {
    159 		mac_header_info_t cmhi;
    160 		uint16_t cvid, cpri;
    161 		int err;
    162 
    163 		DLS_PREPARE_PKT(dlp->dl_mh, mp, &cmhi, err);
    164 		if (err != 0)
    165 			break;
    166 
    167 		prevp->b_next = mp;
    168 
    169 		/*
    170 		 * The source, destination, sap, vlan tag must all match in
    171 		 * a given subchain.
    172 		 */
    173 		if (mhip->mhi_saddr == NULL || cmhi.mhi_saddr == NULL ||
    174 		    memcmp(mhip->mhi_daddr, cmhi.mhi_daddr, addr_size) != 0 ||
    175 		    memcmp(mhip->mhi_saddr, cmhi.mhi_saddr, addr_size) != 0 ||
    176 		    mhip->mhi_bindsap != cmhi.mhi_bindsap) {
    177 			/*
    178 			 * Note that we don't need to restore the padding.
    179 			 */
    180 			mp->b_rptr -= cmhi.mhi_hdrsize;
    181 			break;
    182 		}
    183 
    184 		cvid = VLAN_ID(cmhi.mhi_tci);
    185 		cpri = VLAN_PRI(cmhi.mhi_tci);
    186 
    187 		/*
    188 		 * There are several types of packets. Packets don't match
    189 		 * if they are classified to different type or if they are
    190 		 * VLAN packets but belong to different VLANs:
    191 		 *
    192 		 * packet type		tagged		vid		pri
    193 		 * ---------------------------------------------------------
    194 		 * untagged		No		zero		zero
    195 		 * VLAN packets		Yes		non-zero	-
    196 		 * priority tagged	Yes		zero		non-zero
    197 		 * 0 tagged		Yes		zero		zero
    198 		 */
    199 		if ((mhip->mhi_istagged != cmhi.mhi_istagged) ||
    200 		    (vid != cvid) || ((vid == VLAN_ID_NONE) &&
    201 		    (((pri == 0) && (cpri != 0)) ||
    202 		    ((pri != 0) && (cpri == 0))))) {
    203 			mp->b_rptr -= cmhi.mhi_hdrsize;
    204 			break;
    205 		}
    206 
    207 		npacket++;
    208 	}
    209 
    210 	/*
    211 	 * Break the chain at this point and return a pointer to the next
    212 	 * sub-chain.
    213 	 */
    214 	prevp->b_next = NULL;
    215 	*countp = npacket;
    216 	return (mp);
    217 }
    218 
    219 /* ARGSUSED */
    220 static int
    221 i_dls_head_hold(mod_hash_key_t key, mod_hash_val_t val)
    222 {
    223 	dls_head_t *dhp = (dls_head_t *)val;
    224 
    225 	/*
    226 	 * The lock order is  mod_hash's internal lock -> dh_lock as in the
    227 	 * call to i_dls_link_rx -> mod_hash_find_cb_rval -> i_dls_head_hold
    228 	 */
    229 	mutex_enter(&dhp->dh_lock);
    230 	if (dhp->dh_removing) {
    231 		mutex_exit(&dhp->dh_lock);
    232 		return (-1);
    233 	}
    234 	dhp->dh_ref++;
    235 	mutex_exit(&dhp->dh_lock);
    236 	return (0);
    237 }
    238 
    239 void
    240 i_dls_head_rele(dls_head_t *dhp)
    241 {
    242 	mutex_enter(&dhp->dh_lock);
    243 	dhp->dh_ref--;
    244 	if (dhp->dh_ref == 0 && dhp->dh_removing != 0)
    245 		cv_broadcast(&dhp->dh_cv);
    246 	mutex_exit(&dhp->dh_lock);
    247 }
    248 
    249 static dls_head_t *
    250 i_dls_head_alloc(mod_hash_key_t key)
    251 {
    252 	dls_head_t	*dhp;
    253 
    254 	dhp = kmem_zalloc(sizeof (dls_head_t), KM_SLEEP);
    255 	dhp->dh_key = key;
    256 	return (dhp);
    257 }
    258 
    259 static void
    260 i_dls_head_free(dls_head_t *dhp)
    261 {
    262 	ASSERT(dhp->dh_ref == 0);
    263 	kmem_free(dhp, sizeof (dls_head_t));
    264 }
    265 
    266 /*
    267  * Try to send mp up to the streams of the given sap and vid. Return B_TRUE
    268  * if this message is sent to any streams.
    269  * Note that this function will copy the message chain and the original
    270  * mp will remain valid after this function
    271  */
    272 static uint_t
    273 i_dls_link_rx_func(dls_link_t *dlp, mac_resource_handle_t mrh,
    274     mac_header_info_t *mhip, mblk_t *mp, uint32_t sap,
    275     boolean_t (*acceptfunc)())
    276 {
    277 	mod_hash_t	*hash = dlp->dl_str_hash;
    278 	mod_hash_key_t	key;
    279 	dls_head_t	*dhp;
    280 	dld_str_t	*dsp;
    281 	mblk_t		*nmp;
    282 	dls_rx_t	ds_rx;
    283 	void		*ds_rx_arg;
    284 	uint_t		naccepted = 0;
    285 	int		rval;
    286 
    287 	/*
    288 	 * Construct a hash key from the VLAN identifier and the
    289 	 * DLSAP that represents dld_str_t in promiscuous mode.
    290 	 */
    291 	key = MAKE_KEY(sap);
    292 
    293 	/*
    294 	 * Search the hash table for dld_str_t eligible to receive
    295 	 * a packet chain for this DLSAP/VLAN combination. The mod hash's
    296 	 * internal lock serializes find/insert/remove from the mod hash list.
    297 	 * Incrementing the dh_ref (while holding the mod hash lock) ensures
    298 	 * dls_link_remove will wait for the upcall to finish.
    299 	 */
    300 	if (mod_hash_find_cb_rval(hash, key, (mod_hash_val_t *)&dhp,
    301 	    i_dls_head_hold, &rval) != 0 || (rval != 0)) {
    302 		return (B_FALSE);
    303 	}
    304 
    305 	/*
    306 	 * Find dld_str_t that will accept the sub-chain.
    307 	 */
    308 	for (dsp = dhp->dh_list; dsp != NULL; dsp = dsp->ds_next) {
    309 		if (!acceptfunc(dsp, mhip, &ds_rx, &ds_rx_arg))
    310 			continue;
    311 
    312 		/*
    313 		 * We have at least one acceptor.
    314 		 */
    315 		naccepted++;
    316 
    317 		/*
    318 		 * There will normally be at least more dld_str_t
    319 		 * (since we've yet to check for non-promiscuous
    320 		 * dld_str_t) so dup the sub-chain.
    321 		 */
    322 		if ((nmp = copymsgchain(mp)) != NULL)
    323 			ds_rx(ds_rx_arg, mrh, nmp, mhip);
    324 	}
    325 
    326 	/*
    327 	 * Release the hold on the dld_str_t chain now that we have
    328 	 * finished walking it.
    329 	 */
    330 	i_dls_head_rele(dhp);
    331 	return (naccepted);
    332 }
    333 
/*
 * Receive routine for a dls_link_t (passed as arg). The incoming chain mp is
 * processed one sub-chain at a time:
 *
 * - The first packet's MAC header is parsed and skipped; a packet that
 *   cannot be parsed is counted in dl_unknowns and freed.
 * - i_dls_link_subchain() gathers the following packets that share the same
 *   addresses, sap and tag classification.
 * - Tagged sub-chains are first copied to the streams registered under
 *   ETHERTYPE_VLAN, and certain tagged packets are then dropped (see below).
 * - The sub-chain is delivered to every dld_str_t registered under the
 *   packet's bind sap that accepts it: copies go to all but the last
 *   acceptor, which receives the original chain.
 * - Sub-chains that nobody accepted are freed and counted in dl_unknowns.
 */
/* ARGSUSED */
void
i_dls_link_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    boolean_t loopback)
{
	dls_link_t			*dlp = arg;
	mod_hash_t			*hash = dlp->dl_str_hash;
	mblk_t				*nextp;
	mac_header_info_t		mhi;
	dls_head_t			*dhp;
	dld_str_t			*dsp;
	dld_str_t			*ndsp;
	mblk_t				*nmp;
	mod_hash_key_t			key;
	uint_t				npacket;
	boolean_t			accepted;
	dls_rx_t			ds_rx, nds_rx;
	void				*ds_rx_arg, *nds_rx_arg;
	uint16_t			vid;
	int				err, rval;

	/*
	 * Walk the packet chain.
	 */
	for (; mp != NULL; mp = nextp) {
		/*
		 * Wipe the accepted state.
		 */
		accepted = B_FALSE;

		/*
		 * Parse and skip the MAC header of the first packet. On
		 * failure the packet is unlinked from the chain, counted as
		 * unknown and freed.
		 */
		DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
		if (err != 0) {
			atomic_add_32(&(dlp->dl_unknowns), 1);
			nextp = mp->b_next;
			mp->b_next = NULL;
			freemsg(mp);
			continue;
		}

		/*
		 * Grab the longest sub-chain we can process as a single
		 * unit.
		 */
		nextp = i_dls_link_subchain(dlp, mp, &mhi, &npacket);
		ASSERT(npacket != 0);

		vid = VLAN_ID(mhi.mhi_tci);

		if (mhi.mhi_istagged) {
			/*
			 * If it is tagged traffic, send it upstream to
			 * all dld_str_t which are attached to the physical
			 * link and bound to SAP 0x8100.
			 */
			if (i_dls_link_rx_func(dlp, mrh, &mhi, mp,
			    ETHERTYPE_VLAN, dls_accept) > 0) {
				accepted = B_TRUE;
			}

			/*
			 * Don't pass the packets up if they are tagged
			 * packets and:
			 *  - their VID and priority are both zero and the
			 *    original packet isn't using the PVID (invalid
			 *    packets).
			 *  - their sap is ETHERTYPE_VLAN and their VID is
			 *    zero as they have already been sent upstream.
			 */
			if ((vid == VLAN_ID_NONE && !mhi.mhi_ispvid &&
			    VLAN_PRI(mhi.mhi_tci) == 0) ||
			    (mhi.mhi_bindsap == ETHERTYPE_VLAN &&
			    vid == VLAN_ID_NONE)) {
				freemsgchain(mp);
				goto loop;
			}
		}

		/*
		 * Construct a hash key from the DLSAP.
		 */
		key = MAKE_KEY(mhi.mhi_bindsap);

		/*
		 * Search the hash table for dld_str_t eligible to receive
		 * a packet chain for this DLSAP. The lookup callback takes
		 * a hold on the list head; a failed lookup or a failed hold
		 * means there is nobody to deliver to.
		 */
		if (mod_hash_find_cb_rval(hash, key, (mod_hash_val_t *)&dhp,
		    i_dls_head_hold, &rval) != 0 || (rval != 0)) {
			freemsgchain(mp);
			goto loop;
		}

		/*
		 * Find the first dld_str_t that will accept the sub-chain.
		 */
		for (dsp = dhp->dh_list; dsp != NULL; dsp = dsp->ds_next)
			if (dls_accept(dsp, &mhi, &ds_rx, &ds_rx_arg))
				break;

		/*
		 * If we did not find any dld_str_t willing to accept the
		 * sub-chain then throw it away.
		 */
		if (dsp == NULL) {
			i_dls_head_rele(dhp);
			freemsgchain(mp);
			goto loop;
		}

		/*
		 * We have at least one acceptor.
		 */
		accepted = B_TRUE;
		for (;;) {
			/*
			 * Find the next dld_str_t that will accept the
			 * sub-chain.
			 */
			for (ndsp = dsp->ds_next; ndsp != NULL;
			    ndsp = ndsp->ds_next)
				if (dls_accept(ndsp, &mhi, &nds_rx,
				    &nds_rx_arg))
					break;

			/*
			 * If there are no more dld_str_t that are willing
			 * to accept the sub-chain then we don't need to dup
			 * it before handing it to the current one.
			 */
			if (ndsp == NULL) {
				ds_rx(ds_rx_arg, mrh, mp, &mhi);

				/*
				 * Since there are no more dld_str_t, we're
				 * done.
				 */
				break;
			}

			/*
			 * There are more dld_str_t so dup the sub-chain. If
			 * the copy fails the current dld_str_t is skipped.
			 */
			if ((nmp = copymsgchain(mp)) != NULL)
				ds_rx(ds_rx_arg, mrh, nmp, &mhi);

			dsp = ndsp;
			ds_rx = nds_rx;
			ds_rx_arg = nds_rx_arg;
		}

		/*
		 * Release the hold on the dld_str_t chain now that we have
		 * finished walking it.
		 */
		i_dls_head_rele(dhp);

loop:
		/*
		 * If there were no acceptors then add the packet count to the
		 * 'unknown' count.
		 */
		if (!accepted)
			atomic_add_32(&(dlp->dl_unknowns), npacket);
	}
}
    500 
    501 /* ARGSUSED */
    502 void
    503 dls_rx_vlan_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    504     boolean_t loopback)
    505 {
    506 	dld_str_t			*dsp = arg;
    507 	dls_link_t			*dlp = dsp->ds_dlp;
    508 	mac_header_info_t		mhi;
    509 	dls_rx_t			ds_rx;
    510 	void				*ds_rx_arg;
    511 	int				err;
    512 
    513 	DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
    514 	if (err != 0)
    515 		goto drop;
    516 
    517 	/*
    518 	 * If there is promiscuous handle for vlan, we filter out the untagged
    519 	 * pkts and pkts that are not for the primary unicast address.
    520 	 */
    521 	if (dsp->ds_vlan_mph != NULL) {
    522 		uint8_t prim_addr[MAXMACADDRLEN];
    523 		size_t	addr_length = dsp->ds_mip->mi_addr_length;
    524 
    525 		if (!(mhi.mhi_istagged))
    526 			goto drop;
    527 		ASSERT(dsp->ds_mh != NULL);
    528 		mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)prim_addr);
    529 		if (memcmp(mhi.mhi_daddr, prim_addr, addr_length) != 0)
    530 			goto drop;
    531 
    532 		if (!dls_accept(dsp, &mhi, &ds_rx, &ds_rx_arg))
    533 			goto drop;
    534 
    535 		ds_rx(ds_rx_arg, NULL, mp, &mhi);
    536 		return;
    537 	}
    538 
    539 drop:
    540 	atomic_add_32(&dlp->dl_unknowns, 1);
    541 	freemsg(mp);
    542 }
    543 
    544 /* ARGSUSED */
    545 void
    546 dls_rx_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    547     boolean_t loopback)
    548 {
    549 	dld_str_t			*dsp = arg;
    550 	dls_link_t			*dlp = dsp->ds_dlp;
    551 	mac_header_info_t		mhi;
    552 	dls_rx_t			ds_rx;
    553 	void				*ds_rx_arg;
    554 	int				err;
    555 	dls_head_t			*dhp;
    556 	mod_hash_key_t			key;
    557 
    558 	DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
    559 	if (err != 0)
    560 		goto drop;
    561 
    562 	/*
    563 	 * In order to filter out sap pkt that no dls channel listens, search
    564 	 * the hash table trying to find a dld_str_t eligible to receive the pkt
    565 	 */
    566 	if ((dsp->ds_promisc & DLS_PROMISC_SAP) == 0) {
    567 		key = MAKE_KEY(mhi.mhi_bindsap);
    568 		if (mod_hash_find(dsp->ds_dlp->dl_str_hash, key,
    569 		    (mod_hash_val_t *)&dhp) != 0)
    570 			goto drop;
    571 	}
    572 
    573 	if (!dls_accept_promisc(dsp, &mhi, &ds_rx, &ds_rx_arg, loopback))
    574 		goto drop;
    575 
    576 	ds_rx(ds_rx_arg, NULL, mp, &mhi);
    577 	return;
    578 
    579 drop:
    580 	atomic_add_32(&dlp->dl_unknowns, 1);
    581 	freemsg(mp);
    582 }
    583 
    584 static void
    585 i_dls_link_destroy(dls_link_t *dlp)
    586 {
    587 	ASSERT(dlp->dl_nactive == 0);
    588 	ASSERT(dlp->dl_impl_count == 0);
    589 	ASSERT(dlp->dl_zone_ref == 0);
    590 
    591 	/*
    592 	 * Free the structure back to the cache.
    593 	 */
    594 	if (dlp->dl_mch != NULL)
    595 		mac_client_close(dlp->dl_mch, 0);
    596 
    597 	if (dlp->dl_mh != NULL) {
    598 		ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
    599 		mac_close(dlp->dl_mh);
    600 	}
    601 
    602 	dlp->dl_mh = NULL;
    603 	dlp->dl_mch = NULL;
    604 	dlp->dl_mip = NULL;
    605 	dlp->dl_unknowns = 0;
    606 	dlp->dl_nonip_cnt = 0;
    607 	kmem_cache_free(i_dls_link_cachep, dlp);
    608 }
    609 
    610 static int
    611 i_dls_link_create(const char *name, dls_link_t **dlpp)
    612 {
    613 	dls_link_t		*dlp;
    614 	int			err;
    615 
    616 	/*
    617 	 * Allocate a new dls_link_t structure.
    618 	 */
    619 	dlp = kmem_cache_alloc(i_dls_link_cachep, KM_SLEEP);
    620 
    621 	/*
    622 	 * Name the dls_link_t after the MAC interface it represents.
    623 	 */
    624 	(void) strlcpy(dlp->dl_name, name, sizeof (dlp->dl_name));
    625 
    626 	/*
    627 	 * First reference; hold open the MAC interface.
    628 	 */
    629 	ASSERT(dlp->dl_mh == NULL);
    630 	err = mac_open(dlp->dl_name, &dlp->dl_mh);
    631 	if (err != 0)
    632 		goto bail;
    633 
    634 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
    635 	dlp->dl_mip = mac_info(dlp->dl_mh);
    636 
    637 	/* DLS is the "primary" MAC client */
    638 	ASSERT(dlp->dl_mch == NULL);
    639 
    640 	err = mac_client_open(dlp->dl_mh, &dlp->dl_mch, NULL,
    641 	    MAC_OPEN_FLAGS_USE_DATALINK_NAME);
    642 	if (err != 0)
    643 		goto bail;
    644 
    645 	DTRACE_PROBE2(dls__primary__client, char *, dlp->dl_name, void *,
    646 	    dlp->dl_mch);
    647 
    648 	*dlpp = dlp;
    649 	return (0);
    650 
    651 bail:
    652 	i_dls_link_destroy(dlp);
    653 	return (err);
    654 }
    655 
    656 /*
    657  * Module initialization functions.
    658  */
    659 
    660 void
    661 dls_link_init(void)
    662 {
    663 	/*
    664 	 * Create a kmem_cache of dls_link_t structures.
    665 	 */
    666 	i_dls_link_cachep = kmem_cache_create("dls_link_cache",
    667 	    sizeof (dls_link_t), 0, i_dls_link_constructor,
    668 	    i_dls_link_destructor, NULL, NULL, NULL, 0);
    669 	ASSERT(i_dls_link_cachep != NULL);
    670 
    671 	/*
    672 	 * Create a dls_link_t hash table and associated lock.
    673 	 */
    674 	i_dls_link_hash = mod_hash_create_extended("dls_link_hash",
    675 	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
    676 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
    677 	i_dls_link_count = 0;
    678 }
    679 
    680 int
    681 dls_link_fini(void)
    682 {
    683 	if (i_dls_link_count > 0)
    684 		return (EBUSY);
    685 
    686 	/*
    687 	 * Destroy the kmem_cache.
    688 	 */
    689 	kmem_cache_destroy(i_dls_link_cachep);
    690 
    691 	/*
    692 	 * Destroy the hash table and associated lock.
    693 	 */
    694 	mod_hash_destroy_hash(i_dls_link_hash);
    695 	return (0);
    696 }
    697 
    698 /*
    699  * Exported functions.
    700  */
    701 
    702 static int
    703 dls_link_hold_common(const char *name, dls_link_t **dlpp, boolean_t create)
    704 {
    705 	dls_link_t		*dlp;
    706 	int			err;
    707 
    708 	/*
    709 	 * Look up a dls_link_t corresponding to the given macname in the
    710 	 * global hash table. The i_dls_link_hash itself is protected by the
    711 	 * mod_hash package's internal lock which synchronizes
    712 	 * find/insert/remove into the global mod_hash list. Assumes that
    713 	 * inserts and removes are single threaded on a per mac end point
    714 	 * by the mac perimeter.
    715 	 */
    716 	if ((err = mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
    717 	    (mod_hash_val_t *)&dlp)) == 0)
    718 		goto done;
    719 
    720 	ASSERT(err == MH_ERR_NOTFOUND);
    721 	if (!create)
    722 		return (ENOENT);
    723 
    724 	/*
    725 	 * We didn't find anything so we need to create one.
    726 	 */
    727 	if ((err = i_dls_link_create(name, &dlp)) != 0)
    728 		return (err);
    729 
    730 	/*
    731 	 * Insert the dls_link_t.
    732 	 */
    733 	err = mod_hash_insert(i_dls_link_hash, (mod_hash_key_t)dlp->dl_name,
    734 	    (mod_hash_val_t)dlp);
    735 	ASSERT(err == 0);
    736 
    737 	atomic_add_32(&i_dls_link_count, 1);
    738 	ASSERT(i_dls_link_count != 0);
    739 
    740 done:
    741 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
    742 	/*
    743 	 * Bump the reference count and hand back the reference.
    744 	 */
    745 	dlp->dl_ref++;
    746 	*dlpp = dlp;
    747 	return (0);
    748 }
    749 
/*
 * Hold the dls_link_t for the named MAC, creating it if it does not exist
 * yet. Returns 0 on success or the error from dls_link_hold_common().
 */
int
dls_link_hold_create(const char *name, dls_link_t **dlpp)
{
	return (dls_link_hold_common(name, dlpp, B_TRUE));
}
    755 
/*
 * Hold the existing dls_link_t for the named MAC. Unlike
 * dls_link_hold_create(), a missing link is not created; ENOENT is returned
 * instead.
 */
int
dls_link_hold(const char *name, dls_link_t **dlpp)
{
	return (dls_link_hold_common(name, dlpp, B_FALSE));
}
    761 
    762 dev_info_t *
    763 dls_link_devinfo(dev_t dev)
    764 {
    765 	dls_link_t	*dlp;
    766 	dev_info_t	*dip;
    767 	char	macname[MAXNAMELEN];
    768 	char	*drv;
    769 	mac_perim_handle_t	mph;
    770 
    771 	if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
    772 		return (NULL);
    773 	(void) snprintf(macname, MAXNAMELEN, "%s%d", drv,
    774 	    DLS_MINOR2INST(getminor(dev)));
    775 
    776 	/*
    777 	 * The code below assumes that the name constructed above is the
    778 	 * macname. This is not the case for legacy devices. Currently this
    779 	 * is ok because this function is only called in the getinfo(9e) path,
    780 	 * which for a legacy device would directly end up in the driver's
    781 	 * getinfo, rather than here
    782 	 */
    783 	if (mac_perim_enter_by_macname(macname, &mph) != 0)
    784 		return (NULL);
    785 
    786 	if (dls_link_hold(macname, &dlp) != 0) {
    787 		mac_perim_exit(mph);
    788 		return (NULL);
    789 	}
    790 
    791 	dip = mac_devinfo_get(dlp->dl_mh);
    792 	dls_link_rele(dlp);
    793 	mac_perim_exit(mph);
    794 
    795 	return (dip);
    796 }
    797 
    798 dev_t
    799 dls_link_dev(dls_link_t *dlp)
    800 {
    801 	return (makedevice(ddi_driver_major(mac_devinfo_get(dlp->dl_mh)),
    802 	    mac_minor(dlp->dl_mh)));
    803 }
    804 
    805 void
    806 dls_link_rele(dls_link_t *dlp)
    807 {
    808 	mod_hash_val_t	val;
    809 
    810 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
    811 	/*
    812 	 * Check if there are any more references.
    813 	 */
    814 	if (--dlp->dl_ref == 0) {
    815 		(void) mod_hash_remove(i_dls_link_hash,
    816 		    (mod_hash_key_t)dlp->dl_name, &val);
    817 		ASSERT(dlp == (dls_link_t *)val);
    818 
    819 		/*
    820 		 * Destroy the dls_link_t.
    821 		 */
    822 		i_dls_link_destroy(dlp);
    823 		ASSERT(i_dls_link_count > 0);
    824 		atomic_add_32(&i_dls_link_count, -1);
    825 	}
    826 }
    827 
    828 int
    829 dls_link_rele_by_name(const char *name)
    830 {
    831 	dls_link_t		*dlp;
    832 
    833 	if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
    834 	    (mod_hash_val_t *)&dlp) != 0)
    835 		return (ENOENT);
    836 
    837 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
    838 
    839 	/*
    840 	 * Must fail detach if mac client is busy.
    841 	 */
    842 	ASSERT(dlp->dl_ref > 0 && dlp->dl_mch != NULL);
    843 	if (mac_link_has_flows(dlp->dl_mch))
    844 		return (ENOTEMPTY);
    845 
    846 	dls_link_rele(dlp);
    847 	return (0);
    848 }
    849 
    850 int
    851 dls_link_setzid(const char *name, zoneid_t zid)
    852 {
    853 	dls_link_t	*dlp;
    854 	int		err = 0;
    855 	zoneid_t	old_zid;
    856 
    857 	if ((err = dls_link_hold_create(name, &dlp)) != 0)
    858 		return (err);
    859 
    860 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
    861 
    862 	if ((old_zid = dlp->dl_zid) == zid)
    863 		goto done;
    864 
    865 	/*
    866 	 * Check whether this dlp is used by its own zone.  If yes, we cannot
    867 	 * change its zoneid.
    868 	 */
    869 	if (dlp->dl_zone_ref != 0) {
    870 		err = EBUSY;
    871 		goto done;
    872 	}
    873 
    874 	dls_bpf_newzone(dlp, zid);
    875 	dlp->dl_zid = zid;
    876 
    877 	if (zid == GLOBAL_ZONEID) {
    878 		/*
    879 		 * The link is moving from a non-global zone to the global
    880 		 * zone, so we need to release the reference that was held
    881 		 * when the link was originally assigned to the non-global
    882 		 * zone.
    883 		 */
    884 		dls_link_rele(dlp);
    885 	}
    886 
    887 done:
    888 	/*
    889 	 * We only keep the reference to this link open if the link has
    890 	 * successfully moved from the global zone to a non-global zone.
    891 	 */
    892 	if (err != 0 || old_zid != GLOBAL_ZONEID)
    893 		dls_link_rele(dlp);
    894 	return (err);
    895 }
    896 
    897 
    898 /*
    899  * When a NIC changes zone, that change needs to be communicated to BPF
    900  * so that it can correctly enforce access rights on it via BPF. In the
    901  * absence of a function from BPF to just change the zoneid, this is
    902  * done with a detach followed by an attach.
    903  */
    904 static void
    905 dls_bpf_newzone(dls_link_t *dlp, zoneid_t zid)
    906 {
    907 	if (dls_bpfdetach_fn != NULL)
    908 		dls_bpfdetach_fn((uintptr_t)dlp->dl_mh);
    909 
    910 	if (dls_bpfattach_fn != NULL)
    911 		dls_bpfattach_fn((uintptr_t)dlp->dl_mh, mac_type(dlp->dl_mh),
    912 		    zid, BPR_MAC);
    913 }
    914 
    915 int
    916 dls_link_getzid(const char *name, zoneid_t *zidp)
    917 {
    918 	dls_link_t	*dlp;
    919 	int		err = 0;
    920 
    921 	if ((err = dls_link_hold(name, &dlp)) != 0)
    922 		return (err);
    923 
    924 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
    925 
    926 	*zidp = dlp->dl_zid;
    927 
    928 	dls_link_rele(dlp);
    929 	return (0);
    930 }
    931 
    932 void
    933 dls_link_add(dls_link_t *dlp, uint32_t sap, dld_str_t *dsp)
    934 {
    935 	mod_hash_t	*hash = dlp->dl_str_hash;
    936 	mod_hash_key_t	key;
    937 	dls_head_t	*dhp;
    938 	dld_str_t	*p;
    939 	int		err;
    940 
    941 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
    942 
    943 	/*
    944 	 * Generate a hash key based on the sap.
    945 	 */
    946 	key = MAKE_KEY(sap);
    947 
    948 	/*
    949 	 * Search the table for a list head with this key.
    950 	 */
    951 	if ((err = mod_hash_find(hash, key, (mod_hash_val_t *)&dhp)) != 0) {
    952 		ASSERT(err == MH_ERR_NOTFOUND);
    953 
    954 		dhp = i_dls_head_alloc(key);
    955 		err = mod_hash_insert(hash, key, (mod_hash_val_t)dhp);
    956 		ASSERT(err == 0);
    957 	}
    958 
    959 	/*
    960 	 * Add the dld_str_t to the head of the list. List walkers in
    961 	 * i_dls_link_rx_* bump up dh_ref to ensure the list does not change
    962 	 * while they walk the list. The membar below ensures that list walkers
    963 	 * see exactly the old list or the new list.
    964 	 */
    965 	ASSERT(dsp->ds_next == NULL);
    966 	p = dhp->dh_list;
    967 	dsp->ds_next = p;
    968 
    969 	membar_producer();
    970 
    971 	dhp->dh_list = dsp;
    972 
    973 	/*
    974 	 * Save a pointer to the list head.
    975 	 */
    976 	dsp->ds_head = dhp;
    977 	dlp->dl_impl_count++;
    978 }
    979 
    980 void
    981 dls_link_remove(dls_link_t *dlp, dld_str_t *dsp)
    982 {
    983 	mod_hash_t	*hash = dlp->dl_str_hash;
    984 	dld_str_t	**pp;
    985 	dld_str_t	*p;
    986 	dls_head_t	*dhp;
    987 
    988 	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
    989 
    990 	/*
    991 	 * We set dh_removing here to tell the receive callbacks not to pass
    992 	 * up packets anymore. Then wait till the current callbacks are done.
    993 	 * This happens either in the close path or in processing the
    994 	 * DL_UNBIND_REQ via a taskq thread, and it is ok to cv_wait in either.
    995 	 * The dh_ref ensures there aren't and there won't be any upcalls
    996 	 * walking or using the dh_list. The mod hash internal lock ensures
    997 	 * that the insert/remove of the dls_head_t itself synchronizes with
    998 	 * any i_dls_link_rx trying to locate it. The perimeter ensures that
    999 	 * there isn't another simultaneous dls_link_add/remove.
   1000 	 */
   1001 	dhp = dsp->ds_head;
   1002 
   1003 	mutex_enter(&dhp->dh_lock);
   1004 	dhp->dh_removing = B_TRUE;
   1005 	while (dhp->dh_ref != 0)
   1006 		cv_wait(&dhp->dh_cv, &dhp->dh_lock);
   1007 	mutex_exit(&dhp->dh_lock);
   1008 
   1009 	/*
   1010 	 * Walk the list and remove the dld_str_t.
   1011 	 */
   1012 	for (pp = &dhp->dh_list; (p = *pp) != NULL; pp = &(p->ds_next)) {
   1013 		if (p == dsp)
   1014 			break;
   1015 	}
   1016 	ASSERT(p != NULL);
   1017 	*pp = p->ds_next;
   1018 	p->ds_next = NULL;
   1019 	p->ds_head = NULL;
   1020 
   1021 	ASSERT(dlp->dl_impl_count != 0);
   1022 	dlp->dl_impl_count--;
   1023 
   1024 	if (dhp->dh_list == NULL) {
   1025 		mod_hash_val_t	val = NULL;
   1026 
   1027 		/*
   1028 		 * The list is empty so remove the hash table entry.
   1029 		 */
   1030 		(void) mod_hash_remove(hash, dhp->dh_key, &val);
   1031 		ASSERT(dhp == (dls_head_t *)val);
   1032 		i_dls_head_free(dhp);
   1033 	} else {
   1034 		mutex_enter(&dhp->dh_lock);
   1035 		dhp->dh_removing = B_FALSE;
   1036 		mutex_exit(&dhp->dh_lock);
   1037 	}
   1038 }
   1039