Home | History | Annotate | Download | only in mac
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * - General Introduction:
     29  *
     30  * This file contains the implementation of the MAC client kernel
     31  * API and related code. The MAC client API allows a kernel module
     32  * to gain access to a MAC instance (physical NIC, link aggregation, etc).
     33  * It allows a MAC client to associate itself with a MAC address,
     34  * VLANs, callback functions for data traffic and for promiscuous mode.
     35  * The MAC client API is also used to specify the properties associated
     36  * with a MAC client, such as bandwidth limits, priority, CPUS, etc.
     37  * These properties are further used to determine the hardware resources
     38  * to allocate to the various MAC clients.
     39  *
     40  * - Primary MAC clients:
     41  *
     42  * The MAC client API refers to "primary MAC clients". A primary MAC
     43  * client is a client which "owns" the primary MAC address of
     44  * the underlying MAC instance. The primary MAC address is called out
     45  * since it is associated with specific semantics: the primary MAC
     46  * address is the MAC address which is assigned to the IP interface
     47  * when it is plumbed, and the primary MAC address is assigned
     48  * to VLAN data-links. The primary address of a MAC instance can
     49  * also change dynamically from under the MAC client, for example
     50  * as a result of a change of state of a link aggregation. In that
     51  * case the MAC layer automatically updates all data-structures which
     52  * refer to the current value of the primary MAC address. Typical
     53  * primary MAC clients are dls, aggr, and xnb. A typical non-primary
     54  * MAC client is the vnic driver.
     55  *
     56  * - Virtual Switching:
     57  *
     58  * The MAC layer implements a virtual switch between the MAC clients
     59  * (primary and non-primary) defined on top of the same underlying
     60  * NIC (physical, link aggregation, etc). The virtual switch is
     61  * VLAN-aware, i.e. it allows multiple MAC clients to be members
     62  * of one or more VLANs, and the virtual switch will distribute
     63  * multicast tagged packets only to the members of the corresponding
     64  * VLANs.
     65  *
     66  * - Upper vs Lower MAC:
     67  *
     68  * Creating a VNIC on top of a MAC instance effectively causes
     69  * two MAC instances to be layered on top of each other, one for
     70  * the VNIC(s), one for the underlying MAC instance (physical NIC,
     71  * link aggregation, etc). In the code below we refer to the
     72  * underlying NIC as the "lower MAC", and we refer to VNICs as
     73  * the "upper MAC".
     74  *
     75  * - Pass-through for VNICs:
     76  *
     77  * When VNICs are created on top of an underlying MAC, this causes
     78  * a layering of two MAC instances. Since the lower MAC already
     79  * does the switching and demultiplexing to its MAC clients, the
     80  * upper MAC would simply have to pass packets to the layer below
     81  * or above it, which would introduce overhead. In order to avoid
     82  * this overhead, the MAC layer implements a pass-through mechanism
     83  * for VNICs. When a VNIC opens the lower MAC instance, it saves
     84  * the MAC client handle it obtains from the MAC layer. When a MAC
     85  * client opens a VNIC (upper MAC), the MAC layer detects that
     86  * the MAC being opened is a VNIC, and gets the MAC client handle
     87  * that the VNIC driver obtained from the lower MAC. This exchange
     88  * is done through a private capability between the MAC layer
     89  * and the VNIC driver. The upper MAC then returns that handle
     90  * directly to its MAC client. Any operation done by the upper
     91  * MAC client is now done on the lower MAC client handle, which
     92  * allows the VNIC driver to be completely bypassed for the
     93  * performance sensitive data-path.
     94  *
     95  */
     96 
     97 #include <sys/types.h>
     98 #include <sys/conf.h>
     99 #include <sys/id_space.h>
    100 #include <sys/esunddi.h>
    101 #include <sys/stat.h>
    102 #include <sys/mkdev.h>
    103 #include <sys/stream.h>
    104 #include <sys/strsun.h>
    105 #include <sys/strsubr.h>
    106 #include <sys/dlpi.h>
    107 #include <sys/modhash.h>
    108 #include <sys/mac_impl.h>
    109 #include <sys/mac_client_impl.h>
    110 #include <sys/mac_soft_ring.h>
    111 #include <sys/dls.h>
    112 #include <sys/dld.h>
    113 #include <sys/modctl.h>
    114 #include <sys/fs/dv_node.h>
    115 #include <sys/thread.h>
    116 #include <sys/proc.h>
    117 #include <sys/callb.h>
    118 #include <sys/cpuvar.h>
    119 #include <sys/atomic.h>
    120 #include <sys/sdt.h>
    121 #include <sys/mac_flow.h>
    122 #include <sys/ddi_intr_impl.h>
    123 #include <sys/disp.h>
    124 #include <sys/sdt.h>
    125 #include <sys/vnic.h>
    126 #include <sys/vnic_impl.h>
    127 #include <sys/vlan.h>
    128 #include <inet/ip.h>
    129 #include <inet/ip6.h>
    130 #include <sys/exacct.h>
    131 #include <sys/exacct_impl.h>
    132 #include <inet/nd.h>
    133 #include <sys/ethernet.h>
    134 
/*
 * Object caches created by mac_client_init() and destroyed by
 * mac_client_fini(). The first is sized MAC_CLIENT_IMPL_SIZE, the second
 * sizeof (mac_promisc_impl_t).
 */
kmem_cache_t	*mac_client_impl_cache;
kmem_cache_t	*mac_promisc_impl_cache;

/* Prototypes of static helpers referenced by this file. */
static boolean_t mac_client_single_rcvr(mac_client_impl_t *);
static flow_entry_t *mac_client_swap_mciflent(mac_client_impl_t *);
static flow_entry_t *mac_client_get_flow(mac_client_impl_t *,
    mac_unicast_impl_t *);
static void mac_client_remove_flow_from_list(mac_client_impl_t *,
    flow_entry_t *);
static void mac_client_add_to_flow_list(mac_client_impl_t *, flow_entry_t *);
static void mac_rename_flow_names(mac_client_impl_t *, const char *);
static void mac_virtual_link_update(mac_impl_t *);
    147 
    148 /* ARGSUSED */
    149 static int
    150 i_mac_client_impl_ctor(void *buf, void *arg, int kmflag)
    151 {
    152 	int	i;
    153 	mac_client_impl_t	*mcip = buf;
    154 
    155 	bzero(buf, MAC_CLIENT_IMPL_SIZE);
    156 	mutex_init(&mcip->mci_tx_cb_lock, NULL, MUTEX_DRIVER, NULL);
    157 	mcip->mci_tx_notify_cb_info.mcbi_lockp = &mcip->mci_tx_cb_lock;
    158 
    159 	ASSERT(mac_tx_percpu_cnt >= 0);
    160 	for (i = 0; i <= mac_tx_percpu_cnt; i++) {
    161 		mutex_init(&mcip->mci_tx_pcpu[i].pcpu_tx_lock, NULL,
    162 		    MUTEX_DRIVER, NULL);
    163 	}
    164 	cv_init(&mcip->mci_tx_cv, NULL, CV_DRIVER, NULL);
    165 
    166 	return (0);
    167 }
    168 
    169 /* ARGSUSED */
    170 static void
    171 i_mac_client_impl_dtor(void *buf, void *arg)
    172 {
    173 	int	i;
    174 	mac_client_impl_t *mcip = buf;
    175 
    176 	ASSERT(mcip->mci_promisc_list == NULL);
    177 	ASSERT(mcip->mci_unicast_list == NULL);
    178 	ASSERT(mcip->mci_state_flags == 0);
    179 	ASSERT(mcip->mci_tx_flag == 0);
    180 
    181 	mutex_destroy(&mcip->mci_tx_cb_lock);
    182 
    183 	ASSERT(mac_tx_percpu_cnt >= 0);
    184 	for (i = 0; i <= mac_tx_percpu_cnt; i++) {
    185 		ASSERT(mcip->mci_tx_pcpu[i].pcpu_tx_refcnt == 0);
    186 		mutex_destroy(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
    187 	}
    188 	cv_destroy(&mcip->mci_tx_cv);
    189 }
    190 
    191 /* ARGSUSED */
    192 static int
    193 i_mac_promisc_impl_ctor(void *buf, void *arg, int kmflag)
    194 {
    195 	mac_promisc_impl_t	*mpip = buf;
    196 
    197 	bzero(buf, sizeof (mac_promisc_impl_t));
    198 	mpip->mpi_mci_link.mcb_objp = buf;
    199 	mpip->mpi_mci_link.mcb_objsize = sizeof (mac_promisc_impl_t);
    200 	mpip->mpi_mi_link.mcb_objp = buf;
    201 	mpip->mpi_mi_link.mcb_objsize = sizeof (mac_promisc_impl_t);
    202 	return (0);
    203 }
    204 
    205 /* ARGSUSED */
    206 static void
    207 i_mac_promisc_impl_dtor(void *buf, void *arg)
    208 {
    209 	mac_promisc_impl_t	*mpip = buf;
    210 
    211 	ASSERT(mpip->mpi_mci_link.mcb_objp != NULL);
    212 	ASSERT(mpip->mpi_mci_link.mcb_objsize == sizeof (mac_promisc_impl_t));
    213 	ASSERT(mpip->mpi_mi_link.mcb_objp == mpip->mpi_mci_link.mcb_objp);
    214 	ASSERT(mpip->mpi_mi_link.mcb_objsize == sizeof (mac_promisc_impl_t));
    215 
    216 	mpip->mpi_mci_link.mcb_objp = NULL;
    217 	mpip->mpi_mci_link.mcb_objsize = 0;
    218 	mpip->mpi_mi_link.mcb_objp = NULL;
    219 	mpip->mpi_mi_link.mcb_objsize = 0;
    220 
    221 	ASSERT(mpip->mpi_mci_link.mcb_flags == 0);
    222 	mpip->mpi_mci_link.mcb_objsize = 0;
    223 }
    224 
    225 void
    226 mac_client_init(void)
    227 {
    228 	ASSERT(mac_tx_percpu_cnt >= 0);
    229 
    230 	mac_client_impl_cache = kmem_cache_create("mac_client_impl_cache",
    231 	    MAC_CLIENT_IMPL_SIZE, 0, i_mac_client_impl_ctor,
    232 	    i_mac_client_impl_dtor, NULL, NULL, NULL, 0);
    233 	ASSERT(mac_client_impl_cache != NULL);
    234 
    235 	mac_promisc_impl_cache = kmem_cache_create("mac_promisc_impl_cache",
    236 	    sizeof (mac_promisc_impl_t), 0, i_mac_promisc_impl_ctor,
    237 	    i_mac_promisc_impl_dtor, NULL, NULL, NULL, 0);
    238 	ASSERT(mac_promisc_impl_cache != NULL);
    239 }
    240 
    241 void
    242 mac_client_fini(void)
    243 {
    244 	kmem_cache_destroy(mac_client_impl_cache);
    245 	kmem_cache_destroy(mac_promisc_impl_cache);
    246 }
    247 
    248 /*
    249  * Return the lower MAC client handle from the VNIC driver for the
    250  * specified VNIC MAC instance.
    251  */
    252 mac_client_impl_t *
    253 mac_vnic_lower(mac_impl_t *mip)
    254 {
    255 	mac_capab_vnic_t cap;
    256 	mac_client_impl_t *mcip;
    257 
    258 	VERIFY(i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, &cap));
    259 	mcip = cap.mcv_mac_client_handle(cap.mcv_arg);
    260 
    261 	return (mcip);
    262 }
    263 
    264 /*
    265  * Return the MAC client handle of the primary MAC client for the
    266  * specified MAC instance, or NULL otherwise.
    267  */
    268 mac_client_impl_t *
    269 mac_primary_client_handle(mac_impl_t *mip)
    270 {
    271 	mac_client_impl_t *mcip;
    272 
    273 	if (mip->mi_state_flags & MIS_IS_VNIC)
    274 		return (mac_vnic_lower(mip));
    275 
    276 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
    277 
    278 	for (mcip = mip->mi_clients_list; mcip != NULL;
    279 	    mcip = mcip->mci_client_next) {
    280 		if (MCIP_DATAPATH_SETUP(mcip) && mac_is_primary_client(mcip))
    281 			return (mcip);
    282 	}
    283 	return (NULL);
    284 }
    285 
    286 /*
    287  * Open a MAC specified by its MAC name.
    288  */
    289 int
    290 mac_open(const char *macname, mac_handle_t *mhp)
    291 {
    292 	mac_impl_t	*mip;
    293 	int		err;
    294 
    295 	/*
    296 	 * Look up its entry in the global hash table.
    297 	 */
    298 	if ((err = mac_hold(macname, &mip)) != 0)
    299 		return (err);
    300 
    301 	/*
    302 	 * Hold the dip associated to the MAC to prevent it from being
    303 	 * detached. For a softmac, its underlying dip is held by the
    304 	 * mi_open() callback.
    305 	 *
    306 	 * This is done to be more tolerant with some defective drivers,
    307 	 * which incorrectly handle mac_unregister() failure in their
    308 	 * xxx_detach() routine. For example, some drivers ignore the
    309 	 * failure of mac_unregister() and free all resources that
    310 	 * that are needed for data transmition.
    311 	 */
    312 	e_ddi_hold_devi(mip->mi_dip);
    313 
    314 	if (!(mip->mi_callbacks->mc_callbacks & MC_OPEN)) {
    315 		*mhp = (mac_handle_t)mip;
    316 		return (0);
    317 	}
    318 
    319 	/*
    320 	 * The mac perimeter is used in both mac_open and mac_close by the
    321 	 * framework to single thread the MC_OPEN/MC_CLOSE of drivers.
    322 	 */
    323 	i_mac_perim_enter(mip);
    324 	mip->mi_oref++;
    325 	if (mip->mi_oref != 1 || ((err = mip->mi_open(mip->mi_driver)) == 0)) {
    326 		*mhp = (mac_handle_t)mip;
    327 		i_mac_perim_exit(mip);
    328 		return (0);
    329 	}
    330 	mip->mi_oref--;
    331 	ddi_release_devi(mip->mi_dip);
    332 	mac_rele(mip);
    333 	i_mac_perim_exit(mip);
    334 	return (err);
    335 }
    336 
    337 /*
    338  * Open a MAC specified by its linkid.
    339  */
    340 int
    341 mac_open_by_linkid(datalink_id_t linkid, mac_handle_t *mhp)
    342 {
    343 	dls_dl_handle_t	dlh;
    344 	int		err;
    345 
    346 	if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0)
    347 		return (err);
    348 
    349 	dls_devnet_prop_task_wait(dlh);
    350 
    351 	err = mac_open(dls_devnet_mac(dlh), mhp);
    352 
    353 	dls_devnet_rele_tmp(dlh);
    354 	return (err);
    355 }
    356 
    357 /*
    358  * Open a MAC specified by its link name.
    359  */
    360 int
    361 mac_open_by_linkname(const char *link, mac_handle_t *mhp)
    362 {
    363 	datalink_id_t	linkid;
    364 	int		err;
    365 
    366 	if ((err = dls_mgmt_get_linkid(link, &linkid)) != 0)
    367 		return (err);
    368 	return (mac_open_by_linkid(linkid, mhp));
    369 }
    370 
    371 /*
    372  * Close the specified MAC.
    373  */
    374 void
    375 mac_close(mac_handle_t mh)
    376 {
    377 	mac_impl_t	*mip = (mac_impl_t *)mh;
    378 
    379 	i_mac_perim_enter(mip);
    380 	/*
    381 	 * The mac perimeter is used in both mac_open and mac_close by the
    382 	 * framework to single thread the MC_OPEN/MC_CLOSE of drivers.
    383 	 */
    384 	if (mip->mi_callbacks->mc_callbacks & MC_OPEN) {
    385 		ASSERT(mip->mi_oref != 0);
    386 		if (--mip->mi_oref == 0) {
    387 			if ((mip->mi_callbacks->mc_callbacks & MC_CLOSE))
    388 				mip->mi_close(mip->mi_driver);
    389 		}
    390 	}
    391 	i_mac_perim_exit(mip);
    392 	ddi_release_devi(mip->mi_dip);
    393 	mac_rele(mip);
    394 }
    395 
    396 /*
    397  * Misc utility functions to retrieve various information about a MAC
    398  * instance or a MAC client.
    399  */
    400 
    401 const mac_info_t *
    402 mac_info(mac_handle_t mh)
    403 {
    404 	return (&((mac_impl_t *)mh)->mi_info);
    405 }
    406 
    407 dev_info_t *
    408 mac_devinfo_get(mac_handle_t mh)
    409 {
    410 	return (((mac_impl_t *)mh)->mi_dip);
    411 }
    412 
    413 void *
    414 mac_driver(mac_handle_t mh)
    415 {
    416 	return (((mac_impl_t *)mh)->mi_driver);
    417 }
    418 
    419 const char *
    420 mac_name(mac_handle_t mh)
    421 {
    422 	return (((mac_impl_t *)mh)->mi_name);
    423 }
    424 
    425 int
    426 mac_type(mac_handle_t mh)
    427 {
    428 	return (((mac_impl_t *)mh)->mi_type->mt_type);
    429 }
    430 
    431 char *
    432 mac_client_name(mac_client_handle_t mch)
    433 {
    434 	return (((mac_client_impl_t *)mch)->mci_name);
    435 }
    436 
    437 minor_t
    438 mac_minor(mac_handle_t mh)
    439 {
    440 	return (((mac_impl_t *)mh)->mi_minor);
    441 }
    442 
    443 /*
    444  * Return the VID associated with a MAC client. This function should
    445  * be called for clients which are associated with only one VID.
    446  */
    447 uint16_t
    448 mac_client_vid(mac_client_handle_t mch)
    449 {
    450 	uint16_t		vid = VLAN_ID_NONE;
    451 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
    452 	flow_desc_t		flow_desc;
    453 
    454 	if (mcip->mci_nflents == 0)
    455 		return (vid);
    456 
    457 	ASSERT(MCIP_DATAPATH_SETUP(mcip) && mac_client_single_rcvr(mcip));
    458 
    459 	mac_flow_get_desc(mcip->mci_flent, &flow_desc);
    460 	if ((flow_desc.fd_mask & FLOW_LINK_VID) != 0)
    461 		vid = flow_desc.fd_vid;
    462 
    463 	return (vid);
    464 }
    465 
    466 /*
    467  * Return whether the specified MAC client corresponds to a VLAN VNIC.
    468  */
    469 boolean_t
    470 mac_client_is_vlan_vnic(mac_client_handle_t mch)
    471 {
    472 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
    473 
    474 	return (((mcip->mci_state_flags & MCIS_IS_VNIC) != 0) &&
    475 	    ((mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) != 0));
    476 }
    477 
    478 /*
    479  * Return the link speed associated with the specified MAC client.
    480  *
    481  * The link speed of a MAC client is equal to the smallest value of
    482  * 1) the current link speed of the underlying NIC, or
    483  * 2) the bandwidth limit set for the MAC client.
    484  *
    485  * Note that the bandwidth limit can be higher than the speed
    486  * of the underlying NIC. This is allowed to avoid spurious
    487  * administration action failures or artifically lowering the
    488  * bandwidth limit of a link that may  have temporarily lowered
    489  * its link speed due to hardware problem or administrator action.
    490  */
    491 static uint64_t
    492 mac_client_ifspeed(mac_client_impl_t *mcip)
    493 {
    494 	mac_impl_t *mip = mcip->mci_mip;
    495 	uint64_t nic_speed;
    496 
    497 	nic_speed = mac_stat_get((mac_handle_t)mip, MAC_STAT_IFSPEED);
    498 
    499 	if (nic_speed == 0) {
    500 		return (0);
    501 	} else {
    502 		uint64_t policy_limit = (uint64_t)-1;
    503 
    504 		if (MCIP_RESOURCE_PROPS_MASK(mcip) & MRP_MAXBW)
    505 			policy_limit = MCIP_RESOURCE_PROPS_MAXBW(mcip);
    506 
    507 		return (MIN(policy_limit, nic_speed));
    508 	}
    509 }
    510 
    511 /*
    512  * Return the link state of the specified client. If here are more
    513  * than one clients of the underying mac_impl_t, the link state
    514  * will always be UP regardless of the link state of the underlying
    515  * mac_impl_t. This is needed to allow the MAC clients to continue
    516  * to communicate with each other even when the physical link of
    517  * their mac_impl_t is down.
    518  */
    519 static uint64_t
    520 mac_client_link_state(mac_client_impl_t *mcip)
    521 {
    522 	mac_impl_t *mip = mcip->mci_mip;
    523 	uint16_t vid;
    524 	mac_client_impl_t *mci_list;
    525 	mac_unicast_impl_t *mui_list, *oth_mui_list;
    526 
    527 	/*
    528 	 * Returns LINK_STATE_UP if there are other MAC clients defined on
    529 	 * mac_impl_t which share same VLAN ID as that of mcip. Note that
    530 	 * if 'mcip' has more than one VID's then we match ANY one of the
    531 	 * VID's with other MAC client's VID's and return LINK_STATE_UP.
    532 	 */
    533 	rw_enter(&mcip->mci_rw_lock, RW_READER);
    534 	for (mui_list = mcip->mci_unicast_list; mui_list != NULL;
    535 	    mui_list = mui_list->mui_next) {
    536 		vid = mui_list->mui_vid;
    537 		for (mci_list = mip->mi_clients_list; mci_list != NULL;
    538 		    mci_list = mci_list->mci_client_next) {
    539 			if (mci_list == mcip)
    540 				continue;
    541 			for (oth_mui_list = mci_list->mci_unicast_list;
    542 			    oth_mui_list != NULL; oth_mui_list = oth_mui_list->
    543 			    mui_next) {
    544 				if (vid == oth_mui_list->mui_vid) {
    545 					rw_exit(&mcip->mci_rw_lock);
    546 					return (LINK_STATE_UP);
    547 				}
    548 			}
    549 		}
    550 	}
    551 	rw_exit(&mcip->mci_rw_lock);
    552 
    553 	return (mac_stat_get((mac_handle_t)mip, MAC_STAT_LINK_STATE));
    554 }
    555 
    556 /*
    557  * Return the statistics of a MAC client. These statistics are different
    558  * then the statistics of the underlying MAC which are returned by
    559  * mac_stat_get().
    560  */
    561 uint64_t
    562 mac_client_stat_get(mac_client_handle_t mch, uint_t stat)
    563 {
    564 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
    565 	mac_impl_t *mip = mcip->mci_mip;
    566 	uint64_t val;
    567 
    568 	switch (stat) {
    569 	case MAC_STAT_LINK_STATE:
    570 		val = mac_client_link_state(mcip);
    571 		break;
    572 	case MAC_STAT_LINK_UP:
    573 		val = (mac_client_link_state(mcip) == LINK_STATE_UP);
    574 		break;
    575 	case MAC_STAT_PROMISC:
    576 		val = mac_stat_get((mac_handle_t)mip, MAC_STAT_PROMISC);
    577 		break;
    578 	case MAC_STAT_LOWLINK_STATE:
    579 		val = mac_stat_get((mac_handle_t)mip, MAC_STAT_LOWLINK_STATE);
    580 		break;
    581 	case MAC_STAT_IFSPEED:
    582 		val = mac_client_ifspeed(mcip);
    583 		break;
    584 	case MAC_STAT_MULTIRCV:
    585 		val = mcip->mci_stat_multircv;
    586 		break;
    587 	case MAC_STAT_BRDCSTRCV:
    588 		val = mcip->mci_stat_brdcstrcv;
    589 		break;
    590 	case MAC_STAT_MULTIXMT:
    591 		val = mcip->mci_stat_multixmt;
    592 		break;
    593 	case MAC_STAT_BRDCSTXMT:
    594 		val = mcip->mci_stat_brdcstxmt;
    595 		break;
    596 	case MAC_STAT_OBYTES:
    597 		val = mcip->mci_stat_obytes;
    598 		break;
    599 	case MAC_STAT_OPACKETS:
    600 		val = mcip->mci_stat_opackets;
    601 		break;
    602 	case MAC_STAT_OERRORS:
    603 		val = mcip->mci_stat_oerrors;
    604 		break;
    605 	case MAC_STAT_IPACKETS:
    606 		val = mcip->mci_stat_ipackets;
    607 		break;
    608 	case MAC_STAT_RBYTES:
    609 		val = mcip->mci_stat_ibytes;
    610 		break;
    611 	case MAC_STAT_IERRORS:
    612 		val = mcip->mci_stat_ierrors;
    613 		break;
    614 	default:
    615 		val = mac_stat_default(mip, stat);
    616 		break;
    617 	}
    618 
    619 	return (val);
    620 }
    621 
    622 /*
    623  * Return the statistics of the specified MAC instance.
    624  */
    625 uint64_t
    626 mac_stat_get(mac_handle_t mh, uint_t stat)
    627 {
    628 	mac_impl_t	*mip = (mac_impl_t *)mh;
    629 	uint64_t	val;
    630 	int		ret;
    631 
    632 	/*
    633 	 * The range of stat determines where it is maintained.  Stat
    634 	 * values from 0 up to (but not including) MAC_STAT_MIN are
    635 	 * mainteined by the mac module itself.  Everything else is
    636 	 * maintained by the driver.
    637 	 *
    638 	 * If the mac_impl_t being queried corresponds to a VNIC,
    639 	 * the stats need to be queried from the lower MAC client
    640 	 * corresponding to the VNIC. (The mac_link_update()
    641 	 * invoked by the driver to the lower MAC causes the *lower
    642 	 * MAC* to update its mi_linkstate, and send a notification
    643 	 * to its MAC clients. Due to the VNIC passthrough,
    644 	 * these notifications are sent to the upper MAC clients
    645 	 * of the VNIC directly, and the upper mac_impl_t of the VNIC
    646 	 * does not have a valid mi_linkstate.
    647 	 */
    648 	if (stat < MAC_STAT_MIN && !(mip->mi_state_flags & MIS_IS_VNIC)) {
    649 		/* these stats are maintained by the mac module itself */
    650 		switch (stat) {
    651 		case MAC_STAT_LINK_STATE:
    652 			return (mip->mi_linkstate);
    653 		case MAC_STAT_LINK_UP:
    654 			return (mip->mi_linkstate == LINK_STATE_UP);
    655 		case MAC_STAT_PROMISC:
    656 			return (mip->mi_devpromisc != 0);
    657 		case MAC_STAT_LOWLINK_STATE:
    658 			return (mip->mi_lowlinkstate);
    659 		default:
    660 			ASSERT(B_FALSE);
    661 		}
    662 	}
    663 
    664 	/*
    665 	 * Call the driver to get the given statistic.
    666 	 */
    667 	ret = mip->mi_getstat(mip->mi_driver, stat, &val);
    668 	if (ret != 0) {
    669 		/*
    670 		 * The driver doesn't support this statistic.  Get the
    671 		 * statistic's default value.
    672 		 */
    673 		val = mac_stat_default(mip, stat);
    674 	}
    675 	return (val);
    676 }
    677 
    678 /*
    679  * Utility function which returns the VID associated with a flow entry.
    680  */
    681 uint16_t
    682 i_mac_flow_vid(flow_entry_t *flent)
    683 {
    684 	flow_desc_t	flow_desc;
    685 
    686 	mac_flow_get_desc(flent, &flow_desc);
    687 
    688 	if ((flow_desc.fd_mask & FLOW_LINK_VID) != 0)
    689 		return (flow_desc.fd_vid);
    690 	return (VLAN_ID_NONE);
    691 }
    692 
    693 /*
    694  * Verify the validity of the specified unicast MAC address. Returns B_TRUE
    695  * if the address is valid, B_FALSE otherwise (multicast address, or incorrect
    696  * length.
    697  */
    698 boolean_t
    699 mac_unicst_verify(mac_handle_t mh, const uint8_t *addr, uint_t len)
    700 {
    701 	mac_impl_t	*mip = (mac_impl_t *)mh;
    702 
    703 	/*
    704 	 * Verify the address. No lock is needed since mi_type and plugin
    705 	 * details don't change after mac_register().
    706 	 */
    707 	if ((len != mip->mi_type->mt_addr_length) ||
    708 	    (mip->mi_type->mt_ops.mtops_unicst_verify(addr,
    709 	    mip->mi_pdata)) != 0) {
    710 		return (B_FALSE);
    711 	} else {
    712 		return (B_TRUE);
    713 	}
    714 }
    715 
    716 void
    717 mac_sdu_get(mac_handle_t mh, uint_t *min_sdu, uint_t *max_sdu)
    718 {
    719 	mac_impl_t	*mip = (mac_impl_t *)mh;
    720 
    721 	if (min_sdu != NULL)
    722 		*min_sdu = mip->mi_sdu_min;
    723 	if (max_sdu != NULL)
    724 		*max_sdu = mip->mi_sdu_max;
    725 }
    726 
    727 /*
    728  * Update the MAC unicast address of the specified client's flows. Currently
    729  * only one unicast MAC unicast address is allowed per client.
    730  */
    731 static void
    732 mac_unicast_update_client_flow(mac_client_impl_t *mcip)
    733 {
    734 	mac_impl_t *mip = mcip->mci_mip;
    735 	flow_entry_t *flent = mcip->mci_flent;
    736 	mac_address_t *map = mcip->mci_unicast;
    737 	flow_desc_t flow_desc;
    738 
    739 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
    740 	ASSERT(flent != NULL);
    741 
    742 	mac_flow_get_desc(flent, &flow_desc);
    743 	ASSERT(flow_desc.fd_mask & FLOW_LINK_DST);
    744 
    745 	bcopy(map->ma_addr, flow_desc.fd_dst_mac, map->ma_len);
    746 	mac_flow_set_desc(flent, &flow_desc);
    747 
    748 	/*
    749 	 * A MAC client could have one MAC address but multiple
    750 	 * VLANs. In that case update the flow entries corresponding
    751 	 * to all VLANs of the MAC client.
    752 	 */
    753 	for (flent = mcip->mci_flent_list; flent != NULL;
    754 	    flent = flent->fe_client_next) {
    755 		mac_flow_get_desc(flent, &flow_desc);
    756 		if (!(flent->fe_type & FLOW_PRIMARY_MAC ||
    757 		    flent->fe_type & FLOW_VNIC_MAC))
    758 			continue;
    759 
    760 		bcopy(map->ma_addr, flow_desc.fd_dst_mac, map->ma_len);
    761 		mac_flow_set_desc(flent, &flow_desc);
    762 	}
    763 }
    764 
    765 /*
    766  * Update all clients that share the same unicast address.
    767  */
    768 void
    769 mac_unicast_update_clients(mac_impl_t *mip, mac_address_t *map)
    770 {
    771 	mac_client_impl_t *mcip;
    772 
    773 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
    774 
    775 	/*
    776 	 * Find all clients that share the same unicast MAC address and update
    777 	 * them appropriately.
    778 	 */
    779 	for (mcip = mip->mi_clients_list; mcip != NULL;
    780 	    mcip = mcip->mci_client_next) {
    781 		/*
    782 		 * Ignore clients that don't share this MAC address.
    783 		 */
    784 		if (map != mcip->mci_unicast)
    785 			continue;
    786 
    787 		/*
    788 		 * Update those clients with same old unicast MAC address.
    789 		 */
    790 		mac_unicast_update_client_flow(mcip);
    791 	}
    792 }
    793 
    794 /*
    795  * Update the unicast MAC address of the specified VNIC MAC client.
    796  *
    797  * Check whether the operation is valid. Any of following cases should fail:
    798  *
    799  * 1. It's a VLAN type of VNIC.
    800  * 2. The new value is current "primary" MAC address.
    801  * 3. The current MAC address is shared with other clients.
    802  * 4. The new MAC address has been used. This case will be valid when
    803  *    client migration is fully supported.
    804  */
    805 int
    806 mac_vnic_unicast_set(mac_client_handle_t mch, const uint8_t *addr)
    807 {
    808 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
    809 	mac_impl_t *mip = mcip->mci_mip;
    810 	mac_address_t *map = mcip->mci_unicast;
    811 	int err;
    812 
    813 	ASSERT(!(mip->mi_state_flags & MIS_IS_VNIC));
    814 	ASSERT(mcip->mci_state_flags & MCIS_IS_VNIC);
    815 	ASSERT(mcip->mci_flags != MAC_CLIENT_FLAGS_PRIMARY);
    816 
    817 	i_mac_perim_enter(mip);
    818 
    819 	/*
    820 	 * If this is a VLAN type of VNIC, it's using "primary" MAC address
    821 	 * of the underlying interface. Must fail here. Refer to case 1 above.
    822 	 */
    823 	if (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) == 0) {
    824 		i_mac_perim_exit(mip);
    825 		return (ENOTSUP);
    826 	}
    827 
    828 	/*
    829 	 * If the new address is the "primary" one, must fail. Refer to
    830 	 * case 2 above.
    831 	 */
    832 	if (bcmp(addr, mip->mi_addr, map->ma_len) == 0) {
    833 		i_mac_perim_exit(mip);
    834 		return (EACCES);
    835 	}
    836 
    837 	/*
    838 	 * If the address is shared by multiple clients, must fail. Refer
    839 	 * to case 3 above.
    840 	 */
    841 	if (mac_check_macaddr_shared(map)) {
    842 		i_mac_perim_exit(mip);
    843 		return (EBUSY);
    844 	}
    845 
    846 	/*
    847 	 * If the new address has been used, must fail for now. Refer to
    848 	 * case 4 above.
    849 	 */
    850 	if (mac_find_macaddr(mip, (uint8_t *)addr) != NULL) {
    851 		i_mac_perim_exit(mip);
    852 		return (ENOTSUP);
    853 	}
    854 
    855 	/*
    856 	 * Update the MAC address.
    857 	 */
    858 	err = mac_update_macaddr(map, (uint8_t *)addr);
    859 
    860 	if (err != 0) {
    861 		i_mac_perim_exit(mip);
    862 		return (err);
    863 	}
    864 
    865 	/*
    866 	 * Update all flows of this MAC client.
    867 	 */
    868 	mac_unicast_update_client_flow(mcip);
    869 
    870 	i_mac_perim_exit(mip);
    871 	return (0);
    872 }
    873 
    874 /*
    875  * Program the new primary unicast address of the specified MAC.
    876  *
    877  * Function mac_update_macaddr() takes care different types of underlying
    878  * MAC. If the underlying MAC is VNIC, the VNIC driver must have registerd
    879  * mi_unicst() entry point, that indirectly calls mac_vnic_unicast_set()
    880  * which will take care of updating the MAC address of the corresponding
    881  * MAC client.
    882  *
    883  * This is the only interface that allow the client to update the "primary"
    884  * MAC address of the underlying MAC. The new value must have not been
    885  * used by other clients.
    886  */
    887 int
    888 mac_unicast_primary_set(mac_handle_t mh, const uint8_t *addr)
    889 {
    890 	mac_impl_t *mip = (mac_impl_t *)mh;
    891 	mac_address_t *map;
    892 	int err;
    893 
    894 	/* verify the address validity */
    895 	if (!mac_unicst_verify(mh, addr, mip->mi_type->mt_addr_length))
    896 		return (EINVAL);
    897 
    898 	i_mac_perim_enter(mip);
    899 
    900 	/*
    901 	 * If the new value is the same as the current primary address value,
    902 	 * there's nothing to do.
    903 	 */
    904 	if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) == 0) {
    905 		i_mac_perim_exit(mip);
    906 		return (0);
    907 	}
    908 
    909 	if (mac_find_macaddr(mip, (uint8_t *)addr) != 0) {
    910 		i_mac_perim_exit(mip);
    911 		return (EBUSY);
    912 	}
    913 
    914 	map = mac_find_macaddr(mip, mip->mi_addr);
    915 	ASSERT(map != NULL);
    916 
    917 	/*
    918 	 * Update the MAC address.
    919 	 */
    920 	if (mip->mi_state_flags & MIS_IS_AGGR) {
    921 		mac_capab_aggr_t aggr_cap;
    922 
    923 		/*
    924 		 * If the mac is an aggregation, other than the unicast
    925 		 * addresses programming, aggr must be informed about this
    926 		 * primary unicst address change to change its mac address
    927 		 * policy to be user-specified.
    928 		 */
    929 		ASSERT(map->ma_type == MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED);
    930 		VERIFY(i_mac_capab_get(mh, MAC_CAPAB_AGGR, &aggr_cap));
    931 		err = aggr_cap.mca_unicst(mip->mi_driver, addr);
    932 		if (err == 0)
    933 			bcopy(addr, map->ma_addr, map->ma_len);
    934 	} else {
    935 		err = mac_update_macaddr(map, (uint8_t *)addr);
    936 	}
    937 
    938 	if (err != 0) {
    939 		i_mac_perim_exit(mip);
    940 		return (err);
    941 	}
    942 
    943 	mac_unicast_update_clients(mip, map);
    944 
    945 	/*
    946 	 * Save the new primary MAC address in mac_impl_t.
    947 	 */
    948 	bcopy(addr, mip->mi_addr, mip->mi_type->mt_addr_length);
    949 
    950 	i_mac_perim_exit(mip);
    951 
    952 	if (err == 0)
    953 		i_mac_notify(mip, MAC_NOTE_UNICST);
    954 
    955 	return (err);
    956 }
    957 
    958 /*
    959  * Return the current primary MAC address of the specified MAC.
    960  */
    961 void
    962 mac_unicast_primary_get(mac_handle_t mh, uint8_t *addr)
    963 {
    964 	mac_impl_t *mip = (mac_impl_t *)mh;
    965 
    966 	rw_enter(&mip->mi_rw_lock, RW_READER);
    967 	bcopy(mip->mi_addr, addr, mip->mi_type->mt_addr_length);
    968 	rw_exit(&mip->mi_rw_lock);
    969 }
    970 
    971 /*
    972  * Return information about the use of the primary MAC address of the
    973  * specified MAC instance:
    974  *
    975  * - if client_name is non-NULL, it must point to a string of at
    976  *   least MAXNAMELEN bytes, and will be set to the name of the MAC
    977  *   client which uses the primary MAC address.
    978  *
    979  * - if in_use is non-NULL, used to return whether the primary MAC
    980  *   address is currently in use.
    981  */
    982 void
    983 mac_unicast_primary_info(mac_handle_t mh, char *client_name, boolean_t *in_use)
    984 {
    985 	mac_impl_t *mip = (mac_impl_t *)mh;
    986 	mac_client_impl_t *cur_client;
    987 
    988 	if (in_use != NULL)
    989 		*in_use = B_FALSE;
    990 	if (client_name != NULL)
    991 		bzero(client_name, MAXNAMELEN);
    992 
    993 	/*
    994 	 * The mi_rw_lock is used to protect threads that don't hold the
    995 	 * mac perimeter to get a consistent view of the mi_clients_list.
    996 	 * Threads that modify the list must hold both the mac perimeter and
    997 	 * mi_rw_lock(RW_WRITER)
    998 	 */
    999 	rw_enter(&mip->mi_rw_lock, RW_READER);
   1000 	for (cur_client = mip->mi_clients_list; cur_client != NULL;
   1001 	    cur_client = cur_client->mci_client_next) {
   1002 		if (mac_is_primary_client(cur_client) ||
   1003 		    (mip->mi_state_flags & MIS_IS_VNIC)) {
   1004 			rw_exit(&mip->mi_rw_lock);
   1005 			if (in_use != NULL)
   1006 				*in_use = B_TRUE;
   1007 			if (client_name != NULL) {
   1008 				bcopy(cur_client->mci_name, client_name,
   1009 				    MAXNAMELEN);
   1010 			}
   1011 			return;
   1012 		}
   1013 	}
   1014 	rw_exit(&mip->mi_rw_lock);
   1015 }
   1016 
   1017 /*
   1018  * Return the current destination MAC address of the specified MAC.
   1019  */
   1020 boolean_t
   1021 mac_dst_get(mac_handle_t mh, uint8_t *addr)
   1022 {
   1023 	mac_impl_t *mip = (mac_impl_t *)mh;
   1024 
   1025 	rw_enter(&mip->mi_rw_lock, RW_READER);
   1026 	if (mip->mi_dstaddr_set)
   1027 		bcopy(mip->mi_dstaddr, addr, mip->mi_type->mt_addr_length);
   1028 	rw_exit(&mip->mi_rw_lock);
   1029 	return (mip->mi_dstaddr_set);
   1030 }
   1031 
   1032 /*
   1033  * Add the specified MAC client to the list of clients which opened
   1034  * the specified MAC.
   1035  */
   1036 static void
   1037 mac_client_add(mac_client_impl_t *mcip)
   1038 {
   1039 	mac_impl_t *mip = mcip->mci_mip;
   1040 
   1041 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   1042 
   1043 	/* add VNIC to the front of the list */
   1044 	rw_enter(&mip->mi_rw_lock, RW_WRITER);
   1045 	mcip->mci_client_next = mip->mi_clients_list;
   1046 	mip->mi_clients_list = mcip;
   1047 	mip->mi_nclients++;
   1048 	rw_exit(&mip->mi_rw_lock);
   1049 }
   1050 
   1051 /*
   1052  * Remove the specified MAC client from the list of clients which opened
   1053  * the specified MAC.
   1054  */
   1055 static void
   1056 mac_client_remove(mac_client_impl_t *mcip)
   1057 {
   1058 	mac_impl_t *mip = mcip->mci_mip;
   1059 	mac_client_impl_t **prev, *cclient;
   1060 
   1061 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   1062 
   1063 	rw_enter(&mip->mi_rw_lock, RW_WRITER);
   1064 	prev = &mip->mi_clients_list;
   1065 	cclient = *prev;
   1066 	while (cclient != NULL && cclient != mcip) {
   1067 		prev = &cclient->mci_client_next;
   1068 		cclient = *prev;
   1069 	}
   1070 	ASSERT(cclient != NULL);
   1071 	*prev = cclient->mci_client_next;
   1072 	mip->mi_nclients--;
   1073 	rw_exit(&mip->mi_rw_lock);
   1074 }
   1075 
   1076 static mac_unicast_impl_t *
   1077 mac_client_find_vid(mac_client_impl_t *mcip, uint16_t vid)
   1078 {
   1079 	mac_unicast_impl_t *muip = mcip->mci_unicast_list;
   1080 
   1081 	while ((muip != NULL) && (muip->mui_vid != vid))
   1082 		muip = muip->mui_next;
   1083 
   1084 	return (muip);
   1085 }
   1086 
   1087 /*
   1088  * Return whether the specified (MAC address, VID) tuple is already used by
   1089  * one of the MAC clients associated with the specified MAC.
   1090  */
   1091 static boolean_t
   1092 mac_addr_in_use(mac_impl_t *mip, uint8_t *mac_addr, uint16_t vid)
   1093 {
   1094 	mac_client_impl_t *client;
   1095 	mac_address_t *map;
   1096 
   1097 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   1098 
   1099 	for (client = mip->mi_clients_list; client != NULL;
   1100 	    client = client->mci_client_next) {
   1101 
   1102 		/*
   1103 		 * Ignore clients that don't have unicast address.
   1104 		 */
   1105 		if (client->mci_unicast_list == NULL)
   1106 			continue;
   1107 
   1108 		map = client->mci_unicast;
   1109 
   1110 		if ((bcmp(mac_addr, map->ma_addr, map->ma_len) == 0) &&
   1111 		    (mac_client_find_vid(client, vid) != NULL)) {
   1112 			return (B_TRUE);
   1113 		}
   1114 	}
   1115 
   1116 	return (B_FALSE);
   1117 }
   1118 
   1119 /*
   1120  * Generate a random MAC address. The MAC address prefix is
   1121  * stored in the array pointed to by mac_addr, and its length, in bytes,
   1122  * is specified by prefix_len. The least significant bits
   1123  * after prefix_len bytes are generated, and stored after the prefix
   1124  * in the mac_addr array.
   1125  */
   1126 int
   1127 mac_addr_random(mac_client_handle_t mch, uint_t prefix_len,
   1128     uint8_t *mac_addr, mac_diag_t *diag)
   1129 {
   1130 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
   1131 	mac_impl_t *mip = mcip->mci_mip;
   1132 	size_t addr_len = mip->mi_type->mt_addr_length;
   1133 
   1134 	if (prefix_len >= addr_len) {
   1135 		*diag = MAC_DIAG_MACPREFIXLEN_INVALID;
   1136 		return (EINVAL);
   1137 	}
   1138 
   1139 	/* check the prefix value */
   1140 	if (prefix_len > 0) {
   1141 		bzero(mac_addr + prefix_len, addr_len - prefix_len);
   1142 		if (!mac_unicst_verify((mac_handle_t)mip, mac_addr,
   1143 		    addr_len)) {
   1144 			*diag = MAC_DIAG_MACPREFIX_INVALID;
   1145 			return (EINVAL);
   1146 		}
   1147 	}
   1148 
   1149 	/* generate the MAC address */
   1150 	if (prefix_len < addr_len) {
   1151 		(void) random_get_pseudo_bytes(mac_addr +
   1152 		    prefix_len, addr_len - prefix_len);
   1153 	}
   1154 
   1155 	*diag = 0;
   1156 	return (0);
   1157 }
   1158 
/*
 * Set the priority range for this MAC client. This will be used to
 * determine the absolute priority for the threads created for this
 * MAC client using the specified "low", "medium" and "high" level.
 * This will also be used for any subflows on this MAC client.
 *
 * mci_min_pri is computed first from 'pri'; mci_max_pri is then derived
 * from the freshly stored mci_min_pri.
 *
 * NOTE: 'mcip' is evaluated more than once, so it must be free of side
 * effects. The macro expands to a bare { } block (not do/while), so it
 * should only be used as a full statement inside a braced body.
 */
#define	MAC_CLIENT_SET_PRIORITY_RANGE(mcip, pri) {			\
	(mcip)->mci_min_pri = FLOW_MIN_PRIORITY(MINCLSYSPRI,	\
	    MAXCLSYSPRI, (pri));					\
	(mcip)->mci_max_pri = FLOW_MAX_PRIORITY(MINCLSYSPRI,	\
	    MAXCLSYSPRI, (mcip)->mci_min_pri);				\
	}
   1171 
   1172 /*
   1173  * MAC client open entry point. Return a new MAC client handle. Each
   1174  * MAC client is associated with a name, specified through the 'name'
   1175  * argument.
   1176  */
   1177 int
   1178 mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name,
   1179     uint16_t flags)
   1180 {
   1181 	mac_impl_t *mip = (mac_impl_t *)mh;
   1182 	mac_client_impl_t *mcip;
   1183 	int err = 0;
   1184 	boolean_t share_desired =
   1185 	    ((flags & MAC_OPEN_FLAGS_SHARES_DESIRED) != 0);
   1186 	boolean_t no_hwrings = ((flags & MAC_OPEN_FLAGS_NO_HWRINGS) != 0);
   1187 	boolean_t req_hwrings = ((flags & MAC_OPEN_FLAGS_REQ_HWRINGS) != 0);
   1188 	flow_entry_t	*flent = NULL;
   1189 
   1190 	*mchp = NULL;
   1191 	if (share_desired && no_hwrings) {
   1192 		/* can't have shares but no hardware rings */
   1193 		return (EINVAL);
   1194 	}
   1195 
   1196 	i_mac_perim_enter(mip);
   1197 
   1198 	if (mip->mi_state_flags & MIS_IS_VNIC) {
   1199 		/*
   1200 		 * The underlying MAC is a VNIC. Return the MAC client
   1201 		 * handle of the lower MAC which was obtained by
   1202 		 * the VNIC driver when it did its mac_client_open().
   1203 		 */
   1204 
   1205 		mcip = mac_vnic_lower(mip);
   1206 
   1207 		/*
   1208 		 * Note that multiple mac clients share the same mcip in
   1209 		 * this case.
   1210 		 */
   1211 		if (flags & MAC_OPEN_FLAGS_EXCLUSIVE)
   1212 			mcip->mci_state_flags |= MCIS_EXCLUSIVE;
   1213 
   1214 		if (flags & MAC_OPEN_FLAGS_MULTI_PRIMARY)
   1215 			mcip->mci_flags |= MAC_CLIENT_FLAGS_MULTI_PRIMARY;
   1216 
   1217 		mip->mi_clients_list = mcip;
   1218 		i_mac_perim_exit(mip);
   1219 		*mchp = (mac_client_handle_t)mcip;
   1220 		return (err);
   1221 	}
   1222 
   1223 	mcip = kmem_cache_alloc(mac_client_impl_cache, KM_SLEEP);
   1224 
   1225 	mcip->mci_mip = mip;
   1226 	mcip->mci_upper_mip = NULL;
   1227 	mcip->mci_rx_fn = mac_pkt_drop;
   1228 	mcip->mci_rx_arg = NULL;
   1229 	mcip->mci_rx_p_fn = NULL;
   1230 	mcip->mci_rx_p_arg = NULL;
   1231 	mcip->mci_p_unicast_list = NULL;
   1232 	mcip->mci_direct_rx_fn = NULL;
   1233 	mcip->mci_direct_rx_arg = NULL;
   1234 
   1235 	mcip->mci_unicast_list = NULL;
   1236 
   1237 	if ((flags & MAC_OPEN_FLAGS_IS_VNIC) != 0)
   1238 		mcip->mci_state_flags |= MCIS_IS_VNIC;
   1239 
   1240 	if ((flags & MAC_OPEN_FLAGS_EXCLUSIVE) != 0)
   1241 		mcip->mci_state_flags |= MCIS_EXCLUSIVE;
   1242 
   1243 	if ((flags & MAC_OPEN_FLAGS_IS_AGGR_PORT) != 0)
   1244 		mcip->mci_state_flags |= MCIS_IS_AGGR_PORT;
   1245 
   1246 	if ((flags & MAC_OPEN_FLAGS_USE_DATALINK_NAME) != 0) {
   1247 		datalink_id_t	linkid;
   1248 
   1249 		ASSERT(name == NULL);
   1250 		if ((err = dls_devnet_macname2linkid(mip->mi_name,
   1251 		    &linkid)) != 0) {
   1252 			goto done;
   1253 		}
   1254 		if ((err = dls_mgmt_get_linkinfo(linkid, mcip->mci_name, NULL,
   1255 		    NULL, NULL)) != 0) {
   1256 			/*
   1257 			 * Use mac name if dlmgmtd is not available.
   1258 			 */
   1259 			if (err == EBADF) {
   1260 				(void) strlcpy(mcip->mci_name, mip->mi_name,
   1261 				    sizeof (mcip->mci_name));
   1262 				err = 0;
   1263 			} else {
   1264 				goto done;
   1265 			}
   1266 		}
   1267 		mcip->mci_state_flags |= MCIS_USE_DATALINK_NAME;
   1268 	} else {
   1269 		ASSERT(name != NULL);
   1270 		if (strlen(name) > MAXNAMELEN) {
   1271 			err = EINVAL;
   1272 			goto done;
   1273 		}
   1274 		(void) strlcpy(mcip->mci_name, name, sizeof (mcip->mci_name));
   1275 	}
   1276 
   1277 	if (flags & MAC_OPEN_FLAGS_MULTI_PRIMARY)
   1278 		mcip->mci_flags |= MAC_CLIENT_FLAGS_MULTI_PRIMARY;
   1279 
   1280 	/* the subflow table will be created dynamically */
   1281 	mcip->mci_subflow_tab = NULL;
   1282 	mcip->mci_stat_multircv = 0;
   1283 	mcip->mci_stat_brdcstrcv = 0;
   1284 	mcip->mci_stat_multixmt = 0;
   1285 	mcip->mci_stat_brdcstxmt = 0;
   1286 
   1287 	mcip->mci_stat_obytes = 0;
   1288 	mcip->mci_stat_opackets = 0;
   1289 	mcip->mci_stat_oerrors = 0;
   1290 	mcip->mci_stat_ibytes = 0;
   1291 	mcip->mci_stat_ipackets = 0;
   1292 	mcip->mci_stat_ierrors = 0;
   1293 
   1294 	/* Create an initial flow */
   1295 
   1296 	err = mac_flow_create(NULL, NULL, mcip->mci_name, NULL,
   1297 	    mcip->mci_state_flags & MCIS_IS_VNIC ? FLOW_VNIC_MAC :
   1298 	    FLOW_PRIMARY_MAC, &flent);
   1299 	if (err != 0)
   1300 		goto done;
   1301 	mcip->mci_flent = flent;
   1302 	FLOW_MARK(flent, FE_MC_NO_DATAPATH);
   1303 	flent->fe_mcip = mcip;
   1304 	/*
   1305 	 * Place initial creation reference on the flow. This reference
   1306 	 * is released in the corresponding delete action viz.
   1307 	 * mac_unicast_remove after waiting for all transient refs to
   1308 	 * to go away. The wait happens in mac_flow_wait.
   1309 	 */
   1310 	FLOW_REFHOLD(flent);
   1311 
   1312 	/*
   1313 	 * Do this ahead of the mac_bcast_add() below so that the mi_nclients
   1314 	 * will have the right value for mac_rx_srs_setup().
   1315 	 */
   1316 	mac_client_add(mcip);
   1317 
   1318 	if (no_hwrings)
   1319 		mcip->mci_state_flags |= MCIS_NO_HWRINGS;
   1320 	if (req_hwrings)
   1321 		mcip->mci_state_flags |= MCIS_REQ_HWRINGS;
   1322 	mcip->mci_share = NULL;
   1323 	if (share_desired) {
   1324 		ASSERT(!no_hwrings);
   1325 		i_mac_share_alloc(mcip);
   1326 	}
   1327 
   1328 	DTRACE_PROBE2(mac__client__open__allocated, mac_impl_t *,
   1329 	    mcip->mci_mip, mac_client_impl_t *, mcip);
   1330 	*mchp = (mac_client_handle_t)mcip;
   1331 
   1332 	i_mac_perim_exit(mip);
   1333 	return (0);
   1334 
   1335 done:
   1336 	i_mac_perim_exit(mip);
   1337 	mcip->mci_state_flags = 0;
   1338 	mcip->mci_tx_flag = 0;
   1339 	kmem_cache_free(mac_client_impl_cache, mcip);
   1340 	return (err);
   1341 }
   1342 
/*
 * Close the specified MAC client handle.
 *
 * mch		the MAC client handle to close.
 * flags	MAC_CLOSE_FLAGS_* values. MAC_CLOSE_FLAGS_EXCLUSIVE clears the
 *		client's MCIS_EXCLUSIVE state. MAC_CLOSE_FLAGS_IS_VNIC marks
 *		the close as issued on behalf of the VNIC itself; without it,
 *		closing a client that has MCIS_IS_VNIC set returns right
 *		after the exclusive-flag handling, leaving the client open.
 *
 * Otherwise the client's flow entry is released, the client is unlinked
 * from its MAC and the client structure is returned to its kmem cache.
 */
void
mac_client_close(mac_client_handle_t mch, uint16_t flags)
{
	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
	mac_impl_t		*mip = mcip->mci_mip;
	flow_entry_t		*flent;

	i_mac_perim_enter(mip);

	if (flags & MAC_CLOSE_FLAGS_EXCLUSIVE)
		mcip->mci_state_flags &= ~MCIS_EXCLUSIVE;

	if ((mcip->mci_state_flags & MCIS_IS_VNIC) &&
	    !(flags & MAC_CLOSE_FLAGS_IS_VNIC)) {
		/*
		 * This is an upper VNIC client initiated operation.
		 * The lower MAC client will be closed by the VNIC driver
		 * when the VNIC is deleted.
		 */

		i_mac_perim_exit(mip);
		return;
	}

	/*
	 * Remove the flent associated with the MAC client
	 */
	flent = mcip->mci_flent;
	mcip->mci_flent = NULL;
	FLOW_FINAL_REFRELE(flent);

	/*
	 * MAC clients must remove the unicast addresses and promisc callbacks
	 * they added before issuing a mac_client_close().
	 */
	ASSERT(mcip->mci_unicast_list == NULL);
	ASSERT(mcip->mci_promisc_list == NULL);
	ASSERT(mcip->mci_tx_notify_cb_list == NULL);

	i_mac_share_free(mcip);

	mac_client_remove(mcip);

	i_mac_perim_exit(mip);

	/*
	 * Reset these fields before the object goes back to the cache.
	 * mch and mcip refer to the same object, so freeing mch frees the
	 * client.
	 */
	mcip->mci_subflow_tab = NULL;
	mcip->mci_state_flags = 0;
	mcip->mci_tx_flag = 0;
	kmem_cache_free(mac_client_impl_cache, mch);
}
   1395 
   1396 /*
   1397  * Set the rx bypass receive callback.
   1398  */
   1399 boolean_t
   1400 mac_rx_bypass_set(mac_client_handle_t mch, mac_direct_rx_t rx_fn, void *arg1)
   1401 {
   1402 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   1403 	mac_impl_t		*mip = mcip->mci_mip;
   1404 
   1405 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   1406 
   1407 	/*
   1408 	 * If the mac_client is a VLAN, we should not do DLS bypass and
   1409 	 * instead let the packets come up via mac_rx_deliver so the vlan
   1410 	 * header can be stripped.
   1411 	 */
   1412 	if (mcip->mci_nvids > 0)
   1413 		return (B_FALSE);
   1414 
   1415 	/*
   1416 	 * These are not accessed directly in the data path, and hence
   1417 	 * don't need any protection
   1418 	 */
   1419 	mcip->mci_direct_rx_fn = rx_fn;
   1420 	mcip->mci_direct_rx_arg = arg1;
   1421 	return (B_TRUE);
   1422 }
   1423 
   1424 /*
   1425  * Enable/Disable rx bypass. By default, bypass is assumed to be enabled.
   1426  */
   1427 void
   1428 mac_rx_bypass_enable(mac_client_handle_t mch)
   1429 {
   1430 	((mac_client_impl_t *)mch)->mci_state_flags &= ~MCIS_RX_BYPASS_DISABLE;
   1431 }
   1432 
   1433 void
   1434 mac_rx_bypass_disable(mac_client_handle_t mch)
   1435 {
   1436 	((mac_client_impl_t *)mch)->mci_state_flags |= MCIS_RX_BYPASS_DISABLE;
   1437 }
   1438 
   1439 /*
   1440  * Set the receive callback for the specified MAC client. There can be
   1441  * at most one such callback per MAC client.
   1442  */
   1443 void
   1444 mac_rx_set(mac_client_handle_t mch, mac_rx_t rx_fn, void *arg)
   1445 {
   1446 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
   1447 	mac_impl_t	*mip = mcip->mci_mip;
   1448 
   1449 	/*
   1450 	 * Instead of adding an extra set of locks and refcnts in
   1451 	 * the datapath at the mac client boundary, we temporarily quiesce
   1452 	 * the SRS and related entities. We then change the receive function
   1453 	 * without interference from any receive data thread and then reenable
   1454 	 * the data flow subsequently.
   1455 	 */
   1456 	i_mac_perim_enter(mip);
   1457 	mac_rx_client_quiesce(mch);
   1458 
   1459 	mcip->mci_rx_fn = rx_fn;
   1460 	mcip->mci_rx_arg = arg;
   1461 	mac_rx_client_restart(mch);
   1462 	i_mac_perim_exit(mip);
   1463 }
   1464 
/*
 * Reset the receive callback for the specified MAC client: the callback
 * is set back to mac_pkt_drop with a NULL argument, through mac_rx_set().
 */
void
mac_rx_clear(mac_client_handle_t mch)
{
	mac_rx_set(mch, mac_pkt_drop, NULL);
}
   1473 
   1474 /*
   1475  * Walk the MAC client subflow table and updates their priority values.
   1476  */
   1477 static int
   1478 mac_update_subflow_priority_cb(flow_entry_t *flent, void *arg)
   1479 {
   1480 	mac_flow_update_priority(arg, flent);
   1481 	return (0);
   1482 }
   1483 
/*
 * Walk the subflow table of the specified MAC client and update the
 * priority of each subflow, via mac_update_subflow_priority_cb(). The
 * result of the walk is ignored.
 *
 * NOTE(review): mci_subflow_tab can be NULL (it is created dynamically);
 * this presumably relies on mac_flow_walk() accepting a NULL table --
 * confirm.
 */
void
mac_update_subflow_priority(mac_client_impl_t *mcip)
{
	(void) mac_flow_walk(mcip->mci_subflow_tab,
	    mac_update_subflow_priority_cb, mcip);
}
   1490 
   1491 /*
   1492  * When the MAC client is being brought up (i.e. we do a unicast_add) we need
   1493  * to initialize the cpu and resource control structure in the
   1494  * mac_client_impl_t from the mac_impl_t (i.e if there are any cached
   1495  * properties before the flow entry for the unicast address was created).
   1496  */
   1497 int
   1498 mac_resource_ctl_set(mac_client_handle_t mch, mac_resource_props_t *mrp)
   1499 {
   1500 	mac_client_impl_t 	*mcip = (mac_client_impl_t *)mch;
   1501 	mac_impl_t		*mip = (mac_impl_t *)mcip->mci_mip;
   1502 	int			err = 0;
   1503 
   1504 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   1505 
   1506 	err = mac_validate_props(mrp);
   1507 	if (err != 0)
   1508 		return (err);
   1509 
   1510 	mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), B_FALSE);
   1511 	if (MCIP_DATAPATH_SETUP(mcip)) {
   1512 		/*
   1513 		 * We have to set this prior to calling mac_flow_modify.
   1514 		 */
   1515 		if (mrp->mrp_mask & MRP_PRIORITY) {
   1516 			if (mrp->mrp_priority == MPL_RESET) {
   1517 				MAC_CLIENT_SET_PRIORITY_RANGE(mcip,
   1518 				    MPL_LINK_DEFAULT);
   1519 			} else {
   1520 				MAC_CLIENT_SET_PRIORITY_RANGE(mcip,
   1521 				    mrp->mrp_priority);
   1522 			}
   1523 		}
   1524 
   1525 		mac_flow_modify(mip->mi_flow_tab, mcip->mci_flent, mrp);
   1526 		if (mrp->mrp_mask & MRP_PRIORITY)
   1527 			mac_update_subflow_priority(mcip);
   1528 		return (0);
   1529 	}
   1530 	return (0);
   1531 }
   1532 
   1533 void
   1534 mac_resource_ctl_get(mac_client_handle_t mch, mac_resource_props_t *mrp)
   1535 {
   1536 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   1537 	mac_resource_props_t	*mcip_mrp = MCIP_RESOURCE_PROPS(mcip);
   1538 
   1539 	bcopy(mcip_mrp, mrp, sizeof (mac_resource_props_t));
   1540 }
   1541 
   1542 static int
   1543 mac_unicast_flow_create(mac_client_impl_t *mcip, uint8_t *mac_addr,
   1544     uint16_t vid, boolean_t is_primary, boolean_t first_flow,
   1545     flow_entry_t **flent, mac_resource_props_t *mrp)
   1546 {
   1547 	mac_impl_t	*mip = (mac_impl_t *)mcip->mci_mip;
   1548 	flow_desc_t	flow_desc;
   1549 	char		flowname[MAXFLOWNAMELEN];
   1550 	int		err;
   1551 	uint_t		flent_flags;
   1552 
   1553 	/*
   1554 	 * First unicast address being added, create a new flow
   1555 	 * for that MAC client.
   1556 	 */
   1557 	bzero(&flow_desc, sizeof (flow_desc));
   1558 
   1559 	flow_desc.fd_mac_len = mip->mi_type->mt_addr_length;
   1560 	bcopy(mac_addr, flow_desc.fd_dst_mac, flow_desc.fd_mac_len);
   1561 	flow_desc.fd_mask = FLOW_LINK_DST;
   1562 	if (vid != 0) {
   1563 		flow_desc.fd_vid = vid;
   1564 		flow_desc.fd_mask |= FLOW_LINK_VID;
   1565 	}
   1566 
   1567 	/*
   1568 	 * XXX-nicolas. For now I'm keeping the FLOW_PRIMARY_MAC
   1569 	 * and FLOW_VNIC. Even though they're a hack inherited
   1570 	 * from the SRS code, we'll keep them for now. They're currently
   1571 	 * consumed by mac_datapath_setup() to create the SRS.
   1572 	 * That code should be eventually moved out of
   1573 	 * mac_datapath_setup() and moved to a mac_srs_create()
   1574 	 * function of some sort to keep things clean.
   1575 	 *
   1576 	 * Also, there's no reason why the SRS for the primary MAC
   1577 	 * client should be different than any other MAC client. Until
   1578 	 * this is cleaned-up, we support only one MAC unicast address
   1579 	 * per client.
   1580 	 *
   1581 	 * We set FLOW_PRIMARY_MAC for the primary MAC address,
   1582 	 * FLOW_VNIC for everything else.
   1583 	 */
   1584 	if (is_primary)
   1585 		flent_flags = FLOW_PRIMARY_MAC;
   1586 	else
   1587 		flent_flags = FLOW_VNIC_MAC;
   1588 
   1589 	/*
   1590 	 * For the first flow we use the mac client's name - mci_name, for
   1591 	 * subsequent ones we just create a name with the vid. This is
   1592 	 * so that we can add these flows to the same flow table. This is
   1593 	 * fine as the flow name (except for the one with the mac client's
   1594 	 * name) is not visible. When the first flow is removed, we just replace
   1595 	 * its fdesc with another from the list, so we will still retain the
   1596 	 * flent with the MAC client's flow name.
   1597 	 */
   1598 	if (first_flow) {
   1599 		bcopy(mcip->mci_name, flowname, MAXFLOWNAMELEN);
   1600 	} else {
   1601 		(void) sprintf(flowname, "%s%u", mcip->mci_name, vid);
   1602 		flent_flags = FLOW_NO_STATS;
   1603 	}
   1604 
   1605 	if ((err = mac_flow_create(&flow_desc, mrp, flowname, NULL,
   1606 	    flent_flags, flent)) != 0)
   1607 		return (err);
   1608 
   1609 	FLOW_MARK(*flent, FE_INCIPIENT);
   1610 	(*flent)->fe_mcip = mcip;
   1611 
   1612 	/*
   1613 	 * Place initial creation reference on the flow. This reference
   1614 	 * is released in the corresponding delete action viz.
   1615 	 * mac_unicast_remove after waiting for all transient refs to
   1616 	 * to go away. The wait happens in mac_flow_wait.
   1617 	 * We have already held the reference in mac_client_open().
   1618 	 */
   1619 	if (!first_flow)
   1620 		FLOW_REFHOLD(*flent);
   1621 	return (0);
   1622 }
   1623 
   1624 /* Refresh the multicast grouping for this VID. */
   1625 int
   1626 mac_client_update_mcast(void *arg, boolean_t add, const uint8_t *addrp)
   1627 {
   1628 	flow_entry_t		*flent = arg;
   1629 	mac_client_impl_t	*mcip = flent->fe_mcip;
   1630 	uint16_t		vid;
   1631 	flow_desc_t		flow_desc;
   1632 
   1633 	mac_flow_get_desc(flent, &flow_desc);
   1634 	vid = (flow_desc.fd_mask & FLOW_LINK_VID) != 0 ?
   1635 	    flow_desc.fd_vid : VLAN_ID_NONE;
   1636 
   1637 	/*
   1638 	 * We don't call mac_multicast_add()/mac_multicast_remove() as
   1639 	 * we want to add/remove for this specific vid.
   1640 	 */
   1641 	if (add) {
   1642 		return (mac_bcast_add(mcip, addrp, vid,
   1643 		    MAC_ADDRTYPE_MULTICAST));
   1644 	} else {
   1645 		mac_bcast_delete(mcip, addrp, vid);
   1646 		return (0);
   1647 	}
   1648 }
   1649 
   1650 static void
   1651 mac_update_single_active_client(mac_impl_t *mip)
   1652 {
   1653 	mac_client_impl_t *client = NULL;
   1654 
   1655 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   1656 
   1657 	rw_enter(&mip->mi_rw_lock, RW_WRITER);
   1658 	if (mip->mi_nactiveclients == 1) {
   1659 		/*
   1660 		 * Find the one active MAC client from the list of MAC
   1661 		 * clients. The active MAC client has at least one
   1662 		 * unicast address.
   1663 		 */
   1664 		for (client = mip->mi_clients_list; client != NULL;
   1665 		    client = client->mci_client_next) {
   1666 			if (client->mci_unicast_list != NULL)
   1667 				break;
   1668 		}
   1669 		ASSERT(client != NULL);
   1670 	}
   1671 
   1672 	/*
   1673 	 * mi_single_active_client is protected by the MAC impl's read/writer
   1674 	 * lock, which allows mac_rx() to check the value of that pointer
   1675 	 * as a reader.
   1676 	 */
   1677 	mip->mi_single_active_client = client;
   1678 	rw_exit(&mip->mi_rw_lock);
   1679 }
   1680 
/*
 * Set up the data path. Called from i_mac_unicast_add after having
 * done all the validations including making sure this is an active
 * client (i.e that is ready to process packets.)
 *
 * mcip		the MAC client.
 * vid		VLAN id of the unicast address being added.
 * mac_addr	the unicast address being added.
 * mrp		resource properties used for the first flow of the client.
 *		NOTE(review): dereferenced without a NULL check on the
 *		first-flow path -- assumed non-NULL, confirm with callers.
 * isprimary	whether mac_addr is the primary MAC address.
 * muip		unicast entry linked onto the client's unicast list on
 *		success.
 *
 * Returns 0 on success. On failure, the error of the failing step is
 * returned after undoing the broadcast group membership, the active
 * client count and the mac_start() done here.
 */
static int
mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid,
    uint8_t *mac_addr, mac_resource_props_t *mrp, boolean_t isprimary,
    mac_unicast_impl_t *muip)
{
	mac_impl_t	*mip = mcip->mci_mip;
	/* These record which steps completed, to drive the bail path. */
	boolean_t	mac_started = B_FALSE;
	boolean_t	bcast_added = B_FALSE;
	boolean_t	nactiveclients_added = B_FALSE;
	flow_entry_t	*flent;
	int		err = 0;

	if ((err = mac_start((mac_handle_t)mip)) != 0)
		goto bail;

	mac_started = B_TRUE;

	/* add the MAC client to the broadcast address group by default */
	if (mip->mi_type->mt_brdcst_addr != NULL) {
		err = mac_bcast_add(mcip, mip->mi_type->mt_brdcst_addr, vid,
		    MAC_ADDRTYPE_BROADCAST);
		if (err != 0)
			goto bail;
		bcast_added = B_TRUE;
	}

	/*
	 * If this is the first unicast address addition for this
	 * client, reuse the pre-allocated larval flow entry associated with
	 * the MAC client.
	 */
	flent = (mcip->mci_nflents == 0) ? mcip->mci_flent : NULL;

	/* We are configuring the unicast flow now */
	if (!MCIP_DATAPATH_SETUP(mcip)) {

		/* Use the requested priority, or the link default. */
		MAC_CLIENT_SET_PRIORITY_RANGE(mcip,
		    (mrp->mrp_mask & MRP_PRIORITY) ? mrp->mrp_priority :
		    MPL_LINK_DEFAULT);

		if ((err = mac_unicast_flow_create(mcip, mac_addr, vid,
		    isprimary, B_TRUE, &flent, mrp)) != 0)
			goto bail;

		mip->mi_nactiveclients++;
		nactiveclients_added = B_TRUE;

		/*
		 * This will allocate the RX ring group if possible for the
		 * flow and program the software classifier as needed.
		 */
		if ((err = mac_datapath_setup(mcip, flent, SRST_LINK)) != 0)
			goto bail;

		/*
		 * The unicast MAC address must have been added successfully.
		 */
		ASSERT(mcip->mci_unicast != NULL);
		/*
		 * Push down the sub-flows that were defined on this link
		 * hitherto. The flows are added to the active flow table
		 * and SRS, softrings etc. are created as needed.
		 */
		mac_link_init_flows((mac_client_handle_t)mcip);
	} else {
		mac_address_t *map = mcip->mci_unicast;

		/*
		 * A unicast flow already exists for that MAC client,
		 * this flow must be the same mac address but with
		 * different VID. It has been checked by mac_addr_in_use().
		 *
		 * We will use the SRS etc. from the mci_flent. Note that
		 * We don't need to create kstat for this as except for
		 * the fdesc, everything will be used from in the 1st flent.
		 */

		if (bcmp(mac_addr, map->ma_addr, map->ma_len) != 0) {
			err = EINVAL;
			goto bail;
		}

		if ((err = mac_unicast_flow_create(mcip, mac_addr, vid,
		    isprimary, B_FALSE, &flent, NULL)) != 0) {
			goto bail;
		}
		/* The new flow is released here if it cannot be added. */
		if ((err = mac_flow_add(mip->mi_flow_tab, flent)) != 0) {
			FLOW_FINAL_REFRELE(flent);
			goto bail;
		}

		/* update the multicast group for this vid */
		mac_client_bcast_refresh(mcip, mac_client_update_mcast,
		    (void *)flent, B_TRUE);

	}

	/* populate the shared MAC address */
	muip->mui_map = mcip->mci_unicast;

	/* Link the new unicast entry at the head of the client's list. */
	rw_enter(&mcip->mci_rw_lock, RW_WRITER);
	muip->mui_next = mcip->mci_unicast_list;
	mcip->mci_unicast_list = muip;
	rw_exit(&mcip->mci_rw_lock);


	/*
	 * First add the flent to the flow list of this mcip. Then set
	 * the mip's mi_single_active_client if needed. The Rx path assumes
	 * that mip->mi_single_active_client will always have an associated
	 * flent.
	 */
	mac_client_add_to_flow_list(mcip, flent);

	if (nactiveclients_added)
		mac_update_single_active_client(mip);
	/*
	 * Trigger a renegotiation of the capabilities when the number of
	 * active clients changes from 1 to 2, since some of the capabilities
	 * might have to be disabled. Also send a MAC_NOTE_LINK notification
	 * to all the MAC clients whenever physical link is DOWN.
	 */
	if (mip->mi_nactiveclients == 2) {
		mac_capab_update((mac_handle_t)mip);
		mac_virtual_link_update(mip);
	}
	/*
	 * Now that the setup is complete, clear the INCIPIENT flag.
	 * The flag was set to avoid incoming packets seeing inconsistent
	 * structures while the setup was in progress. Clear the mci_tx_flag
	 * by calling mac_tx_client_unblock. It is possible that
	 * mac_unicast_remove was called prior to this mac_unicast_add which
	 * could have set the MCI_TX_QUIESCE flag.
	 */
	if (flent->fe_rx_ring_group != NULL)
		mac_rx_group_unmark(flent->fe_rx_ring_group, MR_INCIPIENT);
	FLOW_UNMARK(flent, FE_INCIPIENT);
	FLOW_UNMARK(flent, FE_MC_NO_DATAPATH);
	mac_tx_client_unblock(mcip);
	return (0);
bail:
	/*
	 * Undo, in reverse order, whichever of the steps above completed.
	 * NOTE(review): a first-flow flent set up by
	 * mac_unicast_flow_create() is not touched here when
	 * mac_datapath_setup() fails -- presumably cleaned up elsewhere,
	 * confirm.
	 */
	if (bcast_added)
		mac_bcast_delete(mcip, mip->mi_type->mt_brdcst_addr, vid);

	if (nactiveclients_added)
		mip->mi_nactiveclients--;

	if (mac_started)
		mac_stop((mac_handle_t)mip);

	return (err);
}
   1838 
   1839 /*
   1840  * Return the passive primary MAC client, if present. The passive client is
   1841  * a stand-by client that has the same unicast address as another that is
   1842  * currenly active. Once the active client goes away, the passive client
   1843  * becomes active.
   1844  */
   1845 static mac_client_impl_t *
   1846 mac_get_passive_primary_client(mac_impl_t *mip)
   1847 {
   1848 	mac_client_impl_t	*mcip;
   1849 
   1850 	for (mcip = mip->mi_clients_list; mcip != NULL;
   1851 	    mcip = mcip->mci_client_next) {
   1852 		if (mac_is_primary_client(mcip) &&
   1853 		    (mcip->mci_flags & MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) != 0) {
   1854 			return (mcip);
   1855 		}
   1856 	}
   1857 	return (NULL);
   1858 }
   1859 
/*
 * Add a new unicast address to the MAC client.
 *
 * The MAC address can be specified either by value, or the MAC client
 * can specify that it wants to use the primary MAC address of the
 * underlying MAC. See the introductory comments at the beginning
 * of this file for more information on primary MAC addresses.
 *
 * Note also the tuple (MAC address, VID) must be unique
 * for the MAC clients defined on top of the same underlying MAC
 * instance, unless the MAC_UNICAST_NODUPCHECK is specified.
 *
 * In no case can a client use the PVID for the MAC, if the MAC has one set.
 *
 * Arguments:
 *	mch		handle of the MAC client adding the address
 *	mac_addr	address to add; replaced by mip->mi_addr when
 *			MAC_UNICAST_PRIMARY or MAC_UNICAST_VNIC_PRIMARY is
 *			set in flags
 *	flags		MAC_UNICAST_* flags
 *	mah		set to the new unicast address handle whenever 0 or
 *			EAGAIN is returned
 *	vid		VLAN ID paired with the address
 *	diag		set to a MAC_DIAG_MACADDR_* code when the address
 *			itself is rejected (invalid, same as the NIC's, or
 *			already in use)
 *
 * Return values:
 *	0	the address was added, or the client was registered as a
 *		passive primary whose datapath is set up later
 *	EAGAIN	the upper client of a VNIC was registered as a passive
 *		client; the caller should defer setting its rx functions
 *	EBUSY	vid is the MAC's PVID, the VNIC primary address was already
 *		added, or the exclusive/active-client rules below are
 *		violated
 *	ENXIO	a primary client was requested on top of an anchor VNIC
 *	EINVAL	invalid address, address equal to the NIC's own address, or
 *		inconsistent use of MAC_UNICAST_HW across calls
 *	EEXIST	the (address, vid) pair is already in use
 *	other	errors from mac_fastpath_disable() or
 *		mac_client_datapath_setup()
 *
 * mac_unicast_add() calls this function with the MAC perimeter held.
 */
int
i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags,
    mac_unicast_handle_t *mah, uint16_t vid, mac_diag_t *diag)
{
	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
	mac_impl_t		*mip = mcip->mci_mip;
	int			err;
	uint_t			mac_len = mip->mi_type->mt_addr_length;
	boolean_t		check_dups = !(flags & MAC_UNICAST_NODUPCHECK);
	boolean_t		fastpath_disabled = B_FALSE;
	boolean_t		is_primary = (flags & MAC_UNICAST_PRIMARY);
	boolean_t		is_unicast_hw = (flags & MAC_UNICAST_HW);
	mac_resource_props_t	mrp;
	boolean_t		passive_client = B_FALSE;
	mac_unicast_impl_t	*muip;
	boolean_t		is_vnic_primary =
	    (flags & MAC_UNICAST_VNIC_PRIMARY);

	/* when VID is non-zero, the underlying MAC can not be VNIC */
	ASSERT(!((mip->mi_state_flags & MIS_IS_VNIC) && (vid != 0)));

	/*
	 * Check for an attempted use of the current Port VLAN ID, if enabled.
	 * No client may use it.
	 */
	if (mip->mi_pvid != 0 && vid == mip->mi_pvid)
		return (EBUSY);

	/*
	 * Check whether it's the primary client and flag it.
	 */
	if (!(mcip->mci_state_flags & MCIS_IS_VNIC) && is_primary && vid == 0)
		mcip->mci_flags |= MAC_CLIENT_FLAGS_PRIMARY;

	/*
	 * is_vnic_primary is true when we come here as a VLAN VNIC
	 * which uses the primary mac client's address but with a non-zero
	 * VID. In this case the MAC address is not specified by an upper
	 * MAC client.
	 */
	if ((mcip->mci_state_flags & MCIS_IS_VNIC) && is_primary &&
	    !is_vnic_primary) {
		/*
		 * The address is being set by the upper MAC client
		 * of a VNIC. The MAC address was already set by the
		 * VNIC driver during VNIC creation.
		 *
		 * Note: a VNIC has only one MAC address. We return
		 * the MAC unicast address handle of the lower MAC client
		 * corresponding to the VNIC. We allocate a new entry
		 * which is flagged appropriately, so that mac_unicast_remove()
		 * doesn't attempt to free the original entry that
		 * was allocated by the VNIC driver.
		 */
		ASSERT(mcip->mci_unicast != NULL);

		/* Check for VLAN flags, if present */
		if ((flags & MAC_UNICAST_TAG_DISABLE) != 0)
			mcip->mci_state_flags |= MCIS_TAG_DISABLE;

		if ((flags & MAC_UNICAST_STRIP_DISABLE) != 0)
			mcip->mci_state_flags |= MCIS_STRIP_DISABLE;

		if ((flags & MAC_UNICAST_DISABLE_TX_VID_CHECK) != 0)
			mcip->mci_state_flags |= MCIS_DISABLE_TX_VID_CHECK;

		/*
		 * Ensure that the primary unicast address of the VNIC
		 * is added only once unless we have the
		 * MAC_CLIENT_FLAGS_MULTI_PRIMARY set (and this is not
		 * a passive MAC client).
		 */
		if ((mcip->mci_flags & MAC_CLIENT_FLAGS_VNIC_PRIMARY) != 0) {
			if ((mcip->mci_flags &
			    MAC_CLIENT_FLAGS_MULTI_PRIMARY) == 0 ||
			    (mcip->mci_flags &
			    MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) != 0) {
				return (EBUSY);
			}
			mcip->mci_flags |= MAC_CLIENT_FLAGS_PASSIVE_PRIMARY;
			passive_client = B_TRUE;
		}

		mcip->mci_flags |= MAC_CLIENT_FLAGS_VNIC_PRIMARY;

		/*
		 * Create a handle for vid 0.
		 */
		ASSERT(vid == 0);
		muip = kmem_zalloc(sizeof (mac_unicast_impl_t), KM_SLEEP);
		muip->mui_vid = vid;
		*mah = (mac_unicast_handle_t)muip;
		/*
		 * This will be used by the caller to defer setting the
		 * rx functions.
		 */
		if (passive_client)
			return (EAGAIN);
		return (0);
	}

	/* primary MAC clients cannot be opened on top of anchor VNICs */
	if ((is_vnic_primary || is_primary) &&
	    i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_ANCHOR_VNIC, NULL)) {
		return (ENXIO);
	}

	/*
	 * If this is a VNIC/VLAN, disable softmac fast-path.
	 * fastpath_disabled records that this must be undone on failure.
	 */
	if (mcip->mci_state_flags & MCIS_IS_VNIC) {
		err = mac_fastpath_disable((mac_handle_t)mip);
		if (err != 0)
			return (err);
		fastpath_disabled = B_TRUE;
	}

	/*
	 * Return EBUSY if:
	 *  - an exclusively active mac client already exists.
	 *  - this is an exclusive active mac client but
	 *	a. active mac clients already exist, or
	 *	b. fastpath streams are already plumbed on this legacy device
	 *  - the mac creator has disallowed active mac clients.
	 */
	if (mip->mi_state_flags & (MIS_EXCLUSIVE|MIS_NO_ACTIVE)) {
		if (fastpath_disabled)
			mac_fastpath_enable((mac_handle_t)mip);
		return (EBUSY);
	}

	if (mcip->mci_state_flags & MCIS_EXCLUSIVE) {
		/*
		 * Nothing has been acquired yet on this path (the fastpath
		 * was not disabled and muip is not allocated), so the
		 * failures below can return directly instead of going
		 * through bail_out.
		 */
		ASSERT(!fastpath_disabled);
		if (mip->mi_nactiveclients != 0)
			return (EBUSY);

		if ((mip->mi_state_flags & MIS_LEGACY) &&
		    !(mip->mi_capab_legacy.ml_active_set(mip->mi_driver))) {
			return (EBUSY);
		}
		mip->mi_state_flags |= MIS_EXCLUSIVE;
	}

	bzero(&mrp, sizeof (mac_resource_props_t));
	if (is_primary && !(mcip->mci_state_flags & (MCIS_IS_VNIC |
	    MCIS_IS_AGGR_PORT))) {
		/*
		 * Apply the property cached in the mac_impl_t to the primary
		 * mac client. If the mac client is a VNIC or an aggregation
		 * port, its property should be set in the mcip when the
		 * VNIC/aggr was created.
		 */
		mac_get_resources((mac_handle_t)mip, &mrp);
		(void) mac_client_set_resources(mch, &mrp);
	} else if (mcip->mci_state_flags & MCIS_IS_VNIC) {
		bcopy(MCIP_RESOURCE_PROPS(mcip), &mrp,
		    sizeof (mac_resource_props_t));
	}

	/* From here on, every failure must go through bail_out to free muip. */
	muip = kmem_zalloc(sizeof (mac_unicast_impl_t), KM_SLEEP);
	muip->mui_vid = vid;

	if (is_primary || is_vnic_primary) {
		/* Primary requests always use the underlying MAC's address. */
		mac_addr = mip->mi_addr;
	} else {

		/*
		 * Verify the validity of the specified MAC address value.
		 */
		if (!mac_unicst_verify((mac_handle_t)mip, mac_addr, mac_len)) {
			*diag = MAC_DIAG_MACADDR_INVALID;
			err = EINVAL;
			goto bail_out;
		}

		/*
		 * Make sure that the specified MAC address is different
		 * than the unicast MAC address of the underlying NIC.
		 */
		if (check_dups && bcmp(mip->mi_addr, mac_addr, mac_len) == 0) {
			*diag = MAC_DIAG_MACADDR_NIC;
			err = EINVAL;
			goto bail_out;
		}
	}

	/*
	 * Set the flags here so that if this is a passive client, we
	 * can return and set it when we call mac_client_datapath_setup
	 * when this becomes the active client. If we defer to using these
	 * flags to mac_client_datapath_setup, then for a passive client,
	 * we'd have to store the flags somewhere (probably fe_flags)
	 * and then use it.
	 */
	if (!MCIP_DATAPATH_SETUP(mcip)) {
		if (is_unicast_hw) {
			/*
			 * The client requires a hardware MAC address slot
			 * for that unicast address. Since we support only
			 * one unicast MAC address per client, flag the
			 * MAC client itself.
			 */
			mcip->mci_state_flags |= MCIS_UNICAST_HW;
		}

		/* Check for VLAN flags, if present */
		if ((flags & MAC_UNICAST_TAG_DISABLE) != 0)
			mcip->mci_state_flags |= MCIS_TAG_DISABLE;

		if ((flags & MAC_UNICAST_STRIP_DISABLE) != 0)
			mcip->mci_state_flags |= MCIS_STRIP_DISABLE;

		if ((flags & MAC_UNICAST_DISABLE_TX_VID_CHECK) != 0)
			mcip->mci_state_flags |= MCIS_DISABLE_TX_VID_CHECK;
	} else {
		/*
		 * Assert that the specified flags are consistent with the
		 * flags specified by previous calls to mac_unicast_add().
		 */
		ASSERT(((flags & MAC_UNICAST_TAG_DISABLE) != 0 &&
		    (mcip->mci_state_flags & MCIS_TAG_DISABLE) != 0) ||
		    ((flags & MAC_UNICAST_TAG_DISABLE) == 0 &&
		    (mcip->mci_state_flags & MCIS_TAG_DISABLE) == 0));

		ASSERT(((flags & MAC_UNICAST_STRIP_DISABLE) != 0 &&
		    (mcip->mci_state_flags & MCIS_STRIP_DISABLE) != 0) ||
		    ((flags & MAC_UNICAST_STRIP_DISABLE) == 0 &&
		    (mcip->mci_state_flags & MCIS_STRIP_DISABLE) == 0));

		ASSERT(((flags & MAC_UNICAST_DISABLE_TX_VID_CHECK) != 0 &&
		    (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK) != 0) ||
		    ((flags & MAC_UNICAST_DISABLE_TX_VID_CHECK) == 0 &&
		    (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK) == 0));

		/*
		 * Make sure the client is consistent about its requests
		 * for MAC addresses. I.e. all requests from the clients
		 * must have the MAC_UNICAST_HW flag set or clear.
		 * Note that diag is not set for this failure.
		 */
		if ((mcip->mci_state_flags & MCIS_UNICAST_HW) != 0 &&
		    !is_unicast_hw ||
		    (mcip->mci_state_flags & MCIS_UNICAST_HW) == 0 &&
		    is_unicast_hw) {
			err = EINVAL;
			goto bail_out;
		}
	}
	/*
	 * Make sure the MAC address is not already used by
	 * another MAC client defined on top of the same
	 * underlying NIC. Unless we have MAC_CLIENT_FLAGS_MULTI_PRIMARY
	 * set when we allow a passive client to be present which will
	 * be activated when the currently active client goes away - this
	 * works only with primary addresses.
	 */
	if ((check_dups || is_primary || is_vnic_primary) &&
	    mac_addr_in_use(mip, mac_addr, vid)) {
		/*
		 * Must have set the multiple primary address flag when
		 * we did a mac_client_open AND this should be a primary
		 * MAC client AND there should not already be a passive
		 * primary. If all is true then we let this succeed
		 * even if the address is a dup.
		 */
		if ((mcip->mci_flags & MAC_CLIENT_FLAGS_MULTI_PRIMARY) == 0 ||
		    (mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY) == 0 ||
		    mac_get_passive_primary_client(mip) != NULL) {
			*diag = MAC_DIAG_MACADDR_INUSE;
			err = EEXIST;
			goto bail_out;
		}
		ASSERT((mcip->mci_flags &
		    MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) == 0);
		mcip->mci_flags |= MAC_CLIENT_FLAGS_PASSIVE_PRIMARY;

		/*
		 * Stash the unicast address handle, we will use it when
		 * we set up the passive client. No datapath is set up
		 * here; mac_unicast_remove() of the active primary does
		 * that later.
		 */
		mcip->mci_p_unicast_list = muip;
		*mah = (mac_unicast_handle_t)muip;
		return (0);
	}

	err = mac_client_datapath_setup(mcip, vid, mac_addr, &mrp,
	    is_primary || is_vnic_primary, muip);
	if (err != 0)
		goto bail_out;
	*mah = (mac_unicast_handle_t)muip;
	return (0);

bail_out:
	/*
	 * Undo what was acquired above: re-enable the fastpath, give up
	 * the exclusive claim on the MAC (and on the legacy driver), and
	 * free the unicast handle.
	 */
	if (fastpath_disabled)
		mac_fastpath_enable((mac_handle_t)mip);
	if (mcip->mci_state_flags & MCIS_EXCLUSIVE) {
		mip->mi_state_flags &= ~MIS_EXCLUSIVE;
		if (mip->mi_state_flags & MIS_LEGACY) {
			mip->mi_capab_legacy.ml_active_clear(
			    mip->mi_driver);
		}
	}
	kmem_free(muip, sizeof (mac_unicast_impl_t));
	return (err);
}
   2178 
   2179 /*
   2180  * Wrapper function to mac_unicast_add when we want to have the same mac
   2181  * client open for two instances, one that is currently active and another
   2182  * that will become active when the current one is removed. In this case
   2183  * mac_unicast_add will return EGAIN and we will save the rx function and
   2184  * arg which will be used when we activate the passive client in
   2185  * mac_unicast_remove.
   2186  */
   2187 int
   2188 mac_unicast_add_set_rx(mac_client_handle_t mch, uint8_t *mac_addr,
   2189     uint16_t flags, mac_unicast_handle_t *mah,  uint16_t vid, mac_diag_t *diag,
   2190     mac_rx_t rx_fn, void *arg)
   2191 {
   2192 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   2193 	uint_t			err;
   2194 
   2195 	err = mac_unicast_add(mch, mac_addr, flags, mah, vid, diag);
   2196 	if (err != 0 && err != EAGAIN)
   2197 		return (err);
   2198 	if (err == EAGAIN) {
   2199 		if (rx_fn != NULL) {
   2200 			mcip->mci_rx_p_fn = rx_fn;
   2201 			mcip->mci_rx_p_arg = arg;
   2202 		}
   2203 		return (0);
   2204 	}
   2205 	if (rx_fn != NULL)
   2206 		mac_rx_set(mch, rx_fn, arg);
   2207 	return (err);
   2208 }
   2209 
   2210 int
   2211 mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags,
   2212     mac_unicast_handle_t *mah, uint16_t vid, mac_diag_t *diag)
   2213 {
   2214 	mac_impl_t *mip = ((mac_client_impl_t *)mch)->mci_mip;
   2215 	uint_t err;
   2216 
   2217 	i_mac_perim_enter(mip);
   2218 	err = i_mac_unicast_add(mch, mac_addr, flags, mah, vid, diag);
   2219 	i_mac_perim_exit(mip);
   2220 
   2221 	return (err);
   2222 }
   2223 
   2224 void
   2225 mac_client_datapath_teardown(mac_client_handle_t mch, mac_unicast_impl_t *muip,
   2226     flow_entry_t *flent)
   2227 {
   2228 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   2229 	mac_impl_t		*mip = mcip->mci_mip;
   2230 
   2231 	/*
   2232 	 * We would have initialized subflows etc. only if we brought up
   2233 	 * the primary client and set the unicast unicast address etc.
   2234 	 * Deactivate the flows. The flow entry will be removed from the
   2235 	 * active flow tables, and the associated SRS, softrings etc will
   2236 	 * be deleted. But the flow entry itself won't be destroyed, instead
   2237 	 * it will continue to be archived off the  the global flow hash
   2238 	 * list, for a possible future activation when say IP is plumbed
   2239 	 * again.
   2240 	 */
   2241 	mac_link_release_flows(mch);
   2242 
   2243 	mip->mi_nactiveclients--;
   2244 	mac_update_single_active_client(mip);
   2245 
   2246 	/* Tear down the data path */
   2247 	mac_datapath_teardown(mcip, mcip->mci_flent, SRST_LINK);
   2248 
   2249 	/*
   2250 	 * Prevent any future access to the flow entry through the mci_flent
   2251 	 * pointer by setting the mci_flent to NULL. Access to mci_flent in
   2252 	 * mac_bcast_send is also under mi_rw_lock.
   2253 	 */
   2254 	rw_enter(&mip->mi_rw_lock, RW_WRITER);
   2255 	flent = mcip->mci_flent;
   2256 	mac_client_remove_flow_from_list(mcip, flent);
   2257 
   2258 	if (mcip->mci_state_flags & MCIS_DESC_LOGGED)
   2259 		mcip->mci_state_flags &= ~MCIS_DESC_LOGGED;
   2260 
   2261 	/*
   2262 	 * This is the last unicast address being removed and there shouldn't
   2263 	 * be any outbound data threads at this point coming down from mac
   2264 	 * clients. We have waited for the data threads to finish before
   2265 	 * starting dld_str_detach. Non-data threads must access TX SRS
   2266 	 * under mi_rw_lock.
   2267 	 */
   2268 	rw_exit(&mip->mi_rw_lock);
   2269 
   2270 	/*
   2271 	 * Don't use FLOW_MARK with FE_MC_NO_DATAPATH, as the flow might
   2272 	 * contain other flags, such as FE_CONDEMNED, which we need to
   2273 	 * cleared. We don't call mac_flow_cleanup() for this unicast
   2274 	 * flow as we have a already cleaned up SRSs etc. (via the teadown
   2275 	 * path). We just clear the stats and reset the initial callback
   2276 	 * function, the rest will be set when we call mac_flow_create,
   2277 	 * if at all.
   2278 	 */
   2279 	mutex_enter(&flent->fe_lock);
   2280 	ASSERT(flent->fe_refcnt == 1 && flent->fe_mbg == NULL &&
   2281 	    flent->fe_tx_srs == NULL && flent->fe_rx_srs_cnt == 0);
   2282 	flent->fe_flags = FE_MC_NO_DATAPATH;
   2283 	flow_stat_destroy(flent);
   2284 
   2285 	/* Initialize the receiver function to a safe routine */
   2286 	flent->fe_cb_fn = (flow_fn_t)mac_pkt_drop;
   2287 	flent->fe_cb_arg1 = NULL;
   2288 	flent->fe_cb_arg2 = NULL;
   2289 
   2290 	flent->fe_index = -1;
   2291 	mutex_exit(&flent->fe_lock);
   2292 
   2293 	if (mip->mi_type->mt_brdcst_addr != NULL) {
   2294 		mac_bcast_delete(mcip, mip->mi_type->mt_brdcst_addr,
   2295 		    muip->mui_vid);
   2296 	}
   2297 
   2298 	if (mip->mi_nactiveclients == 1) {
   2299 		mac_capab_update((mac_handle_t)mip);
   2300 		mac_virtual_link_update(mip);
   2301 	}
   2302 
   2303 	if (mcip->mci_state_flags & MCIS_EXCLUSIVE) {
   2304 		mip->mi_state_flags &= ~MIS_EXCLUSIVE;
   2305 
   2306 		if (mip->mi_state_flags & MIS_LEGACY)
   2307 			mip->mi_capab_legacy.ml_active_clear(mip->mi_driver);
   2308 	}
   2309 
   2310 	mcip->mci_state_flags &= ~MCIS_UNICAST_HW;
   2311 
   2312 	if (mcip->mci_state_flags & MCIS_TAG_DISABLE)
   2313 		mcip->mci_state_flags &= ~MCIS_TAG_DISABLE;
   2314 
   2315 	if (mcip->mci_state_flags & MCIS_STRIP_DISABLE)
   2316 		mcip->mci_state_flags &= ~MCIS_STRIP_DISABLE;
   2317 
   2318 	if (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK)
   2319 		mcip->mci_state_flags &= ~MCIS_DISABLE_TX_VID_CHECK;
   2320 
   2321 	kmem_free(muip, sizeof (mac_unicast_impl_t));
   2322 
   2323 	/*
   2324 	 * Disable fastpath if this is a VNIC or a VLAN.
   2325 	 */
   2326 	if (mcip->mci_state_flags & MCIS_IS_VNIC)
   2327 		mac_fastpath_enable((mac_handle_t)mip);
   2328 	mac_stop((mac_handle_t)mip);
   2329 }
   2330 
   2331 /*
   2332  * Remove a MAC address which was previously added by mac_unicast_add().
   2333  */
   2334 int
   2335 mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah)
   2336 {
   2337 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
   2338 	mac_unicast_impl_t *muip = (mac_unicast_impl_t *)mah;
   2339 	mac_unicast_impl_t *pre;
   2340 	mac_impl_t *mip = mcip->mci_mip;
   2341 	flow_entry_t		*flent;
   2342 	boolean_t		isprimary = B_FALSE;
   2343 
   2344 	i_mac_perim_enter(mip);
   2345 	if (mcip->mci_flags & MAC_CLIENT_FLAGS_VNIC_PRIMARY) {
   2346 		/*
   2347 		 * Called made by the upper MAC client of a VNIC.
   2348 		 * There's nothing much to do, the unicast address will
   2349 		 * be removed by the VNIC driver when the VNIC is deleted,
   2350 		 * but let's ensure that all our transmit is done before
   2351 		 * the client does a mac_client_stop lest it trigger an
   2352 		 * assert in the driver.
   2353 		 */
   2354 		ASSERT(muip->mui_vid == 0);
   2355 
   2356 		mac_tx_client_flush(mcip);
   2357 
   2358 		if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) != 0) {
   2359 			mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PASSIVE_PRIMARY;
   2360 			if (mcip->mci_rx_p_fn != NULL) {
   2361 				mac_rx_set(mch, mcip->mci_rx_p_fn,
   2362 				    mcip->mci_rx_p_arg);
   2363 				mcip->mci_rx_p_fn = NULL;
   2364 				mcip->mci_rx_p_arg = NULL;
   2365 			}
   2366 			kmem_free(muip, sizeof (mac_unicast_impl_t));
   2367 			i_mac_perim_exit(mip);
   2368 			return (0);
   2369 		}
   2370 		mcip->mci_flags &= ~MAC_CLIENT_FLAGS_VNIC_PRIMARY;
   2371 
   2372 		if (mcip->mci_state_flags & MCIS_TAG_DISABLE)
   2373 			mcip->mci_state_flags &= ~MCIS_TAG_DISABLE;
   2374 
   2375 		if (mcip->mci_state_flags & MCIS_STRIP_DISABLE)
   2376 			mcip->mci_state_flags &= ~MCIS_STRIP_DISABLE;
   2377 
   2378 		if (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK)
   2379 			mcip->mci_state_flags &= ~MCIS_DISABLE_TX_VID_CHECK;
   2380 
   2381 		kmem_free(muip, sizeof (mac_unicast_impl_t));
   2382 		i_mac_perim_exit(mip);
   2383 		return (0);
   2384 	}
   2385 
   2386 	ASSERT(muip != NULL);
   2387 
   2388 	/*
   2389 	 * We are removing a passive client, we haven't setup the datapath
   2390 	 * for this yet, so nothing much to do.
   2391 	 */
   2392 	if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) != 0) {
   2393 
   2394 		ASSERT((mcip->mci_flent->fe_flags & FE_MC_NO_DATAPATH) != 0);
   2395 		ASSERT(mcip->mci_p_unicast_list == muip);
   2396 
   2397 		mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PASSIVE_PRIMARY;
   2398 
   2399 		mcip->mci_p_unicast_list = NULL;
   2400 		mcip->mci_rx_p_fn = NULL;
   2401 		mcip->mci_rx_p_arg = NULL;
   2402 
   2403 		mcip->mci_state_flags &= ~MCIS_UNICAST_HW;
   2404 
   2405 		if (mcip->mci_state_flags & MCIS_TAG_DISABLE)
   2406 			mcip->mci_state_flags &= ~MCIS_TAG_DISABLE;
   2407 
   2408 		if (mcip->mci_state_flags & MCIS_STRIP_DISABLE)
   2409 			mcip->mci_state_flags &= ~MCIS_STRIP_DISABLE;
   2410 
   2411 		if (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK)
   2412 			mcip->mci_state_flags &= ~MCIS_DISABLE_TX_VID_CHECK;
   2413 
   2414 		kmem_free(muip, sizeof (mac_unicast_impl_t));
   2415 		i_mac_perim_exit(mip);
   2416 		return (0);
   2417 	}
   2418 	/*
   2419 	 * Remove the VID from the list of client's VIDs.
   2420 	 */
   2421 	pre = mcip->mci_unicast_list;
   2422 	if (muip == pre) {
   2423 		mcip->mci_unicast_list = muip->mui_next;
   2424 	} else {
   2425 		while ((pre->mui_next != NULL) && (pre->mui_next != muip))
   2426 			pre = pre->mui_next;
   2427 		ASSERT(pre->mui_next == muip);
   2428 		rw_enter(&mcip->mci_rw_lock, RW_WRITER);
   2429 		pre->mui_next = muip->mui_next;
   2430 		rw_exit(&mcip->mci_rw_lock);
   2431 	}
   2432 
   2433 	if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY) &&
   2434 	    muip->mui_vid == 0) {
   2435 		mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PRIMARY;
   2436 		isprimary = B_TRUE;
   2437 	}
   2438 	if (!mac_client_single_rcvr(mcip)) {
   2439 		/*
   2440 		 * This MAC client is shared by more than one unicast
   2441 		 * addresses, so we will just remove the flent
   2442 		 * corresponding to the address being removed. We don't invoke
   2443 		 * mac_rx_classify_flow_rem() since the additional flow is
   2444 		 * not associated with its own separate set of SRS and rings,
   2445 		 * and these constructs are still needed for the remaining
   2446 		 * flows.
   2447 		 */
   2448 		flent = mac_client_get_flow(mcip, muip);
   2449 		ASSERT(flent != NULL);
   2450 
   2451 		/*
   2452 		 * The first one is disappearing, need to make sure
   2453 		 * we replace it with another from the list of
   2454 		 * shared clients.
   2455 		 */
   2456 		if (flent == mcip->mci_flent)
   2457 			flent = mac_client_swap_mciflent(mcip);
   2458 		mac_client_remove_flow_from_list(mcip, flent);
   2459 		mac_flow_remove(mip->mi_flow_tab, flent, B_FALSE);
   2460 		mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
   2461 
   2462 		/*
   2463 		 * The multicast groups that were added by the client so
   2464 		 * far must be removed from the brodcast domain corresponding
   2465 		 * to the VID being removed.
   2466 		 */
   2467 		mac_client_bcast_refresh(mcip, mac_client_update_mcast,
   2468 		    (void *)flent, B_FALSE);
   2469 
   2470 		if (mip->mi_type->mt_brdcst_addr != NULL) {
   2471 			mac_bcast_delete(mcip, mip->mi_type->mt_brdcst_addr,
   2472 			    muip->mui_vid);
   2473 		}
   2474 
   2475 		FLOW_FINAL_REFRELE(flent);
   2476 		ASSERT(!(mcip->mci_state_flags & MCIS_EXCLUSIVE));
   2477 		/*
   2478 		 * Enable fastpath if this is a VNIC or a VLAN.
   2479 		 */
   2480 		if (mcip->mci_state_flags & MCIS_IS_VNIC)
   2481 			mac_fastpath_enable((mac_handle_t)mip);
   2482 		mac_stop((mac_handle_t)mip);
   2483 		i_mac_perim_exit(mip);
   2484 		return (0);
   2485 	}
   2486 
   2487 	mac_client_datapath_teardown(mch, muip, flent);
   2488 
   2489 	/*
   2490 	 * If we are removing the primary, check if we have a passive primary
   2491 	 * client that we need to activate now.
   2492 	 */
   2493 	if (!isprimary) {
   2494 		i_mac_perim_exit(mip);
   2495 		return (0);
   2496 	}
   2497 	mcip = mac_get_passive_primary_client(mip);
   2498 	if (mcip != NULL) {
   2499 		mac_resource_props_t	mrp;
   2500 		mac_unicast_impl_t	*muip;
   2501 
   2502 		mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PASSIVE_PRIMARY;
   2503 		bzero(&mrp, sizeof (mac_resource_props_t));
   2504 		/*
   2505 		 * Apply the property cached in the mac_impl_t to the
   2506 		 * primary mac client.
   2507 		 */
   2508 		mac_get_resources((mac_handle_t)mip, &mrp);
   2509 		(void) mac_client_set_resources(mch, &mrp);
   2510 		ASSERT(mcip->mci_p_unicast_list != NULL);
   2511 		muip = mcip->mci_p_unicast_list;
   2512 		mcip->mci_p_unicast_list = NULL;
   2513 		if (mac_client_datapath_setup(mcip, VLAN_ID_NONE,
   2514 		    mip->mi_addr, &mrp, B_TRUE, muip) == 0) {
   2515 			if (mcip->mci_rx_p_fn != NULL) {
   2516 				mac_rx_set(mch, mcip->mci_rx_p_fn,
   2517 				    mcip->mci_rx_p_arg);
   2518 				mcip->mci_rx_p_fn = NULL;
   2519 				mcip->mci_rx_p_arg = NULL;
   2520 			}
   2521 		} else {
   2522 			kmem_free(muip, sizeof (mac_unicast_impl_t));
   2523 		}
   2524 	}
   2525 	i_mac_perim_exit(mip);
   2526 	return (0);
   2527 }
   2528 
   2529 /*
   2530  * Multicast add function invoked by MAC clients.
   2531  */
   2532 int
   2533 mac_multicast_add(mac_client_handle_t mch, const uint8_t *addr)
   2534 {
   2535 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   2536 	mac_impl_t		*mip = mcip->mci_mip;
   2537 	flow_entry_t		*flent = mcip->mci_flent_list;
   2538 	flow_entry_t		*prev_fe = NULL;
   2539 	uint16_t		vid;
   2540 	int			err = 0;
   2541 
   2542 	/* Verify the address is a valid multicast address */
   2543 	if ((err = mip->mi_type->mt_ops.mtops_multicst_verify(addr,
   2544 	    mip->mi_pdata)) != 0)
   2545 		return (err);
   2546 
   2547 	i_mac_perim_enter(mip);
   2548 	while (flent != NULL) {
   2549 		vid = i_mac_flow_vid(flent);
   2550 
   2551 		err = mac_bcast_add((mac_client_impl_t *)mch, addr, vid,
   2552 		    MAC_ADDRTYPE_MULTICAST);
   2553 		if (err != 0)
   2554 			break;
   2555 		prev_fe = flent;
   2556 		flent = flent->fe_client_next;
   2557 	}
   2558 
   2559 	/*
   2560 	 * If we failed adding, then undo all, rather than partial
   2561 	 * success.
   2562 	 */
   2563 	if (flent != NULL && prev_fe != NULL) {
   2564 		flent = mcip->mci_flent_list;
   2565 		while (flent != prev_fe->fe_client_next) {
   2566 			vid = i_mac_flow_vid(flent);
   2567 			mac_bcast_delete((mac_client_impl_t *)mch, addr, vid);
   2568 			flent = flent->fe_client_next;
   2569 		}
   2570 	}
   2571 	i_mac_perim_exit(mip);
   2572 	return (err);
   2573 }
   2574 
   2575 /*
   2576  * Multicast delete function invoked by MAC clients.
   2577  */
   2578 void
   2579 mac_multicast_remove(mac_client_handle_t mch, const uint8_t *addr)
   2580 {
   2581 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   2582 	mac_impl_t		*mip = mcip->mci_mip;
   2583 	flow_entry_t		*flent;
   2584 	uint16_t		vid;
   2585 
   2586 	i_mac_perim_enter(mip);
   2587 	for (flent = mcip->mci_flent_list; flent != NULL;
   2588 	    flent = flent->fe_client_next) {
   2589 		vid = i_mac_flow_vid(flent);
   2590 		mac_bcast_delete((mac_client_impl_t *)mch, addr, vid);
   2591 	}
   2592 	i_mac_perim_exit(mip);
   2593 }
   2594 
   2595 /*
   2596  * When a MAC client desires to capture packets on an interface,
   2597  * it registers a promiscuous call back with mac_promisc_add().
   2598  * There are three types of promiscuous callbacks:
   2599  *
   2600  * * MAC_CLIENT_PROMISC_ALL
   2601  *   Captures all packets sent and received by the MAC client,
   2602  *   the physical interface, as well as all other MAC clients
   2603  *   defined on top of the same MAC.
   2604  *
   2605  * * MAC_CLIENT_PROMISC_FILTERED
   2606  *   Captures all packets sent and received by the MAC client,
   2607  *   plus all multicast traffic sent and received by the phyisical
   2608  *   interface and the other MAC clients.
   2609  *
   2610  * * MAC_CLIENT_PROMISC_MULTI
   2611  *   Captures all broadcast and multicast packets sent and
   2612  *   received by the MAC clients as well as the physical interface.
   2613  *
   2614  * In all cases, the underlying MAC is put in promiscuous mode.
   2615  */
   2616 int
   2617 mac_promisc_add(mac_client_handle_t mch, mac_client_promisc_type_t type,
   2618     mac_rx_t fn, void *arg, mac_promisc_handle_t *mphp, uint16_t flags)
   2619 {
   2620 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
   2621 	mac_impl_t *mip = mcip->mci_mip;
   2622 	mac_promisc_impl_t *mpip;
   2623 	mac_cb_info_t	*mcbi;
   2624 	int rc;
   2625 
   2626 	i_mac_perim_enter(mip);
   2627 
   2628 	if ((rc = mac_start((mac_handle_t)mip)) != 0) {
   2629 		i_mac_perim_exit(mip);
   2630 		return (rc);
   2631 	}
   2632 
   2633 	if ((mcip->mci_state_flags & MCIS_IS_VNIC) &&
   2634 	    type == MAC_CLIENT_PROMISC_ALL) {
   2635 		/*
   2636 		 * The function is being invoked by the upper MAC client
   2637 		 * of a VNIC. The VNIC should only see the traffic
   2638 		 * it is entitled to.
   2639 		 */
   2640 		type = MAC_CLIENT_PROMISC_FILTERED;
   2641 	}
   2642 
   2643 
   2644 	/*
   2645 	 * Turn on promiscuous mode for the underlying NIC.
   2646 	 * This is needed even for filtered callbacks which
   2647 	 * expect to receive all multicast traffic on the wire.
   2648 	 *
   2649 	 * Physical promiscuous mode should not be turned on if
   2650 	 * MAC_PROMISC_FLAGS_NO_PHYS is set.
   2651 	 */
   2652 	if ((flags & MAC_PROMISC_FLAGS_NO_PHYS) == 0) {
   2653 		if ((rc = i_mac_promisc_set(mip, B_TRUE)) != 0) {
   2654 			mac_stop((mac_handle_t)mip);
   2655 			i_mac_perim_exit(mip);
   2656 			return (rc);
   2657 		}
   2658 	}
   2659 
   2660 	mpip = kmem_cache_alloc(mac_promisc_impl_cache, KM_SLEEP);
   2661 
   2662 	mpip->mpi_type = type;
   2663 	mpip->mpi_fn = fn;
   2664 	mpip->mpi_arg = arg;
   2665 	mpip->mpi_mcip = mcip;
   2666 	mpip->mpi_no_tx_loop = ((flags & MAC_PROMISC_FLAGS_NO_TX_LOOP) != 0);
   2667 	mpip->mpi_no_phys = ((flags & MAC_PROMISC_FLAGS_NO_PHYS) != 0);
   2668 	mpip->mpi_strip_vlan_tag =
   2669 	    ((flags & MAC_PROMISC_FLAGS_VLAN_TAG_STRIP) != 0);
   2670 	mpip->mpi_no_copy = ((flags & MAC_PROMISC_FLAGS_NO_COPY) != 0);
   2671 
   2672 	mcbi = &mip->mi_promisc_cb_info;
   2673 	mutex_enter(mcbi->mcbi_lockp);
   2674 
   2675 	mac_callback_add(&mip->mi_promisc_cb_info, &mcip->mci_promisc_list,
   2676 	    &mpip->mpi_mci_link);
   2677 	mac_callback_add(&mip->mi_promisc_cb_info, &mip->mi_promisc_list,
   2678 	    &mpip->mpi_mi_link);
   2679 
   2680 	mutex_exit(mcbi->mcbi_lockp);
   2681 
   2682 	*mphp = (mac_promisc_handle_t)mpip;
   2683 	i_mac_perim_exit(mip);
   2684 	return (0);
   2685 }
   2686 
   2687 /*
   2688  * Remove a multicast address previously aded through mac_promisc_add().
   2689  */
   2690 void
   2691 mac_promisc_remove(mac_promisc_handle_t mph)
   2692 {
   2693 	mac_promisc_impl_t *mpip = (mac_promisc_impl_t *)mph;
   2694 	mac_client_impl_t *mcip = mpip->mpi_mcip;
   2695 	mac_impl_t *mip = mcip->mci_mip;
   2696 	mac_cb_info_t *mcbi;
   2697 	int rv;
   2698 
   2699 	i_mac_perim_enter(mip);
   2700 
   2701 	/*
   2702 	 * Even if the device can't be reset into normal mode, we still
   2703 	 * need to clear the client promisc callbacks. The client may want
   2704 	 * to close the mac end point and we can't have stale callbacks.
   2705 	 */
   2706 	if (!(mpip->mpi_no_phys)) {
   2707 		if ((rv = i_mac_promisc_set(mip, B_FALSE)) != 0) {
   2708 			cmn_err(CE_WARN, "%s: failed to switch OFF promiscuous"
   2709 			    " mode because of error 0x%x", mip->mi_name, rv);
   2710 		}
   2711 	}
   2712 	mcbi = &mip->mi_promisc_cb_info;
   2713 	mutex_enter(mcbi->mcbi_lockp);
   2714 	if (mac_callback_remove(mcbi, &mip->mi_promisc_list,
   2715 	    &mpip->mpi_mi_link)) {
   2716 		VERIFY(mac_callback_remove(&mip->mi_promisc_cb_info,
   2717 		    &mcip->mci_promisc_list, &mpip->mpi_mci_link));
   2718 		kmem_cache_free(mac_promisc_impl_cache, mpip);
   2719 	} else {
   2720 		mac_callback_remove_wait(&mip->mi_promisc_cb_info);
   2721 	}
   2722 	mutex_exit(mcbi->mcbi_lockp);
   2723 	mac_stop((mac_handle_t)mip);
   2724 
   2725 	i_mac_perim_exit(mip);
   2726 }
   2727 
   2728 /*
   2729  * Reference count the number of active Tx threads. MCI_TX_QUIESCE indicates
   2730  * that a control operation wants to quiesce the Tx data flow in which case
   2731  * we return an error. Holding any of the per cpu locks ensures that the
   2732  * mci_tx_flag won't change.
   2733  *
   2734  * 'CPU' must be accessed just once and used to compute the index into the
   2735  * percpu array, and that index must be used for the entire duration of the
   2736  * packet send operation. Note that the thread may be preempted and run on
   2737  * another cpu any time and so we can't use 'CPU' more than once for the
   2738  * operation.
   2739  */
   2740 #define	MAC_TX_TRY_HOLD(mcip, mytx, error)				\
   2741 {									\
   2742 	(error) = 0;							\
   2743 	(mytx) = &(mcip)->mci_tx_pcpu[CPU->cpu_seqid & mac_tx_percpu_cnt]; \
   2744 	mutex_enter(&(mytx)->pcpu_tx_lock);				\
   2745 	if (!((mcip)->mci_tx_flag & MCI_TX_QUIESCE)) {			\
   2746 		(mytx)->pcpu_tx_refcnt++;				\
   2747 	} else {							\
   2748 		(error) = -1;						\
   2749 	}								\
   2750 	mutex_exit(&(mytx)->pcpu_tx_lock);				\
   2751 }
   2752 
   2753 /*
   2754  * Release the reference. If needed, signal any control operation waiting
   2755  * for Tx quiescence. The wait and signal are always done using the
   2756  * mci_tx_pcpu[0]'s lock
   2757  */
   2758 #define	MAC_TX_RELE(mcip, mytx) {					\
   2759 	mutex_enter(&(mytx)->pcpu_tx_lock);				\
   2760 	if (--(mytx)->pcpu_tx_refcnt == 0 &&				\
   2761 	    (mcip)->mci_tx_flag & MCI_TX_QUIESCE) {			\
   2762 		mutex_exit(&(mytx)->pcpu_tx_lock);			\
   2763 		mutex_enter(&(mcip)->mci_tx_pcpu[0].pcpu_tx_lock);	\
   2764 		cv_signal(&(mcip)->mci_tx_cv);				\
   2765 		mutex_exit(&(mcip)->mci_tx_pcpu[0].pcpu_tx_lock);	\
   2766 	} else {							\
   2767 		mutex_exit(&(mytx)->pcpu_tx_lock);			\
   2768 	}								\
   2769 }
   2770 
   2771 /*
   2772  * Bump the count of the number of active Tx threads. This is maintained as
   2773  * a per CPU counter. On (CMT kind of) machines with large number of CPUs,
   2774  * a single mci_tx_lock may become contended. However a count of the total
   2775  * number of Tx threads per client is needed in order to quiesce the Tx side
   2776  * prior to reassigning a Tx ring dynamically to another client. The thread
   2777  * that needs to quiesce the Tx traffic grabs all the percpu locks and checks
   2778  * the sum of the individual percpu refcnts. Each Tx data thread only grabs
   2779  * its own percpu lock and increments its own refcnt.
   2780  */
   2781 void *
   2782 mac_tx_hold(mac_client_handle_t mch)
   2783 {
   2784 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
   2785 	mac_tx_percpu_t	*mytx;
   2786 	int error;
   2787 
   2788 	MAC_TX_TRY_HOLD(mcip, mytx, error);
   2789 	return (error == 0 ? (void *)mytx : NULL);
   2790 }
   2791 
   2792 void
   2793 mac_tx_rele(mac_client_handle_t mch, void *mytx_handle)
   2794 {
   2795 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
   2796 	mac_tx_percpu_t	*mytx = mytx_handle;
   2797 
   2798 	MAC_TX_RELE(mcip, mytx)
   2799 }
   2800 
   2801 /*
   2802  * Send function invoked by MAC clients.
   2803  */
   2804 mac_tx_cookie_t
   2805 mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint,
   2806     uint16_t flag, mblk_t **ret_mp)
   2807 {
   2808 	mac_tx_cookie_t		cookie = NULL;
   2809 	int			error;
   2810 	mac_tx_percpu_t		*mytx;
   2811 	mac_soft_ring_set_t	*srs;
   2812 	flow_entry_t		*flent;
   2813 	boolean_t		is_subflow = B_FALSE;
   2814 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   2815 	mac_impl_t		*mip = mcip->mci_mip;
   2816 	mac_srs_tx_t		*srs_tx;
   2817 
   2818 	/*
   2819 	 * Check whether the active Tx threads count is bumped already.
   2820 	 */
   2821 	if (!(flag & MAC_TX_NO_HOLD)) {
   2822 		MAC_TX_TRY_HOLD(mcip, mytx, error);
   2823 		if (error != 0) {
   2824 			freemsgchain(mp_chain);
   2825 			return (NULL);
   2826 		}
   2827 	}
   2828 
   2829 	/*
   2830 	 * If mac protection is enabled, only the permissible packets will be
   2831 	 * returned by mac_protect_check().
   2832 	 */
   2833 	if ((mcip->mci_flent->
   2834 	    fe_resource_props.mrp_mask & MRP_PROTECT) != 0 &&
   2835 	    (mp_chain = mac_protect_check(mch, mp_chain)) == NULL)
   2836 		goto done;
   2837 
   2838 	if (mcip->mci_subflow_tab != NULL &&
   2839 	    mcip->mci_subflow_tab->ft_flow_count > 0 &&
   2840 	    mac_flow_lookup(mcip->mci_subflow_tab, mp_chain,
   2841 	    FLOW_OUTBOUND, &flent) == 0) {
   2842 		/*
   2843 		 * The main assumption here is that if in the event
   2844 		 * we get a chain, all the packets will be classified
   2845 		 * to the same Flow/SRS. If this changes for any
   2846 		 * reason, the following logic should change as well.
   2847 		 * I suppose the fanout_hint also assumes this .
   2848 		 */
   2849 		ASSERT(flent != NULL);
   2850 		is_subflow = B_TRUE;
   2851 	} else {
   2852 		flent = mcip->mci_flent;
   2853 	}
   2854 
   2855 	srs = flent->fe_tx_srs;
   2856 	/*
   2857 	 * This is to avoid panics with PF_PACKET that can call mac_tx()
   2858 	 * against an interface that is not capable of sending. A rewrite
   2859 	 * of the mac datapath is required to remove this limitation.
   2860 	 */
   2861 	if (srs == NULL) {
   2862 		freemsgchain(mp_chain);
   2863 		goto done;
   2864 	}
   2865 
   2866 	srs_tx = &srs->srs_tx;
   2867 	if (srs_tx->st_mode == SRS_TX_DEFAULT &&
   2868 	    (srs->srs_state & SRS_ENQUEUED) == 0 &&
   2869 	    mip->mi_nactiveclients == 1 && mip->mi_promisc_list == NULL &&
   2870 	    mp_chain->b_next == NULL) {
   2871 		uint64_t	obytes;
   2872 
   2873 		/*
   2874 		 * Since dls always opens the underlying MAC, nclients equals
   2875 		 * to 1 means that the only active client is dls itself acting
   2876 		 * as a primary client of the MAC instance. Since dls will not
   2877 		 * send tagged packets in that case, and dls is trusted to send
   2878 		 * packets for its allowed VLAN(s), the VLAN tag insertion and
   2879 		 * check is required only if nclients is greater than 1.
   2880 		 */
   2881 		if (mip->mi_nclients > 1) {
   2882 			if (MAC_VID_CHECK_NEEDED(mcip)) {
   2883 				int	err = 0;
   2884 
   2885 				MAC_VID_CHECK(mcip, mp_chain, err);
   2886 				if (err != 0) {
   2887 					freemsg(mp_chain);
   2888 					mcip->mci_stat_oerrors++;
   2889 					goto done;
   2890 				}
   2891 			}
   2892 			if (MAC_TAG_NEEDED(mcip)) {
   2893 				mp_chain = mac_add_vlan_tag(mp_chain, 0,
   2894 				    mac_client_vid(mch));
   2895 				if (mp_chain == NULL) {
   2896 					mcip->mci_stat_oerrors++;
   2897 					goto done;
   2898 				}
   2899 			}
   2900 		}
   2901 
   2902 		obytes = (mp_chain->b_cont == NULL ? MBLKL(mp_chain) :
   2903 		    msgdsize(mp_chain));
   2904 
   2905 		MAC_TX(mip, srs_tx->st_arg2, mp_chain,
   2906 		    ((mcip->mci_state_flags & MCIS_SHARE_BOUND) != 0));
   2907 
   2908 		if (mp_chain == NULL) {
   2909 			cookie = NULL;
   2910 			mcip->mci_stat_obytes += obytes;
   2911 			mcip->mci_stat_opackets += 1;
   2912 			if ((srs->srs_type & SRST_FLOW) != 0) {
   2913 				FLOW_STAT_UPDATE(flent, obytes, obytes);
   2914 				FLOW_STAT_UPDATE(flent, opackets, 1);
   2915 			}
   2916 		} else {
   2917 			mutex_enter(&srs->srs_lock);
   2918 			cookie = mac_tx_srs_no_desc(srs, mp_chain,
   2919 			    flag, ret_mp);
   2920 			mutex_exit(&srs->srs_lock);
   2921 		}
   2922 	} else {
   2923 		cookie = srs_tx->st_func(srs, mp_chain, hint, flag, ret_mp);
   2924 	}
   2925 
   2926 done:
   2927 	if (is_subflow)
   2928 		FLOW_REFRELE(flent);
   2929 
   2930 	if (!(flag & MAC_TX_NO_HOLD))
   2931 		MAC_TX_RELE(mcip, mytx);
   2932 
   2933 	return (cookie);
   2934 }
   2935 
   2936 /*
   2937  * mac_tx_is_blocked
   2938  *
   2939  * Given a cookie, it returns if the ring identified by the cookie is
   2940  * flow-controlled or not. If NULL is passed in place of a cookie,
   2941  * then it finds out if any of the underlying rings belonging to the
   2942  * SRS is flow controlled or not and returns that status.
   2943  */
   2944 /* ARGSUSED */
   2945 boolean_t
   2946 mac_tx_is_flow_blocked(mac_client_handle_t mch, mac_tx_cookie_t cookie)
   2947 {
   2948 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
   2949 	mac_soft_ring_set_t *mac_srs;
   2950 	mac_soft_ring_t *sringp;
   2951 	boolean_t blocked = B_FALSE;
   2952 	mac_tx_percpu_t *mytx;
   2953 	int err;
   2954 	int i;
   2955 
   2956 	/*
   2957 	 * Bump the reference count so that mac_srs won't be deleted.
   2958 	 * If the client is currently quiesced and we failed to bump
   2959 	 * the reference, return B_TRUE so that flow control stays
   2960 	 * as enabled.
   2961 	 *
   2962 	 * Flow control will then be disabled once the client is no
   2963 	 * longer quiesced.
   2964 	 */
   2965 	MAC_TX_TRY_HOLD(mcip, mytx, err);
   2966 	if (err != 0)
   2967 		return (B_TRUE);
   2968 
   2969 	if ((mac_srs = MCIP_TX_SRS(mcip)) == NULL) {
   2970 		MAC_TX_RELE(mcip, mytx);
   2971 		return (B_FALSE);
   2972 	}
   2973 
   2974 	mutex_enter(&mac_srs->srs_lock);
   2975 	if (mac_srs->srs_tx.st_mode == SRS_TX_FANOUT) {
   2976 		if (cookie != NULL) {
   2977 			sringp = (mac_soft_ring_t *)cookie;
   2978 			mutex_enter(&sringp->s_ring_lock);
   2979 			if (sringp->s_ring_state & S_RING_TX_HIWAT)
   2980 				blocked = B_TRUE;
   2981 			mutex_exit(&sringp->s_ring_lock);
   2982 		} else {
   2983 			for (i = 0; i < mac_srs->srs_oth_ring_count; i++) {
   2984 				sringp = mac_srs->srs_oth_soft_rings[i];
   2985 				mutex_enter(&sringp->s_ring_lock);
   2986 				if (sringp->s_ring_state & S_RING_TX_HIWAT) {
   2987 					blocked = B_TRUE;
   2988 					mutex_exit(&sringp->s_ring_lock);
   2989 					break;
   2990 				}
   2991 				mutex_exit(&sringp->s_ring_lock);
   2992 			}
   2993 		}
   2994 	} else {
   2995 		blocked = (mac_srs->srs_state & SRS_TX_HIWAT);
   2996 	}
   2997 	mutex_exit(&mac_srs->srs_lock);
   2998 	MAC_TX_RELE(mcip, mytx);
   2999 	return (blocked);
   3000 }
   3001 
   3002 /*
   3003  * Check if the MAC client is the primary MAC client.
   3004  */
   3005 boolean_t
   3006 mac_is_primary_client(mac_client_impl_t *mcip)
   3007 {
   3008 	return (mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY);
   3009 }
   3010 
   3011 void
   3012 mac_ioctl(mac_handle_t mh, queue_t *wq, mblk_t *bp)
   3013 {
   3014 	mac_impl_t	*mip = (mac_impl_t *)mh;
   3015 	int cmd = ((struct iocblk *)bp->b_rptr)->ioc_cmd;
   3016 
   3017 	if ((cmd == ND_GET && (mip->mi_callbacks->mc_callbacks & MC_GETPROP)) ||
   3018 	    (cmd == ND_SET && (mip->mi_callbacks->mc_callbacks & MC_SETPROP))) {
   3019 		/*
   3020 		 * If ndd props were registered, call them.
   3021 		 * Note that ndd ioctls are Obsolete
   3022 		 */
   3023 		mac_ndd_ioctl(mip, wq, bp);
   3024 		return;
   3025 	}
   3026 
   3027 	/*
   3028 	 * Call the driver to handle the ioctl.  The driver may not support
   3029 	 * any ioctls, in which case we reply with a NAK on its behalf.
   3030 	 */
   3031 	if (mip->mi_callbacks->mc_callbacks & MC_IOCTL)
   3032 		mip->mi_ioctl(mip->mi_driver, wq, bp);
   3033 	else
   3034 		miocnak(wq, bp, 0, EINVAL);
   3035 }
   3036 
   3037 /*
   3038  * Return the link state of the specified MAC instance.
   3039  */
   3040 link_state_t
   3041 mac_link_get(mac_handle_t mh)
   3042 {
   3043 	return (((mac_impl_t *)mh)->mi_linkstate);
   3044 }
   3045 
   3046 /*
   3047  * Add a mac client specified notification callback. Please see the comments
   3048  * above mac_callback_add() for general information about mac callback
   3049  * addition/deletion in the presence of mac callback list walkers
   3050  */
   3051 mac_notify_handle_t
   3052 mac_notify_add(mac_handle_t mh, mac_notify_t notify_fn, void *arg)
   3053 {
   3054 	mac_impl_t		*mip = (mac_impl_t *)mh;
   3055 	mac_notify_cb_t		*mncb;
   3056 	mac_cb_info_t		*mcbi;
   3057 
   3058 	/*
   3059 	 * Allocate a notify callback structure, fill in the details and
   3060 	 * use the mac callback list manipulation functions to chain into
   3061 	 * the list of callbacks.
   3062 	 */
   3063 	mncb = kmem_zalloc(sizeof (mac_notify_cb_t), KM_SLEEP);
   3064 	mncb->mncb_fn = notify_fn;
   3065 	mncb->mncb_arg = arg;
   3066 	mncb->mncb_mip = mip;
   3067 	mncb->mncb_link.mcb_objp = mncb;
   3068 	mncb->mncb_link.mcb_objsize = sizeof (mac_notify_cb_t);
   3069 	mncb->mncb_link.mcb_flags = MCB_NOTIFY_CB_T;
   3070 
   3071 	mcbi = &mip->mi_notify_cb_info;
   3072 
   3073 	i_mac_perim_enter(mip);
   3074 	mutex_enter(mcbi->mcbi_lockp);
   3075 
   3076 	mac_callback_add(&mip->mi_notify_cb_info, &mip->mi_notify_cb_list,
   3077 	    &mncb->mncb_link);
   3078 
   3079 	mutex_exit(mcbi->mcbi_lockp);
   3080 	i_mac_perim_exit(mip);
   3081 	return ((mac_notify_handle_t)mncb);
   3082 }
   3083 
   3084 void
   3085 mac_notify_remove_wait(mac_handle_t mh)
   3086 {
   3087 	mac_impl_t	*mip = (mac_impl_t *)mh;
   3088 	mac_cb_info_t	*mcbi = &mip->mi_notify_cb_info;
   3089 
   3090 	mutex_enter(mcbi->mcbi_lockp);
   3091 	mac_callback_remove_wait(&mip->mi_notify_cb_info);
   3092 	mutex_exit(mcbi->mcbi_lockp);
   3093 }
   3094 
   3095 /*
   3096  * Remove a mac client specified notification callback
   3097  */
   3098 int
   3099 mac_notify_remove(mac_notify_handle_t mnh, boolean_t wait)
   3100 {
   3101 	mac_notify_cb_t	*mncb = (mac_notify_cb_t *)mnh;
   3102 	mac_impl_t	*mip = mncb->mncb_mip;
   3103 	mac_cb_info_t	*mcbi;
   3104 	int		err = 0;
   3105 
   3106 	mcbi = &mip->mi_notify_cb_info;
   3107 
   3108 	i_mac_perim_enter(mip);
   3109 	mutex_enter(mcbi->mcbi_lockp);
   3110 
   3111 	ASSERT(mncb->mncb_link.mcb_objp == mncb);
   3112 	/*
   3113 	 * If there aren't any list walkers, the remove would succeed
   3114 	 * inline, else we wait for the deferred remove to complete
   3115 	 */
   3116 	if (mac_callback_remove(&mip->mi_notify_cb_info,
   3117 	    &mip->mi_notify_cb_list, &mncb->mncb_link)) {
   3118 		kmem_free(mncb, sizeof (mac_notify_cb_t));
   3119 	} else {
   3120 		err = EBUSY;
   3121 	}
   3122 
   3123 	mutex_exit(mcbi->mcbi_lockp);
   3124 	i_mac_perim_exit(mip);
   3125 
   3126 	/*
   3127 	 * If we failed to remove the notification callback and "wait" is set
   3128 	 * to be B_TRUE, wait for the callback to finish after we exit the
   3129 	 * mac perimeter.
   3130 	 */
   3131 	if (err != 0 && wait) {
   3132 		mac_notify_remove_wait((mac_handle_t)mip);
   3133 		return (0);
   3134 	}
   3135 
   3136 	return (err);
   3137 }
   3138 
   3139 /*
   3140  * Associate resource management callbacks with the specified MAC
   3141  * clients.
   3142  */
   3143 
   3144 void
   3145 mac_resource_set_common(mac_client_handle_t mch, mac_resource_add_t add,
   3146     mac_resource_remove_t remove, mac_resource_quiesce_t quiesce,
   3147     mac_resource_restart_t restart, mac_resource_bind_t bind,
   3148     void *arg)
   3149 {
   3150 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
   3151 
   3152 	mcip->mci_resource_add = add;
   3153 	mcip->mci_resource_remove = remove;
   3154 	mcip->mci_resource_quiesce = quiesce;
   3155 	mcip->mci_resource_restart = restart;
   3156 	mcip->mci_resource_bind = bind;
   3157 	mcip->mci_resource_arg = arg;
   3158 }
   3159 
   3160 void
   3161 mac_resource_set(mac_client_handle_t mch, mac_resource_add_t add, void *arg)
   3162 {
   3163 	/* update the 'resource_add' callback */
   3164 	mac_resource_set_common(mch, add, NULL, NULL, NULL, NULL, arg);
   3165 }
   3166 
   3167 /*
   3168  * Sets up the client resources and enable the polling interface over all the
   3169  * SRS's and the soft rings of the client
   3170  */
   3171 void
   3172 mac_client_poll_enable(mac_client_handle_t mch)
   3173 {
   3174 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   3175 	mac_soft_ring_set_t	*mac_srs;
   3176 	flow_entry_t		*flent;
   3177 	int			i;
   3178 
   3179 	flent = mcip->mci_flent;
   3180 	ASSERT(flent != NULL);
   3181 
   3182 	mcip->mci_state_flags |= MCIS_CLIENT_POLL_CAPABLE;
   3183 	for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
   3184 		mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
   3185 		ASSERT(mac_srs->srs_mcip == mcip);
   3186 		mac_srs_client_poll_enable(mcip, mac_srs);
   3187 	}
   3188 }
   3189 
   3190 /*
   3191  * Tears down the client resources and disable the polling interface over all
   3192  * the SRS's and the soft rings of the client
   3193  */
   3194 void
   3195 mac_client_poll_disable(mac_client_handle_t mch)
   3196 {
   3197 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   3198 	mac_soft_ring_set_t	*mac_srs;
   3199 	flow_entry_t		*flent;
   3200 	int			i;
   3201 
   3202 	flent = mcip->mci_flent;
   3203 	ASSERT(flent != NULL);
   3204 
   3205 	mcip->mci_state_flags &= ~MCIS_CLIENT_POLL_CAPABLE;
   3206 	for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
   3207 		mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
   3208 		ASSERT(mac_srs->srs_mcip == mcip);
   3209 		mac_srs_client_poll_disable(mcip, mac_srs);
   3210 	}
   3211 }
   3212 
   3213 /*
   3214  * Associate the CPUs specified by the given property with a MAC client.
   3215  */
   3216 int
   3217 mac_cpu_set(mac_client_handle_t mch, mac_resource_props_t *mrp)
   3218 {
   3219 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
   3220 	mac_impl_t *mip = mcip->mci_mip;
   3221 	int err = 0;
   3222 
   3223 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   3224 
   3225 	if ((err = mac_validate_props(mrp)) != 0)
   3226 		return (err);
   3227 
   3228 	if (MCIP_DATAPATH_SETUP(mcip))
   3229 		mac_flow_modify(mip->mi_flow_tab, mcip->mci_flent, mrp);
   3230 
   3231 	mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), B_FALSE);
   3232 	return (0);
   3233 }
   3234 
   3235 /*
   3236  * Apply the specified properties to the specified MAC client.
   3237  */
   3238 int
   3239 mac_client_set_resources(mac_client_handle_t mch, mac_resource_props_t *mrp)
   3240 {
   3241 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
   3242 	mac_impl_t *mip = mcip->mci_mip;
   3243 	int err = 0;
   3244 
   3245 	i_mac_perim_enter(mip);
   3246 
   3247 	if ((mrp->mrp_mask & MRP_MAXBW) || (mrp->mrp_mask & MRP_PRIORITY)) {
   3248 		err = mac_resource_ctl_set(mch, mrp);
   3249 		if (err != 0)
   3250 			goto done;
   3251 	}
   3252 
   3253 	if (mrp->mrp_mask & MRP_CPUS) {
   3254 		err = mac_cpu_set(mch, mrp);
   3255 		if (err != 0)
   3256 			goto done;
   3257 	}
   3258 
   3259 	if (mrp->mrp_mask & MRP_PROTECT)
   3260 		err = mac_protect_set(mch, mrp);
   3261 
   3262 done:
   3263 	i_mac_perim_exit(mip);
   3264 	return (err);
   3265 }
   3266 
   3267 /*
   3268  * Return the properties currently associated with the specified MAC client.
   3269  */
   3270 void
   3271 mac_client_get_resources(mac_client_handle_t mch, mac_resource_props_t *mrp)
   3272 {
   3273 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   3274 	mac_resource_props_t	*mcip_mrp = MCIP_RESOURCE_PROPS(mcip);
   3275 
   3276 	bcopy(mcip_mrp, mrp, sizeof (mac_resource_props_t));
   3277 }
   3278 
   3279 /*
   3280  * Pass a copy of the specified packet to the promiscuous callbacks
   3281  * of the specified MAC.
   3282  *
   3283  * If sender is NULL, the function is being invoked for a packet chain
   3284  * received from the wire. If sender is non-NULL, it points to
   3285  * the MAC client from which the packet is being sent.
   3286  *
   3287  * The packets are distributed to the promiscuous callbacks as follows:
   3288  *
   3289  * - all packets are sent to the MAC_CLIENT_PROMISC_ALL callbacks
 * - all broadcast and multicast packets are sent to the
 *   MAC_CLIENT_PROMISC_FILTERED and MAC_CLIENT_PROMISC_MULTI callbacks.
 *
 * The unicast packets of MAC_CLIENT_PROMISC_FILTERED callbacks are
 * dispatched after classification by mac_rx_deliver().
   3295  */
   3296 
   3297 static void
   3298 mac_promisc_dispatch_one(mac_promisc_impl_t *mpip, mblk_t *mp,
   3299     boolean_t loopback)
   3300 {
   3301 	mblk_t *mp_copy, *mp_next;
   3302 
   3303 	if (!mpip->mpi_no_copy || mpip->mpi_strip_vlan_tag) {
   3304 		mp_copy = copymsg(mp);
   3305 		if (mp_copy == NULL)
   3306 			return;
   3307 
   3308 		if (mpip->mpi_strip_vlan_tag) {
   3309 			mp_copy = mac_strip_vlan_tag_chain(mp_copy);
   3310 			if (mp_copy == NULL)
   3311 				return;
   3312 		}
   3313 		mp_next = NULL;
   3314 	} else {
   3315 		mp_copy = mp;
   3316 		mp_next = mp->b_next;
   3317 	}
   3318 	mp_copy->b_next = NULL;
   3319 
   3320 	mpip->mpi_fn(mpip->mpi_arg, NULL, mp_copy, loopback);
   3321 	if (mp_copy == mp)
   3322 		mp->b_next = mp_next;
   3323 }
   3324 
   3325 /*
   3326  * Return the VID of a packet. Zero if the packet is not tagged.
   3327  */
   3328 static uint16_t
   3329 mac_ether_vid(mblk_t *mp)
   3330 {
   3331 	struct ether_header *eth = (struct ether_header *)mp->b_rptr;
   3332 
   3333 	if (ntohs(eth->ether_type) == ETHERTYPE_VLAN) {
   3334 		struct ether_vlan_header *t_evhp =
   3335 		    (struct ether_vlan_header *)mp->b_rptr;
   3336 		return (VLAN_ID(ntohs(t_evhp->ether_tci)));
   3337 	}
   3338 
   3339 	return (0);
   3340 }
   3341 
   3342 /*
   3343  * Return whether the specified packet contains a multicast or broadcast
   3344  * destination MAC address.
   3345  */
   3346 static boolean_t
   3347 mac_is_mcast(mac_impl_t *mip, mblk_t *mp)
   3348 {
   3349 	mac_header_info_t hdr_info;
   3350 
   3351 	if (mac_header_info((mac_handle_t)mip, mp, &hdr_info) != 0)
   3352 		return (B_FALSE);
   3353 	return ((hdr_info.mhi_dsttype == MAC_ADDRTYPE_BROADCAST) ||
   3354 	    (hdr_info.mhi_dsttype == MAC_ADDRTYPE_MULTICAST));
   3355 }
   3356 
   3357 /*
   3358  * Send a copy of an mblk chain to the MAC clients of the specified MAC.
   3359  * "sender" points to the sender MAC client for outbound packets, and
   3360  * is set to NULL for inbound packets.
   3361  */
   3362 void
   3363 mac_promisc_dispatch(mac_impl_t *mip, mblk_t *mp_chain,
   3364     mac_client_impl_t *sender)
   3365 {
   3366 	mac_promisc_impl_t *mpip;
   3367 	mac_cb_t *mcb;
   3368 	mblk_t *mp;
   3369 	boolean_t is_mcast, is_sender;
   3370 
   3371 	MAC_PROMISC_WALKER_INC(mip);
   3372 	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
   3373 		is_mcast = mac_is_mcast(mip, mp);
   3374 		/* send packet to interested callbacks */
   3375 		for (mcb = mip->mi_promisc_list; mcb != NULL;
   3376 		    mcb = mcb->mcb_nextp) {
   3377 			mpip = (mac_promisc_impl_t *)mcb->mcb_objp;
   3378 			is_sender = (mpip->mpi_mcip == sender);
   3379 
   3380 			if (is_sender && mpip->mpi_no_tx_loop)
   3381 				/*
   3382 				 * The sender doesn't want to receive
   3383 				 * copies of the packets it sends.
   3384 				 */
   3385 				continue;
   3386 
   3387 			/* this client doesn't need any packets (bridge) */
   3388 			if (mpip->mpi_fn == NULL)
   3389 				continue;
   3390 
   3391 			/*
   3392 			 * For an ethernet MAC, don't displatch a multicast
   3393 			 * packet to a non-PROMISC_ALL callbacks unless the VID
   3394 			 * of the packet matches the VID of the client.
   3395 			 */
   3396 			if (is_mcast &&
   3397 			    mpip->mpi_type != MAC_CLIENT_PROMISC_ALL &&
   3398 			    !mac_client_check_flow_vid(mpip->mpi_mcip,
   3399 			    mac_ether_vid(mp)))
   3400 				continue;
   3401 
   3402 			if (is_sender ||
   3403 			    mpip->mpi_type == MAC_CLIENT_PROMISC_ALL ||
   3404 			    is_mcast)
   3405 				mac_promisc_dispatch_one(mpip, mp, is_sender);
   3406 		}
   3407 	}
   3408 	MAC_PROMISC_WALKER_DCR(mip);
   3409 }
   3410 
   3411 void
   3412 mac_promisc_client_dispatch(mac_client_impl_t *mcip, mblk_t *mp_chain)
   3413 {
   3414 	mac_impl_t		*mip = mcip->mci_mip;
   3415 	mac_promisc_impl_t	*mpip;
   3416 	boolean_t		is_mcast;
   3417 	mblk_t			*mp;
   3418 	mac_cb_t		*mcb;
   3419 
   3420 	/*
   3421 	 * The unicast packets for the MAC client still
   3422 	 * need to be delivered to the MAC_CLIENT_PROMISC_FILTERED
   3423 	 * promiscuous callbacks. The broadcast and multicast
   3424 	 * packets were delivered from mac_rx().
   3425 	 */
   3426 	MAC_PROMISC_WALKER_INC(mip);
   3427 	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
   3428 		is_mcast = mac_is_mcast(mip, mp);
   3429 		for (mcb = mcip->mci_promisc_list; mcb != NULL;
   3430 		    mcb = mcb->mcb_nextp) {
   3431 			mpip = (mac_promisc_impl_t *)mcb->mcb_objp;
   3432 			if (mpip->mpi_type == MAC_CLIENT_PROMISC_FILTERED &&
   3433 			    !is_mcast) {
   3434 				mac_promisc_dispatch_one(mpip, mp, B_FALSE);
   3435 			}
   3436 		}
   3437 	}
   3438 	MAC_PROMISC_WALKER_DCR(mip);
   3439 }
   3440 
   3441 /*
   3442  * Return the margin value currently assigned to the specified MAC instance.
   3443  */
   3444 void
   3445 mac_margin_get(mac_handle_t mh, uint32_t *marginp)
   3446 {
   3447 	mac_impl_t *mip = (mac_impl_t *)mh;
   3448 
   3449 	rw_enter(&(mip->mi_rw_lock), RW_READER);
   3450 	*marginp = mip->mi_margin;
   3451 	rw_exit(&(mip->mi_rw_lock));
   3452 }
   3453 
   3454 /*
   3455  * mac_info_get() is used for retrieving the mac_info when a DL_INFO_REQ is
   3456  * issued before a DL_ATTACH_REQ. we walk the i_mac_impl_hash table and find
   3457  * the first mac_impl_t with a matching driver name; then we copy its mac_info_t
   3458  * to the caller. we do all this with i_mac_impl_lock held so the mac_impl_t
   3459  * cannot disappear while we are accessing it.
   3460  */
   3461 typedef struct i_mac_info_state_s {
   3462 	const char	*mi_name;
   3463 	mac_info_t	*mi_infop;
   3464 } i_mac_info_state_t;
   3465 
   3466 /*ARGSUSED*/
   3467 static uint_t
   3468 i_mac_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
   3469 {
   3470 	i_mac_info_state_t *statep = arg;
   3471 	mac_impl_t *mip = (mac_impl_t *)val;
   3472 
   3473 	if (mip->mi_state_flags & MIS_DISABLED)
   3474 		return (MH_WALK_CONTINUE);
   3475 
   3476 	if (strcmp(statep->mi_name,
   3477 	    ddi_driver_name(mip->mi_dip)) != 0)
   3478 		return (MH_WALK_CONTINUE);
   3479 
   3480 	statep->mi_infop = &mip->mi_info;
   3481 	return (MH_WALK_TERMINATE);
   3482 }
   3483 
   3484 boolean_t
   3485 mac_info_get(const char *name, mac_info_t *minfop)
   3486 {
   3487 	i_mac_info_state_t state;
   3488 
   3489 	rw_enter(&i_mac_impl_lock, RW_READER);
   3490 	state.mi_name = name;
   3491 	state.mi_infop = NULL;
   3492 	mod_hash_walk(i_mac_impl_hash, i_mac_info_walker, &state);
   3493 	if (state.mi_infop == NULL) {
   3494 		rw_exit(&i_mac_impl_lock);
   3495 		return (B_FALSE);
   3496 	}
   3497 	*minfop = *state.mi_infop;
   3498 	rw_exit(&i_mac_impl_lock);
   3499 	return (B_TRUE);
   3500 }
   3501 
   3502 /*
   3503  * To get the capabilities that MAC layer cares about, such as rings, factory
   3504  * mac address, vnic or not, it should directly invoke this function.  If the
   3505  * link is part of a bridge, then the only "capability" it has is the inability
   3506  * to do zero copy.
   3507  */
   3508 boolean_t
   3509 i_mac_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
   3510 {
   3511 	mac_impl_t *mip = (mac_impl_t *)mh;
   3512 
   3513 	if (mip->mi_bridge_link != NULL)
   3514 		return (cap == MAC_CAPAB_NO_ZCOPY);
   3515 	else if (mip->mi_callbacks->mc_callbacks & MC_GETCAPAB)
   3516 		return (mip->mi_getcapab(mip->mi_driver, cap, cap_data));
   3517 	else
   3518 		return (B_FALSE);
   3519 }
   3520 
   3521 /*
   3522  * Capability query function. If number of active mac clients is greater than
   3523  * 1, only limited capabilities can be advertised to the caller no matter the
   3524  * driver has certain capability or not. Else, we query the driver to get the
   3525  * capability.
   3526  */
   3527 boolean_t
   3528 mac_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
   3529 {
   3530 	mac_impl_t *mip = (mac_impl_t *)mh;
   3531 
   3532 	/*
   3533 	 * if mi_nactiveclients > 1, only MAC_CAPAB_LEGACY, MAC_CAPAB_HCKSUM,
   3534 	 * MAC_CAPAB_NO_NATIVEVLAN and MAC_CAPAB_NO_ZCOPY can be advertised.
   3535 	 */
   3536 	if (mip->mi_nactiveclients > 1) {
   3537 		switch (cap) {
   3538 		case MAC_CAPAB_NO_NATIVEVLAN:
   3539 		case MAC_CAPAB_NO_ZCOPY:
   3540 			return (B_TRUE);
   3541 		case MAC_CAPAB_LEGACY:
   3542 		case MAC_CAPAB_HCKSUM:
   3543 			break;
   3544 		default:
   3545 			return (B_FALSE);
   3546 		}
   3547 	}
   3548 
   3549 	/* else get capab from driver */
   3550 	return (i_mac_capab_get(mh, cap, cap_data));
   3551 }
   3552 
   3553 boolean_t
   3554 mac_sap_verify(mac_handle_t mh, uint32_t sap, uint32_t *bind_sap)
   3555 {
   3556 	mac_impl_t *mip = (mac_impl_t *)mh;
   3557 
   3558 	return (mip->mi_type->mt_ops.mtops_sap_verify(sap, bind_sap,
   3559 	    mip->mi_pdata));
   3560 }
   3561 
   3562 mblk_t *
   3563 mac_header(mac_handle_t mh, const uint8_t *daddr, uint32_t sap, mblk_t *payload,
   3564     size_t extra_len)
   3565 {
   3566 	mac_impl_t	*mip = (mac_impl_t *)mh;
   3567 	const uint8_t	*hdr_daddr;
   3568 
   3569 	/*
   3570 	 * If the MAC is point-to-point with a fixed destination address, then
   3571 	 * we must always use that destination in the MAC header.
   3572 	 */
   3573 	hdr_daddr = (mip->mi_dstaddr_set ? mip->mi_dstaddr : daddr);
   3574 	return (mip->mi_type->mt_ops.mtops_header(mip->mi_addr, hdr_daddr, sap,
   3575 	    mip->mi_pdata, payload, extra_len));
   3576 }
   3577 
   3578 int
   3579 mac_header_info(mac_handle_t mh, mblk_t *mp, mac_header_info_t *mhip)
   3580 {
   3581 	mac_impl_t *mip = (mac_impl_t *)mh;
   3582 
   3583 	return (mip->mi_type->mt_ops.mtops_header_info(mp, mip->mi_pdata,
   3584 	    mhip));
   3585 }
   3586 
   3587 int
   3588 mac_vlan_header_info(mac_handle_t mh, mblk_t *mp, mac_header_info_t *mhip)
   3589 {
   3590 	mac_impl_t	*mip = (mac_impl_t *)mh;
   3591 	boolean_t	is_ethernet = (mip->mi_info.mi_media == DL_ETHER);
   3592 	int		err = 0;
   3593 
   3594 	/*
   3595 	 * Packets should always be at least 16 bit aligned.
   3596 	 */
   3597 	ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
   3598 
   3599 	if ((err = mac_header_info(mh, mp, mhip)) != 0)
   3600 		return (err);
   3601 
   3602 	/*
   3603 	 * If this is a VLAN-tagged Ethernet packet, then the SAP in the
   3604 	 * mac_header_info_t as returned by mac_header_info() is
   3605 	 * ETHERTYPE_VLAN. We need to grab the ethertype from the VLAN header.
   3606 	 */
   3607 	if (is_ethernet && (mhip->mhi_bindsap == ETHERTYPE_VLAN)) {
   3608 		struct ether_vlan_header *evhp;
   3609 		uint16_t sap;
   3610 		mblk_t *tmp = NULL;
   3611 		size_t size;
   3612 
   3613 		size = sizeof (struct ether_vlan_header);
   3614 		if (MBLKL(mp) < size) {
   3615 			/*
   3616 			 * Pullup the message in order to get the MAC header
   3617 			 * infomation. Note that this is a read-only function,
   3618 			 * we keep the input packet intact.
   3619 			 */
   3620 			if ((tmp = msgpullup(mp, size)) == NULL)
   3621 				return (EINVAL);
   3622 
   3623 			mp = tmp;
   3624 		}
   3625 		evhp = (struct ether_vlan_header *)mp->b_rptr;
   3626 		sap = ntohs(evhp->ether_type);
   3627 		(void) mac_sap_verify(mh, sap, &mhip->mhi_bindsap);
   3628 		mhip->mhi_hdrsize = sizeof (struct ether_vlan_header);
   3629 		mhip->mhi_tci = ntohs(evhp->ether_tci);
   3630 		mhip->mhi_istagged = B_TRUE;
   3631 		freemsg(tmp);
   3632 
   3633 		if (VLAN_CFI(mhip->mhi_tci) != ETHER_CFI)
   3634 			return (EINVAL);
   3635 	} else {
   3636 		mhip->mhi_istagged = B_FALSE;
   3637 		mhip->mhi_tci = 0;
   3638 	}
   3639 
   3640 	return (0);
   3641 }
   3642 
   3643 mblk_t *
   3644 mac_header_cook(mac_handle_t mh, mblk_t *mp)
   3645 {
   3646 	mac_impl_t *mip = (mac_impl_t *)mh;
   3647 
   3648 	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_HEADER_COOK) {
   3649 		if (DB_REF(mp) > 1) {
   3650 			mblk_t *newmp = copymsg(mp);
   3651 			if (newmp == NULL)
   3652 				return (NULL);
   3653 			freemsg(mp);
   3654 			mp = newmp;
   3655 		}
   3656 		return (mip->mi_type->mt_ops.mtops_header_cook(mp,
   3657 		    mip->mi_pdata));
   3658 	}
   3659 	return (mp);
   3660 }
   3661 
   3662 mblk_t *
   3663 mac_header_uncook(mac_handle_t mh, mblk_t *mp)
   3664 {
   3665 	mac_impl_t *mip = (mac_impl_t *)mh;
   3666 
   3667 	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_HEADER_UNCOOK) {
   3668 		if (DB_REF(mp) > 1) {
   3669 			mblk_t *newmp = copymsg(mp);
   3670 			if (newmp == NULL)
   3671 				return (NULL);
   3672 			freemsg(mp);
   3673 			mp = newmp;
   3674 		}
   3675 		return (mip->mi_type->mt_ops.mtops_header_uncook(mp,
   3676 		    mip->mi_pdata));
   3677 	}
   3678 	return (mp);
   3679 }
   3680 
   3681 uint_t
   3682 mac_addr_len(mac_handle_t mh)
   3683 {
   3684 	mac_impl_t *mip = (mac_impl_t *)mh;
   3685 
   3686 	return (mip->mi_type->mt_addr_length);
   3687 }
   3688 
   3689 /* True if a MAC is a VNIC */
   3690 boolean_t
   3691 mac_is_vnic(mac_handle_t mh)
   3692 {
   3693 	return (((mac_impl_t *)mh)->mi_state_flags & MIS_IS_VNIC);
   3694 }
   3695 
   3696 mac_handle_t
   3697 mac_get_lower_mac_handle(mac_handle_t mh)
   3698 {
   3699 	mac_impl_t *mip = (mac_impl_t *)mh;
   3700 
   3701 	ASSERT(mac_is_vnic(mh));
   3702 	return (((vnic_t *)mip->mi_driver)->vn_lower_mh);
   3703 }
   3704 
   3705 void
   3706 mac_update_resources(mac_resource_props_t *nmrp, mac_resource_props_t *cmrp,
   3707     boolean_t is_user_flow)
   3708 {
   3709 	if (nmrp != NULL && cmrp != NULL) {
   3710 		if (nmrp->mrp_mask & MRP_PRIORITY) {
   3711 			if (nmrp->mrp_priority == MPL_RESET) {
   3712 				cmrp->mrp_mask &= ~MRP_PRIORITY;
   3713 				if (is_user_flow) {
   3714 					cmrp->mrp_priority =
   3715 					    MPL_SUBFLOW_DEFAULT;
   3716 				} else {
   3717 					cmrp->mrp_priority = MPL_LINK_DEFAULT;
   3718 				}
   3719 			} else {
   3720 				cmrp->mrp_mask |= MRP_PRIORITY;
   3721 				cmrp->mrp_priority = nmrp->mrp_priority;
   3722 			}
   3723 		}
   3724 		if (nmrp->mrp_mask & MRP_MAXBW) {
   3725 			cmrp->mrp_maxbw = nmrp->mrp_maxbw;
   3726 			if (nmrp->mrp_maxbw == MRP_MAXBW_RESETVAL)
   3727 				cmrp->mrp_mask &= ~MRP_MAXBW;
   3728 			else
   3729 				cmrp->mrp_mask |= MRP_MAXBW;
   3730 		}
   3731 		if (nmrp->mrp_mask & MRP_CPUS)
   3732 			MAC_COPY_CPUS(nmrp, cmrp);
   3733 
   3734 		if (nmrp->mrp_mask & MRP_PROTECT)
   3735 			mac_protect_update(nmrp, cmrp);
   3736 	}
   3737 }
   3738 
   3739 /*
   3740  * i_mac_set_resources:
   3741  *
   3742  * This routine associates properties with the primary MAC client of
   3743  * the specified MAC instance.
   3744  * - Cache the properties in mac_impl_t
   3745  * - Apply the properties to the primary MAC client if exists
   3746  */
   3747 int
   3748 i_mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp)
   3749 {
   3750 	mac_impl_t		*mip = (mac_impl_t *)mh;
   3751 	mac_client_impl_t	*mcip;
   3752 	int			err = 0;
   3753 	uint32_t		resmask, newresmask;
   3754 	mac_resource_props_t	tmrp, umrp;
   3755 
   3756 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   3757 
   3758 	err = mac_validate_props(mrp);
   3759 	if (err != 0)
   3760 		return (err);
   3761 
   3762 	bcopy(&mip->mi_resource_props, &umrp, sizeof (mac_resource_props_t));
   3763 	resmask = umrp.mrp_mask;
   3764 	mac_update_resources(mrp, &umrp, B_FALSE);
   3765 	newresmask = umrp.mrp_mask;
   3766 
   3767 	if (resmask == 0 && newresmask != 0) {
   3768 		/*
   3769 		 * Bandwidth, priority or cpu link properties configured,
   3770 		 * must disable fastpath.
   3771 		 */
   3772 		if ((err = mac_fastpath_disable((mac_handle_t)mip)) != 0)
   3773 			return (err);
   3774 	}
   3775 
   3776 	/*
   3777 	 * Since bind_cpu may be modified by mac_client_set_resources()
   3778 	 * we use a copy of bind_cpu and finally cache bind_cpu in mip.
   3779 	 * This allows us to cache only user edits in mip.
   3780 	 */
   3781 	bcopy(mrp, &tmrp, sizeof (mac_resource_props_t));
   3782 	mcip = mac_primary_client_handle(mip);
   3783 	if (mcip != NULL && (mcip->mci_state_flags & MCIS_IS_AGGR_PORT) == 0) {
   3784 		err =
   3785 		    mac_client_set_resources((mac_client_handle_t)mcip, &tmrp);
   3786 	}
   3787 
   3788 	/* Only update the values if mac_client_set_resources succeeded */
   3789 	if (err == 0) {
   3790 		bcopy(&umrp, &mip->mi_resource_props,
   3791 		    sizeof (mac_resource_props_t));
   3792 		/*
   3793 		 * If bankwidth, priority or cpu link properties cleared,
   3794 		 * renable fastpath.
   3795 		 */
   3796 		if (resmask != 0 && newresmask == 0)
   3797 			mac_fastpath_enable((mac_handle_t)mip);
   3798 	} else if (resmask == 0 && newresmask != 0) {
   3799 		mac_fastpath_enable((mac_handle_t)mip);
   3800 	}
   3801 	return (err);
   3802 }
   3803 
   3804 int
   3805 mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp)
   3806 {
   3807 	int err;
   3808 
   3809 	i_mac_perim_enter((mac_impl_t *)mh);
   3810 	err = i_mac_set_resources(mh, mrp);
   3811 	i_mac_perim_exit((mac_impl_t *)mh);
   3812 	return (err);
   3813 }
   3814 
   3815 /*
   3816  * Get the properties cached for the specified MAC instance.
   3817  */
   3818 void
   3819 mac_get_resources(mac_handle_t mh, mac_resource_props_t *mrp)
   3820 {
   3821 	mac_impl_t 		*mip = (mac_impl_t *)mh;
   3822 	mac_client_impl_t	*mcip;
   3823 
   3824 	if (mip->mi_state_flags & MIS_IS_VNIC) {
   3825 		mcip = mac_primary_client_handle(mip);
   3826 		if (mcip != NULL) {
   3827 			mac_client_get_resources((mac_client_handle_t)mcip,
   3828 			    mrp);
   3829 			return;
   3830 		}
   3831 	}
   3832 	bcopy(&mip->mi_resource_props, mrp, sizeof (mac_resource_props_t));
   3833 }
   3834 
   3835 int
   3836 mac_set_pvid(mac_handle_t mh, uint16_t pvid)
   3837 {
   3838 	mac_impl_t *mip = (mac_impl_t *)mh;
   3839 	mac_client_impl_t *mcip;
   3840 	mac_unicast_impl_t *muip;
   3841 
   3842 	i_mac_perim_enter(mip);
   3843 	if (pvid != 0) {
   3844 		for (mcip = mip->mi_clients_list; mcip != NULL;
   3845 		    mcip = mcip->mci_client_next) {
   3846 			for (muip = mcip->mci_unicast_list; muip != NULL;
   3847 			    muip = muip->mui_next) {
   3848 				if (muip->mui_vid == pvid) {
   3849 					i_mac_perim_exit(mip);
   3850 					return (EBUSY);
   3851 				}
   3852 			}
   3853 		}
   3854 	}
   3855 	mip->mi_pvid = pvid;
   3856 	i_mac_perim_exit(mip);
   3857 	return (0);
   3858 }
   3859 
   3860 uint16_t
   3861 mac_get_pvid(mac_handle_t mh)
   3862 {
   3863 	mac_impl_t *mip = (mac_impl_t *)mh;
   3864 
   3865 	return (mip->mi_pvid);
   3866 }
   3867 
   3868 uint32_t
   3869 mac_get_llimit(mac_handle_t mh)
   3870 {
   3871 	mac_impl_t *mip = (mac_impl_t *)mh;
   3872 
   3873 	return (mip->mi_llimit);
   3874 }
   3875 
   3876 uint32_t
   3877 mac_get_ldecay(mac_handle_t mh)
   3878 {
   3879 	mac_impl_t *mip = (mac_impl_t *)mh;
   3880 
   3881 	return (mip->mi_ldecay);
   3882 }
   3883 
   3884 /*
   3885  * Rename a mac client, its flow, and the kstat.
   3886  */
   3887 int
   3888 mac_rename_primary(mac_handle_t mh, const char *new_name)
   3889 {
   3890 	mac_impl_t		*mip = (mac_impl_t *)mh;
   3891 	mac_client_impl_t	*cur_clnt = NULL;
   3892 	flow_entry_t		*fep;
   3893 
   3894 	i_mac_perim_enter(mip);
   3895 
   3896 	/*
   3897 	 * VNICs: we need to change the sys flow name and
   3898 	 * the associated flow kstat.
   3899 	 */
   3900 	if (mip->mi_state_flags & MIS_IS_VNIC) {
   3901 		ASSERT(new_name != NULL);
   3902 		mac_rename_flow_names(mac_vnic_lower(mip), new_name);
   3903 		goto done;
   3904 	}
   3905 	/*
   3906 	 * This mac may itself be an aggr link, or it may have some client
   3907 	 * which is an aggr port. For both cases, we need to change the
   3908 	 * aggr port's mac client name, its flow name and the associated flow
   3909 	 * kstat.
   3910 	 */
   3911 	if (mip->mi_state_flags & MIS_IS_AGGR) {
   3912 		mac_capab_aggr_t aggr_cap;
   3913 		mac_rename_fn_t rename_fn;
   3914 		boolean_t ret;
   3915 
   3916 		ASSERT(new_name != NULL);
   3917 		ret = i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR,
   3918 		    (void *)(&aggr_cap));
   3919 		ASSERT(ret == B_TRUE);
   3920 		rename_fn = aggr_cap.mca_rename_fn;
   3921 		rename_fn(new_name, mip->mi_driver);
   3922 		/*
   3923 		 * The aggr's client name and kstat flow name will be
   3924 		 * updated below, i.e. via mac_rename_flow_names.
   3925 		 */
   3926 	}
   3927 
   3928 	for (cur_clnt = mip->mi_clients_list; cur_clnt != NULL;
   3929 	    cur_clnt = cur_clnt->mci_client_next) {
   3930 		if (cur_clnt->mci_state_flags & MCIS_IS_AGGR_PORT) {
   3931 			if (new_name != NULL) {
   3932 				char *str_st = cur_clnt->mci_name;
   3933 				char *str_del = strchr(str_st, '-');
   3934 
   3935 				ASSERT(str_del != NULL);
   3936 				bzero(str_del + 1, MAXNAMELEN -
   3937 				    (str_del - str_st + 1));
   3938 				bcopy(new_name, str_del + 1,
   3939 				    strlen(new_name));
   3940 			}
   3941 			fep = cur_clnt->mci_flent;
   3942 			mac_rename_flow(fep, cur_clnt->mci_name);
   3943 			break;
   3944 		} else if (new_name != NULL &&
   3945 		    cur_clnt->mci_state_flags & MCIS_USE_DATALINK_NAME) {
   3946 			mac_rename_flow_names(cur_clnt, new_name);
   3947 			break;
   3948 		}
   3949 	}
   3950 
   3951 done:
   3952 	i_mac_perim_exit(mip);
   3953 	return (0);
   3954 }
   3955 
   3956 /*
   3957  * Rename the MAC client's flow names
   3958  */
   3959 static void
   3960 mac_rename_flow_names(mac_client_impl_t *mcip, const char *new_name)
   3961 {
   3962 	flow_entry_t	*flent;
   3963 	uint16_t	vid;
   3964 	char		flowname[MAXFLOWNAMELEN];
   3965 	mac_impl_t	*mip = mcip->mci_mip;
   3966 
   3967 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   3968 
   3969 	/*
   3970 	 * Use mi_rw_lock to ensure that threads not in the mac perimeter
   3971 	 * see a self-consistent value for mci_name
   3972 	 */
   3973 	rw_enter(&mip->mi_rw_lock, RW_WRITER);
   3974 	(void) strlcpy(mcip->mci_name, new_name, sizeof (mcip->mci_name));
   3975 	rw_exit(&mip->mi_rw_lock);
   3976 
   3977 	mac_rename_flow(mcip->mci_flent, new_name);
   3978 
   3979 	if (mcip->mci_nflents == 1)
   3980 		return;
   3981 
   3982 	/*
   3983 	 * We have to rename all the others too, no stats to destroy for
   3984 	 * these.
   3985 	 */
   3986 	for (flent = mcip->mci_flent_list; flent != NULL;
   3987 	    flent = flent->fe_client_next) {
   3988 		if (flent != mcip->mci_flent) {
   3989 			vid = i_mac_flow_vid(flent);
   3990 			(void) sprintf(flowname, "%s%u", new_name, vid);
   3991 			mac_flow_set_name(flent, flowname);
   3992 		}
   3993 	}
   3994 }
   3995 
   3996 
   3997 /*
   3998  * Add a flow to the MAC client's flow list - i.e list of MAC/VID tuples
   3999  * defined for the specified MAC client.
   4000  */
   4001 static void
   4002 mac_client_add_to_flow_list(mac_client_impl_t *mcip, flow_entry_t *flent)
   4003 {
   4004 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
   4005 	/*
   4006 	 * The promisc Rx data path walks the mci_flent_list. Protect by
   4007 	 * using mi_rw_lock
   4008 	 */
   4009 	rw_enter(&mcip->mci_rw_lock, RW_WRITER);
   4010 
   4011 	/* Add it to the head */
   4012 	flent->fe_client_next = mcip->mci_flent_list;
   4013 	mcip->mci_flent_list = flent;
   4014 	mcip->mci_nflents++;
   4015 
   4016 	/*
   4017 	 * Keep track of the number of non-zero VIDs addresses per MAC
   4018 	 * client to avoid figuring it out in the data-path.
   4019 	 */
   4020 	if (i_mac_flow_vid(flent) != VLAN_ID_NONE)
   4021 		mcip->mci_nvids++;
   4022 
   4023 	rw_exit(&mcip->mci_rw_lock);
   4024 }
   4025 
   4026 /*
   4027  * Remove a flow entry from the MAC client's list.
   4028  */
   4029 static void
   4030 mac_client_remove_flow_from_list(mac_client_impl_t *mcip, flow_entry_t *flent)
   4031 {
   4032 	flow_entry_t	*fe = mcip->mci_flent_list;
   4033 	flow_entry_t	*prev_fe = NULL;
   4034 
   4035 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
   4036 	/*
   4037 	 * The promisc Rx data path walks the mci_flent_list. Protect by
   4038 	 * using mci_rw_lock
   4039 	 */
   4040 	rw_enter(&mcip->mci_rw_lock, RW_WRITER);
   4041 	while ((fe != NULL) && (fe != flent)) {
   4042 		prev_fe = fe;
   4043 		fe = fe->fe_client_next;
   4044 	}
   4045 
   4046 	ASSERT(fe != NULL);
   4047 	if (prev_fe == NULL) {
   4048 		/* Deleting the first node */
   4049 		mcip->mci_flent_list = fe->fe_client_next;
   4050 	} else {
   4051 		prev_fe->fe_client_next = fe->fe_client_next;
   4052 	}
   4053 	mcip->mci_nflents--;
   4054 
   4055 	if (i_mac_flow_vid(flent) != VLAN_ID_NONE)
   4056 		mcip->mci_nvids--;
   4057 
   4058 	rw_exit(&mcip->mci_rw_lock);
   4059 }
   4060 
   4061 /*
   4062  * Check if the given VID belongs to this MAC client.
   4063  */
   4064 boolean_t
   4065 mac_client_check_flow_vid(mac_client_impl_t *mcip, uint16_t vid)
   4066 {
   4067 	flow_entry_t	*flent;
   4068 	uint16_t	mci_vid;
   4069 
   4070 	/* The mci_flent_list is protected by mci_rw_lock */
   4071 	rw_enter(&mcip->mci_rw_lock, RW_WRITER);
   4072 	for (flent = mcip->mci_flent_list; flent != NULL;
   4073 	    flent = flent->fe_client_next) {
   4074 		mci_vid = i_mac_flow_vid(flent);
   4075 		if (vid == mci_vid) {
   4076 			rw_exit(&mcip->mci_rw_lock);
   4077 			return (B_TRUE);
   4078 		}
   4079 	}
   4080 	rw_exit(&mcip->mci_rw_lock);
   4081 	return (B_FALSE);
   4082 }
   4083 
   4084 /*
   4085  * Get the flow entry for the specified <MAC addr, VID> tuple.
   4086  */
   4087 static flow_entry_t *
   4088 mac_client_get_flow(mac_client_impl_t *mcip, mac_unicast_impl_t *muip)
   4089 {
   4090 	mac_address_t *map = mcip->mci_unicast;
   4091 	flow_entry_t *flent;
   4092 	uint16_t vid;
   4093 	flow_desc_t flow_desc;
   4094 
   4095 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
   4096 
   4097 	mac_flow_get_desc(mcip->mci_flent, &flow_desc);
   4098 	if (bcmp(flow_desc.fd_dst_mac, map->ma_addr, map->ma_len) != 0)
   4099 		return (NULL);
   4100 
   4101 	for (flent = mcip->mci_flent_list; flent != NULL;
   4102 	    flent = flent->fe_client_next) {
   4103 		vid = i_mac_flow_vid(flent);
   4104 		if (vid == muip->mui_vid) {
   4105 			return (flent);
   4106 		}
   4107 	}
   4108 
   4109 	return (NULL);
   4110 }
   4111 
   4112 /*
   4113  * Since mci_flent has the SRSs, when we want to remove it, we replace
   4114  * the flow_desc_t in mci_flent with that of an existing flent and then
   4115  * remove that flent instead of mci_flent.
   4116  */
   4117 static flow_entry_t *
   4118 mac_client_swap_mciflent(mac_client_impl_t *mcip)
   4119 {
   4120 	flow_entry_t	*flent = mcip->mci_flent;
   4121 	flow_tab_t	*ft = flent->fe_flow_tab;
   4122 	flow_entry_t	*flent1;
   4123 	flow_desc_t	fl_desc;
   4124 	char		fl_name[MAXFLOWNAMELEN];
   4125 	int		err;
   4126 
   4127 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
   4128 	ASSERT(mcip->mci_nflents > 1);
   4129 
   4130 	/* get the next flent following the primary flent  */
   4131 	flent1 = mcip->mci_flent_list->fe_client_next;
   4132 	ASSERT(flent1 != NULL && flent1->fe_flow_tab == ft);
   4133 
   4134 	/*
   4135 	 * Remove the flent from the flow table before updating the
   4136 	 * flow descriptor as the hash depends on the flow descriptor.
   4137 	 * This also helps incoming packet classification avoid having
   4138 	 * to grab fe_lock. Access to fe_flow_desc of a flent not in the
   4139 	 * flow table is done under the fe_lock so that log or stat functions
   4140 	 * see a self-consistent fe_flow_desc. The name and desc are specific
   4141 	 * to a flow, the rest are shared by all the clients, including
   4142 	 * resource control etc.
   4143 	 */
   4144 	mac_flow_remove(ft, flent, B_TRUE);
   4145 	mac_flow_remove(ft, flent1, B_TRUE);
   4146 
   4147 	bcopy(&flent->fe_flow_desc, &fl_desc, sizeof (flow_desc_t));
   4148 	bcopy(flent->fe_flow_name, fl_name, MAXFLOWNAMELEN);
   4149 
   4150 	/* update the primary flow entry */
   4151 	mutex_enter(&flent->fe_lock);
   4152 	bcopy(&flent1->fe_flow_desc, &flent->fe_flow_desc,
   4153 	    sizeof (flow_desc_t));
   4154 	bcopy(&flent1->fe_flow_name, &flent->fe_flow_name, MAXFLOWNAMELEN);
   4155 	mutex_exit(&flent->fe_lock);
   4156 
   4157 	/* update the flow entry that is to be freed */
   4158 	mutex_enter(&flent1->fe_lock);
   4159 	bcopy(&fl_desc, &flent1->fe_flow_desc, sizeof (flow_desc_t));
   4160 	bcopy(fl_name, &flent1->fe_flow_name, MAXFLOWNAMELEN);
   4161 	mutex_exit(&flent1->fe_lock);
   4162 
   4163 	/* now reinsert the flow entries in the table */
   4164 	err = mac_flow_add(ft, flent);
   4165 	ASSERT(err == 0);
   4166 
   4167 	err = mac_flow_add(ft, flent1);
   4168 	ASSERT(err == 0);
   4169 
   4170 	return (flent1);
   4171 }
   4172 
   4173 /*
   4174  * Return whether there is only one flow entry associated with this
   4175  * MAC client.
   4176  */
   4177 static boolean_t
   4178 mac_client_single_rcvr(mac_client_impl_t *mcip)
   4179 {
   4180 	return (mcip->mci_nflents == 1);
   4181 }
   4182 
   4183 int
   4184 mac_validate_props(mac_resource_props_t *mrp)
   4185 {
   4186 	if (mrp == NULL)
   4187 		return (0);
   4188 
   4189 	if (mrp->mrp_mask & MRP_PRIORITY) {
   4190 		mac_priority_level_t	pri = mrp->mrp_priority;
   4191 
   4192 		if (pri < MPL_LOW || pri > MPL_RESET)
   4193 			return (EINVAL);
   4194 	}
   4195 
   4196 	if (mrp->mrp_mask & MRP_MAXBW) {
   4197 		uint64_t maxbw = mrp->mrp_maxbw;
   4198 
   4199 		if (maxbw < MRP_MAXBW_MINVAL && maxbw != 0)
   4200 			return (EINVAL);
   4201 	}
   4202 	if (mrp->mrp_mask & MRP_CPUS) {
   4203 		int i, j;
   4204 		mac_cpu_mode_t	fanout;
   4205 
   4206 		if (mrp->mrp_ncpus > ncpus || mrp->mrp_ncpus > MAX_SR_FANOUT)
   4207 			return (EINVAL);
   4208 
   4209 		for (i = 0; i < mrp->mrp_ncpus; i++) {
   4210 			for (j = 0; j < mrp->mrp_ncpus; j++) {
   4211 				if (i != j &&
   4212 				    mrp->mrp_cpu[i] == mrp->mrp_cpu[j]) {
   4213 					return (EINVAL);
   4214 				}
   4215 			}
   4216 		}
   4217 
   4218 		for (i = 0; i < mrp->mrp_ncpus; i++) {
   4219 			cpu_t *cp;
   4220 			int rv;
   4221 
   4222 			mutex_enter(&cpu_lock);
   4223 			cp = cpu_get(mrp->mrp_cpu[i]);
   4224 			if (cp != NULL)
   4225 				rv = cpu_is_online(cp);
   4226 			else
   4227 				rv = 0;
   4228 			mutex_exit(&cpu_lock);
   4229 			if (rv == 0)
   4230 				return (EINVAL);
   4231 		}
   4232 
   4233 		fanout = mrp->mrp_fanout_mode;
   4234 		if (fanout < 0 || fanout > MCM_CPUS)
   4235 			return (EINVAL);
   4236 	}
   4237 
   4238 	if (mrp->mrp_mask & MRP_PROTECT) {
   4239 		int err = mac_protect_validate(mrp);
   4240 		if (err != 0)
   4241 			return (err);
   4242 	}
   4243 	return (0);
   4244 }
   4245 
   4246 /*
   4247  * Send a MAC_NOTE_LINK notification to all the MAC clients whenever the
   4248  * underlying physical link is down. This is to allow MAC clients to
   4249  * communicate with other clients.
   4250  */
   4251 void
   4252 mac_virtual_link_update(mac_impl_t *mip)
   4253 {
   4254 	if (mip->mi_linkstate != LINK_STATE_UP)
   4255 		i_mac_notify(mip, MAC_NOTE_LINK);
   4256 }
   4257 
   4258 /*
   4259  * For clients that have a pass-thru MAC, e.g. VNIC, we set the VNIC's
   4260  * mac handle in the client.
   4261  */
   4262 void
   4263 mac_set_upper_mac(mac_client_handle_t mch, mac_handle_t mh)
   4264 {
   4265 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   4266 
   4267 	mcip->mci_upper_mip = (mac_impl_t *)mh;
   4268 }
   4269 
   4270 /*
   4271  * Mark the mac as being used exclusively by the single mac client that is
   4272  * doing some control operation on this mac. No further opens of this mac
   4273  * will be allowed until this client calls mac_unmark_exclusive. The mac
   4274  * client calling this function must already be in the mac perimeter
   4275  */
   4276 int
   4277 mac_mark_exclusive(mac_handle_t mh)
   4278 {
   4279 	mac_impl_t	*mip = (mac_impl_t *)mh;
   4280 
   4281 	ASSERT(MAC_PERIM_HELD(mh));
   4282 	/*
   4283 	 * Look up its entry in the global hash table.
   4284 	 */
   4285 	rw_enter(&i_mac_impl_lock, RW_WRITER);
   4286 	if (mip->mi_state_flags & MIS_DISABLED) {
   4287 		rw_exit(&i_mac_impl_lock);
   4288 		return (ENOENT);
   4289 	}
   4290 
   4291 	/*
   4292 	 * A reference to mac is held even if the link is not plumbed.
   4293 	 * In i_dls_link_create() we open the MAC interface and hold the
   4294 	 * reference. There is an additional reference for the mac_open
   4295 	 * done in acquiring the mac perimeter
   4296 	 */
   4297 	if (mip->mi_ref != 2) {
   4298 		rw_exit(&i_mac_impl_lock);
   4299 		return (EBUSY);
   4300 	}
   4301 
   4302 	ASSERT(!(mip->mi_state_flags & MIS_EXCLUSIVE_HELD));
   4303 	mip->mi_state_flags |= MIS_EXCLUSIVE_HELD;
   4304 	rw_exit(&i_mac_impl_lock);
   4305 	return (0);
   4306 }
   4307 
   4308 void
   4309 mac_unmark_exclusive(mac_handle_t mh)
   4310 {
   4311 	mac_impl_t	*mip = (mac_impl_t *)mh;
   4312 
   4313 	ASSERT(MAC_PERIM_HELD(mh));
   4314 
   4315 	rw_enter(&i_mac_impl_lock, RW_WRITER);
   4316 	/* 1 for the creation and another for the perimeter */
   4317 	ASSERT(mip->mi_ref == 2 && (mip->mi_state_flags & MIS_EXCLUSIVE_HELD));
   4318 	mip->mi_state_flags &= ~MIS_EXCLUSIVE_HELD;
   4319 	rw_exit(&i_mac_impl_lock);
   4320 }
   4321 
   4322 /*
   4323  * Set the MTU for the specified MAC.  Note that this mechanism depends on
   4324  * the driver calling mac_maxsdu_update() to update the link MTU if it was
   4325  * successful in setting its MTU.
   4326  *
   4327  * Note that there is potential for improvement here.  A better model might be
   4328  * to not require drivers to call mac_maxsdu_update(), but rather have this
   4329  * function update mi_sdu_max and send notifications if the driver setprop
   4330  * callback succeeds.  This would remove the burden and complexity from
   4331  * drivers.
   4332  */
   4333 int
   4334 mac_set_mtu(mac_handle_t mh, uint_t new_mtu, uint_t *old_mtu_arg)
   4335 {
   4336 	mac_impl_t *mip = (mac_impl_t *)mh;
   4337 	uint_t old_mtu;
   4338 	int rv = 0;
   4339 
   4340 	i_mac_perim_enter(mip);
   4341 
   4342 	if (!(mip->mi_callbacks->mc_callbacks & (MC_SETPROP|MC_GETPROP))) {
   4343 		rv = ENOTSUP;
   4344 		goto bail;
   4345 	}
   4346 
   4347 	old_mtu = mip->mi_sdu_max;
   4348 
   4349 	if (old_mtu != new_mtu) {
   4350 		rv = mip->mi_callbacks->mc_setprop(mip->mi_driver,
   4351 		    "mtu", MAC_PROP_MTU, sizeof (uint_t), &new_mtu);
   4352 	}
   4353 
   4354 bail:
   4355 	i_mac_perim_exit(mip);
   4356 
   4357 	if (rv == 0 && old_mtu_arg != NULL)
   4358 		*old_mtu_arg = old_mtu;
   4359 	return (rv);
   4360 }
   4361 
   4362 void
   4363 mac_get_hwgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num,
   4364     uint_t *n_rings, uint_t *type, uint_t *n_clnts, char *clnts_name)
   4365 {
   4366 	mac_impl_t *mip = (mac_impl_t *)mh;
   4367 	mac_grp_client_t *mcip;
   4368 	uint_t i = 0, index = 0;
   4369 
   4370 	/* Revisit when we implement fully dynamic group allocation */
   4371 	ASSERT(grp_index >= 0 && grp_index < mip->mi_rx_group_count);
   4372 
   4373 	rw_enter(&mip->mi_rw_lock, RW_READER);
   4374 	*grp_num = mip->mi_rx_groups[grp_index].mrg_index;
   4375 	*type = mip->mi_rx_groups[grp_index].mrg_type;
   4376 	*n_rings = mip->mi_rx_groups[grp_index].mrg_cur_count;
   4377 	for (mcip = mip->mi_rx_groups[grp_index].mrg_clients; mcip != NULL;
   4378 	    mcip = mcip->mgc_next) {
   4379 		int name_len = strlen(mcip->mgc_client->mci_name);
   4380 
   4381 		/*
   4382 		 * MAXCLIENTNAMELEN is the buffer size reserved for client
   4383 		 * names.
   4384 		 * XXXX Formating the client name string needs to be moved
   4385 		 * to user land when fixing the size of dhi_clnts in
   4386 		 * dld_hwgrpinfo_t. We should use n_clients * client_name for
   4387 		 * dhi_clntsin instead of MAXCLIENTNAMELEN
   4388 		 */
   4389 		if (index + name_len >= MAXCLIENTNAMELEN) {
   4390 			index = MAXCLIENTNAMELEN;
   4391 			break;
   4392 		}
   4393 		bcopy(mcip->mgc_client->mci_name, &(clnts_name[index]),
   4394 		    name_len);
   4395 		index += name_len;
   4396 		clnts_name[index++] = ',';
   4397 		i++;
   4398 	}
   4399 
   4400 	/* Get rid of the last , */
   4401 	if (index > 0)
   4402 		clnts_name[index - 1] = '\0';
   4403 	*n_clnts = i;
   4404 	rw_exit(&mip->mi_rw_lock);
   4405 }
   4406 
   4407 uint_t
   4408 mac_hwgrp_num(mac_handle_t mh)
   4409 {
   4410 	mac_impl_t *mip = (mac_impl_t *)mh;
   4411 
   4412 	return (mip->mi_rx_group_count);
   4413 }
   4414