Home | History | Annotate | Download | only in mac
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * MAC Services Module
     29  *
     30  * The GLDv3 framework locking -  The MAC layer
     31  * --------------------------------------------
     32  *
     33  * The MAC layer is central to the GLD framework and can provide the locking
     34  * framework needed for itself and for the use of MAC clients. MAC end points
     35  * are fairly disjoint and don't share a lot of state. So a coarse grained
     36  * multi-threading scheme is to single thread all create/modify/delete or set
     37  * type of control operations on a per mac end point while allowing data threads
     38  * concurrently.
     39  *
     40  * Control operations (set) that modify a mac end point are always serialized on
     41  * a per mac end point basis. We have at most 1 such thread per mac end point
     42  * at a time.
     43  *
     44  * All other operations that are not serialized are essentially multi-threaded.
     45  * For example a control operation (get) like getting statistics which may not
     46  * care about reading values atomically or data threads sending or receiving
     47  * data. Mostly these type of operations don't modify the control state. Any
     48  * state these operations care about are protected using traditional locks.
     49  *
     50  * The perimeter only serializes serial operations. It does not imply there
     51  * aren't any other concurrent operations. However a serialized operation may
     52  * sometimes need to make sure it is the only thread. In this case it needs
     53  * to use reference counting mechanisms to cv_wait until any current data
     54  * threads are done.
     55  *
     56  * The mac layer itself does not hold any locks across a call to another layer.
     57  * The perimeter is however held across a down call to the driver to make the
     58  * whole control operation atomic with respect to other control operations.
     59  * Also the data path and get type control operations may proceed concurrently.
     60  * These operations synchronize with the single serial operation on a given mac
     61  * end point using regular locks. The perimeter ensures that conflicting
     62  * operations like say a mac_multicast_add and a mac_multicast_remove on the
     63  * same mac end point don't interfere with each other and also ensures that the
     64  * changes in the mac layer and the call to the underlying driver to say add a
     65  * multicast address are done atomically without interference from a thread
     66  * trying to delete the same address.
     67  *
     68  * For example, consider
     69  * mac_multicast_add()
     70  * {
     71  *	mac_perimeter_enter();	serialize all control operations
     72  *
     73  *	grab list lock		protect against access by data threads
     74  *	add to list
     75  *	drop list lock
     76  *
     77  *	call driver's mi_multicst
     78  *
     79  *	mac_perimeter_exit();
     80  * }
     81  *
     82  * To lessen the number of serialization locks and simplify the lock hierarchy,
     83  * we serialize all the control operations on a per mac end point by using a
     84  * single serialization lock called the perimeter. We allow recursive entry into
     85  * the perimeter to facilitate use of this mechanism by both the mac client and
     86  * the MAC layer itself.
     87  *
     88  * MAC client means an entity that does an operation on a mac handle
     89  * obtained from a mac_open/mac_client_open. Similarly MAC driver means
     90  * an entity that does an operation on a mac handle obtained from a
     91  * mac_register. An entity could be both client and driver but on different
     92  * handles eg. aggr. and should only make the corresponding mac interface calls
     93  * i.e. mac driver interface or mac client interface as appropriate for that
     94  * mac handle.
     95  *
     96  * General rules.
     97  * -------------
     98  *
     99  * R1. The lock order of upcall threads is naturally opposite to downcall
    100  * threads. Hence upcalls must not hold any locks across layers for fear of
    101  * recursive lock enter and lock order violation. This applies to all layers.
    102  *
    103  * R2. The perimeter is just another lock. Since it is held in the down
    104  * direction, acquiring the perimeter in an upcall is prohibited as it would
    105  * cause a deadlock. This applies to all layers.
    106  *
    107  * Note that upcalls that need to grab the mac perimeter (for example
    108  * mac_notify upcalls) can still achieve that by posting the request to a
    109  * thread, which can then grab all the required perimeters and locks in the
    110  * right global order. Note that in the above example the mac layer itself
    111  * won't grab the mac perimeter in the mac_notify upcall, instead the upcall
    112  * to the client must do that. Please see the aggr code for an example.
    113  *
    114  * MAC client rules
    115  * ----------------
    116  *
    117  * R3. A MAC client may use the MAC provided perimeter facility to serialize
    118  * control operations on a per mac end point. It does this by acquiring
    119  * and holding the perimeter across a sequence of calls to the mac layer.
    120  * This ensures atomicity across the entire block of mac calls. In this
    121  * model the MAC client must not hold any client locks across the calls to
    122  * the mac layer. This model is the preferred solution.
    123  *
    124  * R4. However if a MAC client has a lot of global state across all mac end
    125  * points the per mac end point serialization may not be sufficient. In this
    126  * case the client may choose to use global locks or use its own serialization.
    127  * To avoid deadlocks, these client layer locks held across the mac calls
    128  * in the control path must never be acquired by the data path for the reason
    129  * mentioned below.
    130  *
    131  * (Assume that a control operation that holds a client lock blocks in the
    132  * mac layer waiting for upcall reference counts to drop to zero. If an upcall
    133  * data thread that holds this reference count, tries to acquire the same
    134  * client lock subsequently it will deadlock).
    135  *
    136  * A MAC client may follow either the R3 model or the R4 model, but can't
    137  * mix both. In the former, the hierarchy is Perim -> client locks, but in
    138  * the latter it is client locks -> Perim.
    139  *
    140  * R5. MAC clients must make MAC calls (excluding data calls) in a cv_wait'able
    141  * context since they may block while trying to acquire the perimeter.
    142  * In addition some calls may block waiting for upcall refcnts to come down to
    143  * zero.
    144  *
    145  * R6. MAC clients must make sure that they are single threaded and all threads
    146  * from the top (in particular data threads) have finished before calling
    147  * mac_client_close. The MAC framework does not track the number of client
    148  * threads using the mac client handle. Also mac clients must make sure
    149  * they have undone all the control operations before calling mac_client_close.
    150  * For example mac_unicast_remove/mac_multicast_remove to undo the corresponding
    151  * mac_unicast_add/mac_multicast_add.
    152  *
    153  * MAC framework rules
    154  * -------------------
    155  *
    156  * R7. The mac layer itself must not hold any mac layer locks (except the mac
    157  * perimeter) across a call to any other layer from the mac layer. The call to
    158  * any other layer could be via mi_* entry points, classifier entry points into
    159  * the driver or via upcall pointers into layers above. The mac perimeter may
    160  * be acquired or held only in the down direction, for e.g. when calling into
    161  * a mi_* driver entry point to provide atomicity of the operation.
    162  *
    163  * R8. Since it is not guaranteed (see R14) that drivers won't hold locks across
    164  * mac driver interfaces, the MAC layer must provide a cut out for control
    165  * interfaces like upcall notifications and start them in a separate thread.
    166  *
    167  * R9. Note that locking order also implies a plumbing order. For example
    168  * VNICs are allowed to be created over aggrs, but not vice-versa. An attempt
    169  * to plumb in any other order must be failed at mac_open time, otherwise it
    170  * could lead to deadlocks due to inverse locking order.
    171  *
    172  * R10. MAC driver interfaces must not block since the driver could call them
    173  * in interrupt context.
    174  *
    175  * R11. Walkers must preferably not hold any locks while calling walker
    176  * callbacks. Instead these can operate on reference counts. In simple
    177  * callbacks it may be ok to hold a lock and call the callbacks, but this is
    178  * harder to maintain in the general case of arbitrary callbacks.
    179  *
    180  * R12. The MAC layer must protect upcall notification callbacks using reference
    181  * counts rather than holding locks across the callbacks.
    182  *
    183  * R13. Given the variety of drivers, it is preferable if the MAC layer can make
    184  * sure that any pointers (such as mac ring pointers) it passes to the driver
    185  * remain valid until mac unregister time. Currently the mac layer achieves
    186  * this by using generation numbers for rings and freeing the mac rings only
    187  * at unregister time.  The MAC layer must provide a layer of indirection and
    188  * must not expose underlying driver rings or driver data structures/pointers
    189  * directly to MAC clients.
    190  *
    191  * MAC driver rules
    192  * ----------------
    193  *
    194  * R14. It would be preferable if MAC drivers don't hold any locks across any
    195  * mac call. However at a minimum they must not hold any locks across data
    196  * upcalls. They must also make sure that all references to mac data structures
    197  * are cleaned up and that it is single threaded at mac_unregister time.
    198  *
    199  * R15. MAC driver interfaces don't block and so the action may be done
    200  * asynchronously in a separate thread as for example handling notifications.
    201  * The driver must not assume that the action is complete when the call
    202  * returns.
    203  *
    204  * R16. Drivers must maintain a generation number per Rx ring, and pass it
    205  * back to mac_rx_ring(); They are expected to increment the generation
    206  * number whenever the ring's stop routine is invoked.
    207  * See comments in mac_rx_ring();
    208  *
    209  * R17. Similarly mi_stop is another synchronization point and the driver must
    210  * ensure that all upcalls are done and there won't be any future upcall
    211  * before returning from mi_stop.
    212  *
    213  * R18. The driver may assume that all set/modify control operations via
    214  * the mi_* entry points are single threaded on a per mac end point.
    215  *
    216  * Lock and Perimeter hierarchy scenarios
    217  * ---------------------------------------
    218  *
    219  * i_mac_impl_lock -> mi_rw_lock -> srs_lock -> s_ring_lock[i_mac_tx_srs_notify]
    220  *
    221  * ft_lock -> fe_lock [mac_flow_lookup]
    222  *
    223  * mi_rw_lock -> fe_lock [mac_bcast_send]
    224  *
    225  * srs_lock -> mac_bw_lock [mac_rx_srs_drain_bw]
    226  *
    227  * cpu_lock -> mac_srs_g_lock -> srs_lock -> s_ring_lock [mac_walk_srs_and_bind]
    228  *
    229  * i_dls_devnet_lock -> mac layer locks [dls_devnet_rename]
    230  *
    231  * Perimeters are ordered P1 -> P2 -> P3 from top to bottom in order of mac
    232  * client to driver. In the case of clients that explicitly use the mac provided
    233  * perimeter mechanism for their serialization, the hierarchy is
    234  * Perimeter -> mac layer locks, since the client never holds any locks across
    235  * the mac calls. In the case of clients that use their own locks the hierarchy
    236  * is Client locks -> Mac Perim -> Mac layer locks. The client never explicitly
    237  * calls mac_perim_enter/exit in this case.
    238  *
    239  * Subflow creation rules
    240  * ---------------------------
    241  * o In case of a user specified cpulist present on underlying link and flows,
    242  * the flows cpulist must be a subset of the underlying link.
    243  * o In case of a user specified fanout mode present on link and flow, the
    244  * subflow fanout count has to be less than or equal to that of the
    245  * underlying link. The cpu-bindings for the subflows will be a subset of
    246  * the underlying link.
    247  * o In case no cpulist is specified on the underlying link or the flow, the
    248  * underlying link relies on a MAC tunable to provide out of box fanout.
    249  * The subflow will have no cpulist (the subflow will be unbound).
    250  * o In case no cpulist is specified on the underlying link, a subflow can
    251  * carry either a user-specified cpulist or fanout count. The cpu-bindings
    252  * for the subflow will not adhere to restriction that they need to be subset
    253  * of the underlying link.
    254  * o In the case where the underlying link is carrying either a user specified
    255  * cpulist or fanout mode and the subflow is unspecified, the subflow will be
    256  * created unbound.
    257  * o While creating unbound subflows, bandwidth mode changes attempt to
    258  * figure a right fanout count. In such cases the fanout count will override
    259  * the unbound cpu-binding behavior.
    260  * o In addition to this, while cycling between flow and link properties, we
    261  * impose a restriction that if a link property has a subflow with
    262  * user-specified attributes, we will not allow changing the link property.
    263  * The administrator needs to reset all the user specified properties for the
    264  * subflows before attempting a link property change.
    265  * Some of the above rules can be overridden by specifying additional command
    266  * line options while creating or modifying link or subflow properties.
    267  */
    268 
    269 #include <sys/types.h>
    270 #include <sys/conf.h>
    271 #include <sys/id_space.h>
    272 #include <sys/esunddi.h>
    273 #include <sys/stat.h>
    274 #include <sys/mkdev.h>
    275 #include <sys/stream.h>
    276 #include <sys/strsun.h>
    277 #include <sys/strsubr.h>
    278 #include <sys/dlpi.h>
    279 #include <sys/modhash.h>
    280 #include <sys/mac_provider.h>
    281 #include <sys/mac_client_impl.h>
    282 #include <sys/mac_soft_ring.h>
    283 #include <sys/mac_impl.h>
    284 #include <sys/mac.h>
    285 #include <sys/dls.h>
    286 #include <sys/dld.h>
    287 #include <sys/modctl.h>
    288 #include <sys/fs/dv_node.h>
    289 #include <sys/thread.h>
    290 #include <sys/proc.h>
    291 #include <sys/callb.h>
    292 #include <sys/cpuvar.h>
    293 #include <sys/atomic.h>
    294 #include <sys/bitmap.h>
    295 #include <sys/sdt.h>
    296 #include <sys/mac_flow.h>
    297 #include <sys/ddi_intr_impl.h>
    298 #include <sys/disp.h>
    299 #include <sys/sdt.h>
    300 #include <sys/vnic.h>
    301 #include <sys/vnic_impl.h>
    302 #include <sys/vlan.h>
    303 #include <inet/ip.h>
    304 #include <inet/ip6.h>
    305 #include <sys/exacct.h>
    306 #include <sys/exacct_impl.h>
    307 #include <inet/nd.h>
    308 #include <sys/ethernet.h>
    309 
    310 #define	IMPL_HASHSZ	67	/* prime */
    311 
    312 kmem_cache_t	*i_mac_impl_cachep;
    313 mod_hash_t		*i_mac_impl_hash;
    314 krwlock_t		i_mac_impl_lock;
    315 uint_t			i_mac_impl_count;
    316 static kmem_cache_t	*mac_ring_cache;
    317 static id_space_t	*minor_ids;
    318 static uint32_t		minor_count;
    319 
    320 /*
    321  * Logging stuff. Perhaps mac_logging_interval could be broken into
    322  * mac_flow_log_interval and mac_link_log_interval if we want to be
    323  * able to schedule them differently.
    324  */
    325 uint_t			mac_logging_interval;
    326 boolean_t		mac_flow_log_enable;
    327 boolean_t		mac_link_log_enable;
    328 timeout_id_t		mac_logging_timer;
    329 
    330 /* for debugging, see MAC_DBG_PRT() in mac_impl.h */
    331 int mac_dbg = 0;
    332 
    333 #define	MACTYPE_KMODDIR	"mac"
    334 #define	MACTYPE_HASHSZ	67
    335 static mod_hash_t	*i_mactype_hash;
    336 /*
    337  * i_mactype_lock synchronizes threads that obtain references to mactype_t
    338  * structures through i_mactype_getplugin().
    339  */
    340 static kmutex_t		i_mactype_lock;
    341 
    342 /*
    343  * mac_tx_percpu_cnt
    344  *
    345  * Number of per cpu locks per mac_client_impl_t. Used by the transmit side
    346  * in mac_tx to reduce lock contention. This is sized at boot time in mac_init.
    347  * mac_tx_percpu_cnt_max is settable in /etc/system and must be a power of 2.
    348  * Per cpu locks may be disabled by setting mac_tx_percpu_cnt_max to 1.
    349  */
    350 int mac_tx_percpu_cnt;
    351 int mac_tx_percpu_cnt_max = 128;
    352 
    353 /*
    354  * Call back functions for the bridge module.  These are guaranteed to be valid
    355  * when holding a reference on a link or when holding mip->mi_bridge_lock and
    356  * mi_bridge_link is non-NULL.
    357  */
    358 mac_bridge_tx_t mac_bridge_tx_cb;
    359 mac_bridge_rx_t mac_bridge_rx_cb;
    360 mac_bridge_ref_t mac_bridge_ref_cb;
    361 mac_bridge_ls_t mac_bridge_ls_cb;
    362 
    363 static int i_mac_constructor(void *, void *, int);
    364 static void i_mac_destructor(void *, void *);
    365 static int i_mac_ring_ctor(void *, void *, int);
    366 static void i_mac_ring_dtor(void *, void *);
    367 static mblk_t *mac_rx_classify(mac_impl_t *, mac_resource_handle_t, mblk_t *);
    368 void mac_tx_client_flush(mac_client_impl_t *);
    369 void mac_tx_client_block(mac_client_impl_t *);
    370 static void mac_rx_ring_quiesce(mac_ring_t *, uint_t);
    371 static int mac_start_group_and_rings(mac_group_t *);
    372 static void mac_stop_group_and_rings(mac_group_t *);
    373 
    374 /*
    375  * Module initialization functions.
    376  */
    377 
    378 void
    379 mac_init(void)
    380 {
    381 	mac_tx_percpu_cnt = ((boot_max_ncpus == -1) ? max_ncpus :
    382 	    boot_max_ncpus);
    383 
    384 	/* Upper bound is mac_tx_percpu_cnt_max */
    385 	if (mac_tx_percpu_cnt > mac_tx_percpu_cnt_max)
    386 		mac_tx_percpu_cnt = mac_tx_percpu_cnt_max;
    387 
    388 	if (mac_tx_percpu_cnt < 1) {
    389 		/* Someone set max_tx_percpu_cnt_max to 0 or less */
    390 		mac_tx_percpu_cnt = 1;
    391 	}
    392 
    393 	ASSERT(mac_tx_percpu_cnt >= 1);
    394 	mac_tx_percpu_cnt = (1 << highbit(mac_tx_percpu_cnt - 1));
    395 	/*
    396 	 * Make it of the form 2**N - 1 in the range
    397 	 * [0 .. mac_tx_percpu_cnt_max - 1]
    398 	 */
    399 	mac_tx_percpu_cnt--;
    400 
    401 	i_mac_impl_cachep = kmem_cache_create("mac_impl_cache",
    402 	    sizeof (mac_impl_t), 0, i_mac_constructor, i_mac_destructor,
    403 	    NULL, NULL, NULL, 0);
    404 	ASSERT(i_mac_impl_cachep != NULL);
    405 
    406 	mac_ring_cache = kmem_cache_create("mac_ring_cache",
    407 	    sizeof (mac_ring_t), 0, i_mac_ring_ctor, i_mac_ring_dtor, NULL,
    408 	    NULL, NULL, 0);
    409 	ASSERT(mac_ring_cache != NULL);
    410 
    411 	i_mac_impl_hash = mod_hash_create_extended("mac_impl_hash",
    412 	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
    413 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
    414 	rw_init(&i_mac_impl_lock, NULL, RW_DEFAULT, NULL);
    415 
    416 	mac_flow_init();
    417 	mac_soft_ring_init();
    418 	mac_bcast_init();
    419 	mac_client_init();
    420 
    421 	i_mac_impl_count = 0;
    422 
    423 	i_mactype_hash = mod_hash_create_extended("mactype_hash",
    424 	    MACTYPE_HASHSZ,
    425 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
    426 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
    427 
    428 	/*
    429 	 * Allocate an id space to manage minor numbers. The range of the
    430 	 * space will be from MAC_MAX_MINOR+1 to MAC_PRIVATE_MINOR-1.  This
    431 	 * leaves half of the 32-bit minors available for driver private use.
    432 	 */
    433 	minor_ids = id_space_create("mac_minor_ids", MAC_MAX_MINOR+1,
    434 	    MAC_PRIVATE_MINOR-1);
    435 	ASSERT(minor_ids != NULL);
    436 	minor_count = 0;
    437 
    438 	/* Let's default to 20 seconds */
    439 	mac_logging_interval = 20;
    440 	mac_flow_log_enable = B_FALSE;
    441 	mac_link_log_enable = B_FALSE;
    442 	mac_logging_timer = 0;
    443 }
    444 
    445 int
    446 mac_fini(void)
    447 {
    448 	if (i_mac_impl_count > 0 || minor_count > 0)
    449 		return (EBUSY);
    450 
    451 	id_space_destroy(minor_ids);
    452 	mac_flow_fini();
    453 
    454 	mod_hash_destroy_hash(i_mac_impl_hash);
    455 	rw_destroy(&i_mac_impl_lock);
    456 
    457 	mac_client_fini();
    458 	kmem_cache_destroy(mac_ring_cache);
    459 
    460 	mod_hash_destroy_hash(i_mactype_hash);
    461 	mac_soft_ring_finish();
    462 	return (0);
    463 }
    464 
    465 /*
    466  * Initialize a GLDv3 driver's device ops.  A driver that manages its own ops
    467  * (e.g. softmac) may pass in a NULL ops argument.
    468  */
    469 void
    470 mac_init_ops(struct dev_ops *ops, const char *name)
    471 {
    472 	major_t major = ddi_name_to_major((char *)name);
    473 
    474 	/*
    475 	 * By returning on error below, we are not letting the driver continue
    476 	 * in an undefined context.  The mac_register() function will faill if
    477 	 * DN_GLDV3_DRIVER isn't set.
    478 	 */
    479 	if (major == DDI_MAJOR_T_NONE)
    480 		return;
    481 	LOCK_DEV_OPS(&devnamesp[major].dn_lock);
    482 	devnamesp[major].dn_flags |= (DN_GLDV3_DRIVER | DN_NETWORK_DRIVER);
    483 	UNLOCK_DEV_OPS(&devnamesp[major].dn_lock);
    484 	if (ops != NULL)
    485 		dld_init_ops(ops, name);
    486 }
    487 
    488 void
    489 mac_fini_ops(struct dev_ops *ops)
    490 {
    491 	dld_fini_ops(ops);
    492 }
    493 
    494 /*ARGSUSED*/
    495 static int
    496 i_mac_constructor(void *buf, void *arg, int kmflag)
    497 {
    498 	mac_impl_t	*mip = buf;
    499 
    500 	bzero(buf, sizeof (mac_impl_t));
    501 
    502 	mip->mi_linkstate = LINK_STATE_UNKNOWN;
    503 
    504 	mutex_init(&mip->mi_lock, NULL, MUTEX_DRIVER, NULL);
    505 	rw_init(&mip->mi_rw_lock, NULL, RW_DRIVER, NULL);
    506 	mutex_init(&mip->mi_notify_lock, NULL, MUTEX_DRIVER, NULL);
    507 	mutex_init(&mip->mi_promisc_lock, NULL, MUTEX_DRIVER, NULL);
    508 	mutex_init(&mip->mi_ring_lock, NULL, MUTEX_DEFAULT, NULL);
    509 
    510 	mip->mi_notify_cb_info.mcbi_lockp = &mip->mi_notify_lock;
    511 	cv_init(&mip->mi_notify_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL);
    512 	mip->mi_promisc_cb_info.mcbi_lockp = &mip->mi_promisc_lock;
    513 	cv_init(&mip->mi_promisc_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL);
    514 
    515 	mutex_init(&mip->mi_bridge_lock, NULL, MUTEX_DEFAULT, NULL);
    516 
    517 	return (0);
    518 }
    519 
    520 /*ARGSUSED*/
    521 static void
    522 i_mac_destructor(void *buf, void *arg)
    523 {
    524 	mac_impl_t	*mip = buf;
    525 	mac_cb_info_t	*mcbi;
    526 
    527 	ASSERT(mip->mi_ref == 0);
    528 	ASSERT(mip->mi_active == 0);
    529 	ASSERT(mip->mi_linkstate == LINK_STATE_UNKNOWN);
    530 	ASSERT(mip->mi_devpromisc == 0);
    531 	ASSERT(mip->mi_ksp == NULL);
    532 	ASSERT(mip->mi_kstat_count == 0);
    533 	ASSERT(mip->mi_nclients == 0);
    534 	ASSERT(mip->mi_nactiveclients == 0);
    535 	ASSERT(mip->mi_single_active_client == NULL);
    536 	ASSERT(mip->mi_state_flags == 0);
    537 	ASSERT(mip->mi_factory_addr == NULL);
    538 	ASSERT(mip->mi_factory_addr_num == 0);
    539 	ASSERT(mip->mi_default_tx_ring == NULL);
    540 
    541 	mcbi = &mip->mi_notify_cb_info;
    542 	ASSERT(mcbi->mcbi_del_cnt == 0 && mcbi->mcbi_walker_cnt == 0);
    543 	ASSERT(mip->mi_notify_bits == 0);
    544 	ASSERT(mip->mi_notify_thread == NULL);
    545 	ASSERT(mcbi->mcbi_lockp == &mip->mi_notify_lock);
    546 	mcbi->mcbi_lockp = NULL;
    547 
    548 	mcbi = &mip->mi_promisc_cb_info;
    549 	ASSERT(mcbi->mcbi_del_cnt == 0 && mip->mi_promisc_list == NULL);
    550 	ASSERT(mip->mi_promisc_list == NULL);
    551 	ASSERT(mcbi->mcbi_lockp == &mip->mi_promisc_lock);
    552 	mcbi->mcbi_lockp = NULL;
    553 
    554 	ASSERT(mip->mi_bcast_ngrps == 0 && mip->mi_bcast_grp == NULL);
    555 	ASSERT(mip->mi_perim_owner == NULL && mip->mi_perim_ocnt == 0);
    556 
    557 	mutex_destroy(&mip->mi_lock);
    558 	rw_destroy(&mip->mi_rw_lock);
    559 
    560 	mutex_destroy(&mip->mi_promisc_lock);
    561 	cv_destroy(&mip->mi_promisc_cb_info.mcbi_cv);
    562 	mutex_destroy(&mip->mi_notify_lock);
    563 	cv_destroy(&mip->mi_notify_cb_info.mcbi_cv);
    564 	mutex_destroy(&mip->mi_ring_lock);
    565 
    566 	ASSERT(mip->mi_bridge_link == NULL);
    567 }
    568 
    569 /* ARGSUSED */
    570 static int
    571 i_mac_ring_ctor(void *buf, void *arg, int kmflag)
    572 {
    573 	mac_ring_t *ring = (mac_ring_t *)buf;
    574 
    575 	bzero(ring, sizeof (mac_ring_t));
    576 	cv_init(&ring->mr_cv, NULL, CV_DEFAULT, NULL);
    577 	mutex_init(&ring->mr_lock, NULL, MUTEX_DEFAULT, NULL);
    578 	ring->mr_state = MR_FREE;
    579 	return (0);
    580 }
    581 
    582 /* ARGSUSED */
    583 static void
    584 i_mac_ring_dtor(void *buf, void *arg)
    585 {
    586 	mac_ring_t *ring = (mac_ring_t *)buf;
    587 
    588 	cv_destroy(&ring->mr_cv);
    589 	mutex_destroy(&ring->mr_lock);
    590 }
    591 
    592 /*
    593  * Common functions to do mac callback addition and deletion. Currently this is
    594  * used by promisc callbacks and notify callbacks. List addition and deletion
    595  * need to take care of list walkers. List walkers in general, can't hold list
    596  * locks and make upcall callbacks due to potential lock order and recursive
    597  * reentry issues. Instead list walkers increment the list walker count to mark
    598  * the presence of a walker thread. Addition can be carefully done to ensure
    599  * that the list walker always sees either the old list or the new list.
    600  * However the deletion can't be done while the walker is active, instead the
    601  * deleting thread simply marks the entry as logically deleted. The last walker
    602  * physically deletes and frees up the logically deleted entries when the walk
    603  * is complete.
    604  */
    605 void
    606 mac_callback_add(mac_cb_info_t *mcbi, mac_cb_t **mcb_head,
    607     mac_cb_t *mcb_elem)
    608 {
    609 	mac_cb_t	*p;
    610 	mac_cb_t	**pp;
    611 
    612 	/* Verify it is not already in the list */
    613 	for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) {
    614 		if (p == mcb_elem)
    615 			break;
    616 	}
    617 	VERIFY(p == NULL);
    618 
    619 	/*
    620 	 * Add it to the head of the callback list. The membar ensures that
    621 	 * the following list pointer manipulations reach global visibility
    622 	 * in exactly the program order below.
    623 	 */
    624 	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
    625 
    626 	mcb_elem->mcb_nextp = *mcb_head;
    627 	membar_producer();
    628 	*mcb_head = mcb_elem;
    629 }
    630 
    631 /*
    632  * Mark the entry as logically deleted. If there aren't any walkers unlink
    633  * from the list. In either case return the corresponding status.
    634  */
    635 boolean_t
    636 mac_callback_remove(mac_cb_info_t *mcbi, mac_cb_t **mcb_head,
    637     mac_cb_t *mcb_elem)
    638 {
    639 	mac_cb_t	*p;
    640 	mac_cb_t	**pp;
    641 
    642 	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
    643 	/*
    644 	 * Search the callback list for the entry to be removed
    645 	 */
    646 	for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) {
    647 		if (p == mcb_elem)
    648 			break;
    649 	}
    650 	VERIFY(p != NULL);
    651 
    652 	/*
    653 	 * If there are walkers just mark it as deleted and the last walker
    654 	 * will remove from the list and free it.
    655 	 */
    656 	if (mcbi->mcbi_walker_cnt != 0) {
    657 		p->mcb_flags |= MCB_CONDEMNED;
    658 		mcbi->mcbi_del_cnt++;
    659 		return (B_FALSE);
    660 	}
    661 
    662 	ASSERT(mcbi->mcbi_del_cnt == 0);
    663 	*pp = p->mcb_nextp;
    664 	p->mcb_nextp = NULL;
    665 	return (B_TRUE);
    666 }
    667 
    668 /*
    669  * Wait for all pending callback removals to be completed
    670  */
    671 void
    672 mac_callback_remove_wait(mac_cb_info_t *mcbi)
    673 {
    674 	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
    675 	while (mcbi->mcbi_del_cnt != 0) {
    676 		DTRACE_PROBE1(need_wait, mac_cb_info_t *, mcbi);
    677 		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
    678 	}
    679 }
    680 
    681 /*
    682  * The last mac callback walker does the cleanup. Walk the list and unlik
    683  * all the logically deleted entries and construct a temporary list of
    684  * removed entries. Return the list of removed entries to the caller.
    685  */
    686 mac_cb_t *
    687 mac_callback_walker_cleanup(mac_cb_info_t *mcbi, mac_cb_t **mcb_head)
    688 {
    689 	mac_cb_t	*p;
    690 	mac_cb_t	**pp;
    691 	mac_cb_t	*rmlist = NULL;		/* List of removed elements */
    692 	int	cnt = 0;
    693 
    694 	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
    695 	ASSERT(mcbi->mcbi_del_cnt != 0 && mcbi->mcbi_walker_cnt == 0);
    696 
    697 	pp = mcb_head;
    698 	while (*pp != NULL) {
    699 		if ((*pp)->mcb_flags & MCB_CONDEMNED) {
    700 			p = *pp;
    701 			*pp = p->mcb_nextp;
    702 			p->mcb_nextp = rmlist;
    703 			rmlist = p;
    704 			cnt++;
    705 			continue;
    706 		}
    707 		pp = &(*pp)->mcb_nextp;
    708 	}
    709 
    710 	ASSERT(mcbi->mcbi_del_cnt == cnt);
    711 	mcbi->mcbi_del_cnt = 0;
    712 	return (rmlist);
    713 }
    714 
    715 boolean_t
    716 mac_callback_lookup(mac_cb_t **mcb_headp, mac_cb_t *mcb_elem)
    717 {
    718 	mac_cb_t	*mcb;
    719 
    720 	/* Verify it is not already in the list */
    721 	for (mcb = *mcb_headp; mcb != NULL; mcb = mcb->mcb_nextp) {
    722 		if (mcb == mcb_elem)
    723 			return (B_TRUE);
    724 	}
    725 
    726 	return (B_FALSE);
    727 }
    728 
    729 boolean_t
    730 mac_callback_find(mac_cb_info_t *mcbi, mac_cb_t **mcb_headp, mac_cb_t *mcb_elem)
    731 {
    732 	boolean_t	found;
    733 
    734 	mutex_enter(mcbi->mcbi_lockp);
    735 	found = mac_callback_lookup(mcb_headp, mcb_elem);
    736 	mutex_exit(mcbi->mcbi_lockp);
    737 
    738 	return (found);
    739 }
    740 
    741 /* Free the list of removed callbacks */
    742 void
    743 mac_callback_free(mac_cb_t *rmlist)
    744 {
    745 	mac_cb_t	*mcb;
    746 	mac_cb_t	*mcb_next;
    747 
    748 	for (mcb = rmlist; mcb != NULL; mcb = mcb_next) {
    749 		mcb_next = mcb->mcb_nextp;
    750 		kmem_free(mcb->mcb_objp, mcb->mcb_objsize);
    751 	}
    752 }
    753 
    754 /*
    755  * The promisc callbacks are in 2 lists, one off the 'mip' and another off the
    756  * 'mcip' threaded by mpi_mi_link and mpi_mci_link respectively. However there
    757  * is only a single shared total walker count, and an entry can't be physically
    758  * unlinked if a walker is active on either list. The last walker does this
    759  * cleanup of logically deleted entries.
    760  */
    761 void
    762 i_mac_promisc_walker_cleanup(mac_impl_t *mip)
    763 {
    764 	mac_cb_t	*rmlist;
    765 	mac_cb_t	*mcb;
    766 	mac_cb_t	*mcb_next;
    767 	mac_promisc_impl_t	*mpip;
    768 
    769 	/*
    770 	 * Construct a temporary list of deleted callbacks by walking the
    771 	 * the mi_promisc_list. Then for each entry in the temporary list,
    772 	 * remove it from the mci_promisc_list and free the entry.
    773 	 */
    774 	rmlist = mac_callback_walker_cleanup(&mip->mi_promisc_cb_info,
    775 	    &mip->mi_promisc_list);
    776 
    777 	for (mcb = rmlist; mcb != NULL; mcb = mcb_next) {
    778 		mcb_next = mcb->mcb_nextp;
    779 		mpip = (mac_promisc_impl_t *)mcb->mcb_objp;
    780 		VERIFY(mac_callback_remove(&mip->mi_promisc_cb_info,
    781 		    &mpip->mpi_mcip->mci_promisc_list, &mpip->mpi_mci_link));
    782 		mcb->mcb_flags = 0;
    783 		mcb->mcb_nextp = NULL;
    784 		kmem_cache_free(mac_promisc_impl_cache, mpip);
    785 	}
    786 }
    787 
    788 void
    789 i_mac_notify(mac_impl_t *mip, mac_notify_type_t type)
    790 {
    791 	mac_cb_info_t	*mcbi;
    792 
    793 	/*
    794 	 * Signal the notify thread even after mi_ref has become zero and
    795 	 * mi_disabled is set. The synchronization with the notify thread
    796 	 * happens in mac_unregister and that implies the driver must make
    797 	 * sure it is single-threaded (with respect to mac calls) and that
    798 	 * all pending mac calls have returned before it calls mac_unregister
    799 	 */
    800 	rw_enter(&i_mac_impl_lock, RW_READER);
    801 	if (mip->mi_state_flags & MIS_DISABLED)
    802 		goto exit;
    803 
    804 	/*
    805 	 * Guard against incorrect notifications.  (Running a newer
    806 	 * mac client against an older implementation?)
    807 	 */
    808 	if (type >= MAC_NNOTE)
    809 		goto exit;
    810 
    811 	mcbi = &mip->mi_notify_cb_info;
    812 	mutex_enter(mcbi->mcbi_lockp);
    813 	mip->mi_notify_bits |= (1 << type);
    814 	cv_broadcast(&mcbi->mcbi_cv);
    815 	mutex_exit(mcbi->mcbi_lockp);
    816 
    817 exit:
    818 	rw_exit(&i_mac_impl_lock);
    819 }
    820 
    821 /*
    822  * Mac serialization primitives. Please see the block comment at the
    823  * top of the file.
    824  */
    825 void
    826 i_mac_perim_enter(mac_impl_t *mip)
    827 {
    828 	mac_client_impl_t	*mcip;
    829 
    830 	if (mip->mi_state_flags & MIS_IS_VNIC) {
    831 		/*
    832 		 * This is a VNIC. Return the lower mac since that is what
    833 		 * we want to serialize on.
    834 		 */
    835 		mcip = mac_vnic_lower(mip);
    836 		mip = mcip->mci_mip;
    837 	}
    838 
    839 	mutex_enter(&mip->mi_perim_lock);
    840 	if (mip->mi_perim_owner == curthread) {
    841 		mip->mi_perim_ocnt++;
    842 		mutex_exit(&mip->mi_perim_lock);
    843 		return;
    844 	}
    845 
    846 	while (mip->mi_perim_owner != NULL)
    847 		cv_wait(&mip->mi_perim_cv, &mip->mi_perim_lock);
    848 
    849 	mip->mi_perim_owner = curthread;
    850 	ASSERT(mip->mi_perim_ocnt == 0);
    851 	mip->mi_perim_ocnt++;
    852 #ifdef DEBUG
    853 	mip->mi_perim_stack_depth = getpcstack(mip->mi_perim_stack,
    854 	    MAC_PERIM_STACK_DEPTH);
    855 #endif
    856 	mutex_exit(&mip->mi_perim_lock);
    857 }
    858 
    859 int
    860 i_mac_perim_enter_nowait(mac_impl_t *mip)
    861 {
    862 	/*
    863 	 * The vnic is a special case, since the serialization is done based
    864 	 * on the lower mac. If the lower mac is busy, it does not imply the
    865 	 * vnic can't be unregistered. But in the case of other drivers,
    866 	 * a busy perimeter or open mac handles implies that the mac is busy
    867 	 * and can't be unregistered.
    868 	 */
    869 	if (mip->mi_state_flags & MIS_IS_VNIC) {
    870 		i_mac_perim_enter(mip);
    871 		return (0);
    872 	}
    873 
    874 	mutex_enter(&mip->mi_perim_lock);
    875 	if (mip->mi_perim_owner != NULL) {
    876 		mutex_exit(&mip->mi_perim_lock);
    877 		return (EBUSY);
    878 	}
    879 	ASSERT(mip->mi_perim_ocnt == 0);
    880 	mip->mi_perim_owner = curthread;
    881 	mip->mi_perim_ocnt++;
    882 	mutex_exit(&mip->mi_perim_lock);
    883 
    884 	return (0);
    885 }
    886 
    887 void
    888 i_mac_perim_exit(mac_impl_t *mip)
    889 {
    890 	mac_client_impl_t *mcip;
    891 
    892 	if (mip->mi_state_flags & MIS_IS_VNIC) {
    893 		/*
    894 		 * This is a VNIC. Return the lower mac since that is what
    895 		 * we want to serialize on.
    896 		 */
    897 		mcip = mac_vnic_lower(mip);
    898 		mip = mcip->mci_mip;
    899 	}
    900 
    901 	ASSERT(mip->mi_perim_owner == curthread && mip->mi_perim_ocnt != 0);
    902 
    903 	mutex_enter(&mip->mi_perim_lock);
    904 	if (--mip->mi_perim_ocnt == 0) {
    905 		mip->mi_perim_owner = NULL;
    906 		cv_signal(&mip->mi_perim_cv);
    907 	}
    908 	mutex_exit(&mip->mi_perim_lock);
    909 }
    910 
    911 /*
    912  * Returns whether the current thread holds the mac perimeter. Used in making
    913  * assertions.
    914  */
    915 boolean_t
    916 mac_perim_held(mac_handle_t mh)
    917 {
    918 	mac_impl_t	*mip = (mac_impl_t *)mh;
    919 	mac_client_impl_t *mcip;
    920 
    921 	if (mip->mi_state_flags & MIS_IS_VNIC) {
    922 		/*
    923 		 * This is a VNIC. Return the lower mac since that is what
    924 		 * we want to serialize on.
    925 		 */
    926 		mcip = mac_vnic_lower(mip);
    927 		mip = mcip->mci_mip;
    928 	}
    929 	return (mip->mi_perim_owner == curthread);
    930 }
    931 
    932 /*
    933  * mac client interfaces to enter the mac perimeter of a mac end point, given
    934  * its mac handle, or macname or linkid.
    935  */
    936 void
    937 mac_perim_enter_by_mh(mac_handle_t mh, mac_perim_handle_t *mphp)
    938 {
    939 	mac_impl_t	*mip = (mac_impl_t *)mh;
    940 
    941 	i_mac_perim_enter(mip);
    942 	/*
    943 	 * The mac_perim_handle_t returned encodes the 'mip' and whether a
    944 	 * mac_open has been done internally while entering the perimeter.
    945 	 * This information is used in mac_perim_exit
    946 	 */
    947 	MAC_ENCODE_MPH(*mphp, mip, 0);
    948 }
    949 
    950 int
    951 mac_perim_enter_by_macname(const char *name, mac_perim_handle_t *mphp)
    952 {
    953 	int	err;
    954 	mac_handle_t	mh;
    955 
    956 	if ((err = mac_open(name, &mh)) != 0)
    957 		return (err);
    958 
    959 	mac_perim_enter_by_mh(mh, mphp);
    960 	MAC_ENCODE_MPH(*mphp, mh, 1);
    961 	return (0);
    962 }
    963 
    964 int
    965 mac_perim_enter_by_linkid(datalink_id_t linkid, mac_perim_handle_t *mphp)
    966 {
    967 	int	err;
    968 	mac_handle_t	mh;
    969 
    970 	if ((err = mac_open_by_linkid(linkid, &mh)) != 0)
    971 		return (err);
    972 
    973 	mac_perim_enter_by_mh(mh, mphp);
    974 	MAC_ENCODE_MPH(*mphp, mh, 1);
    975 	return (0);
    976 }
    977 
    978 void
    979 mac_perim_exit(mac_perim_handle_t mph)
    980 {
    981 	mac_impl_t	*mip;
    982 	boolean_t	need_close;
    983 
    984 	MAC_DECODE_MPH(mph, mip, need_close);
    985 	i_mac_perim_exit(mip);
    986 	if (need_close)
    987 		mac_close((mac_handle_t)mip);
    988 }
    989 
    990 int
    991 mac_hold(const char *macname, mac_impl_t **pmip)
    992 {
    993 	mac_impl_t	*mip;
    994 	int		err;
    995 
    996 	/*
    997 	 * Check the device name length to make sure it won't overflow our
    998 	 * buffer.
    999 	 */
   1000 	if (strlen(macname) >= MAXNAMELEN)
   1001 		return (EINVAL);
   1002 
   1003 	/*
   1004 	 * Look up its entry in the global hash table.
   1005 	 */
   1006 	rw_enter(&i_mac_impl_lock, RW_WRITER);
   1007 	err = mod_hash_find(i_mac_impl_hash, (mod_hash_key_t)macname,
   1008 	    (mod_hash_val_t *)&mip);
   1009 
   1010 	if (err != 0) {
   1011 		rw_exit(&i_mac_impl_lock);
   1012 		return (ENOENT);
   1013 	}
   1014 
   1015 	if (mip->mi_state_flags & MIS_DISABLED) {
   1016 		rw_exit(&i_mac_impl_lock);
   1017 		return (ENOENT);
   1018 	}
   1019 
   1020 	if (mip->mi_state_flags & MIS_EXCLUSIVE_HELD) {
   1021 		rw_exit(&i_mac_impl_lock);
   1022 		return (EBUSY);
   1023 	}
   1024 
   1025 	mip->mi_ref++;
   1026 	rw_exit(&i_mac_impl_lock);
   1027 
   1028 	*pmip = mip;
   1029 	return (0);
   1030 }
   1031 
   1032 void
   1033 mac_rele(mac_impl_t *mip)
   1034 {
   1035 	rw_enter(&i_mac_impl_lock, RW_WRITER);
   1036 	ASSERT(mip->mi_ref != 0);
   1037 	if (--mip->mi_ref == 0) {
   1038 		ASSERT(mip->mi_nactiveclients == 0 &&
   1039 		    !(mip->mi_state_flags & MIS_EXCLUSIVE));
   1040 	}
   1041 	rw_exit(&i_mac_impl_lock);
   1042 }
   1043 
   1044 /*
   1045  * Private GLDv3 function to start a MAC instance.
   1046  */
   1047 int
   1048 mac_start(mac_handle_t mh)
   1049 {
   1050 	mac_impl_t	*mip = (mac_impl_t *)mh;
   1051 	int		err = 0;
   1052 
   1053 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   1054 	ASSERT(mip->mi_start != NULL);
   1055 
   1056 	/*
   1057 	 * Check whether the device is already started.
   1058 	 */
   1059 	if (mip->mi_active++ == 0) {
   1060 		mac_ring_t *ring = NULL;
   1061 
   1062 		/*
   1063 		 * Start the device.
   1064 		 */
   1065 		err = mip->mi_start(mip->mi_driver);
   1066 		if (err != 0) {
   1067 			mip->mi_active--;
   1068 			return (err);
   1069 		}
   1070 
   1071 		/*
   1072 		 * Start the default tx ring.
   1073 		 */
   1074 		if (mip->mi_default_tx_ring != NULL) {
   1075 
   1076 			ring = (mac_ring_t *)mip->mi_default_tx_ring;
   1077 			err = mac_start_ring(ring);
   1078 			if (err != 0) {
   1079 				mip->mi_active--;
   1080 				return (err);
   1081 			}
   1082 			ring->mr_state = MR_INUSE;
   1083 		}
   1084 
   1085 		if (mip->mi_rx_groups != NULL) {
   1086 			/*
   1087 			 * Start the default ring, since it will be needed
   1088 			 * to receive broadcast and multicast traffic for
   1089 			 * both primary and non-primary MAC clients.
   1090 			 */
   1091 			mac_group_t *grp = &mip->mi_rx_groups[0];
   1092 
   1093 			ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED);
   1094 			err = mac_start_group_and_rings(grp);
   1095 			if (err != 0) {
   1096 				mip->mi_active--;
   1097 				if (ring != NULL) {
   1098 					mac_stop_ring(ring);
   1099 					ring->mr_state = MR_FREE;
   1100 				}
   1101 				return (err);
   1102 			}
   1103 			mac_set_rx_group_state(grp, MAC_GROUP_STATE_SHARED);
   1104 		}
   1105 	}
   1106 
   1107 	return (err);
   1108 }
   1109 
   1110 /*
   1111  * Private GLDv3 function to stop a MAC instance.
   1112  */
   1113 void
   1114 mac_stop(mac_handle_t mh)
   1115 {
   1116 	mac_impl_t	*mip = (mac_impl_t *)mh;
   1117 
   1118 	ASSERT(mip->mi_stop != NULL);
   1119 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   1120 
   1121 	/*
   1122 	 * Check whether the device is still needed.
   1123 	 */
   1124 	ASSERT(mip->mi_active != 0);
   1125 	if (--mip->mi_active == 0) {
   1126 		if (mip->mi_rx_groups != NULL) {
   1127 			/*
   1128 			 * There should be no more active clients since the
   1129 			 * MAC is being stopped. Stop the default RX group
   1130 			 * and transition it back to registered state.
   1131 			 */
   1132 			mac_group_t *grp = &mip->mi_rx_groups[0];
   1133 
   1134 			/*
   1135 			 * When clients are torn down, the groups
   1136 			 * are release via mac_release_rx_group which
   1137 			 * knows the the default group is always in
   1138 			 * started mode since broadcast uses it. So
   1139 			 * we can assert that their are no clients
   1140 			 * (since mac_bcast_add doesn't register itself
   1141 			 * as a client) and group is in SHARED state.
   1142 			 */
   1143 			ASSERT(grp->mrg_state == MAC_GROUP_STATE_SHARED);
   1144 			ASSERT(MAC_RX_GROUP_NO_CLIENT(grp) &&
   1145 			    mip->mi_nactiveclients == 0);
   1146 			mac_stop_group_and_rings(grp);
   1147 			mac_set_rx_group_state(grp, MAC_GROUP_STATE_REGISTERED);
   1148 		}
   1149 
   1150 		if (mip->mi_default_tx_ring != NULL) {
   1151 			mac_ring_t *ring;
   1152 
   1153 			ring = (mac_ring_t *)mip->mi_default_tx_ring;
   1154 			mac_stop_ring(ring);
   1155 			ring->mr_state = MR_FREE;
   1156 		}
   1157 
   1158 		/*
   1159 		 * Stop the device.
   1160 		 */
   1161 		mip->mi_stop(mip->mi_driver);
   1162 	}
   1163 }
   1164 
   1165 int
   1166 i_mac_promisc_set(mac_impl_t *mip, boolean_t on)
   1167 {
   1168 	int		err = 0;
   1169 
   1170 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   1171 	ASSERT(mip->mi_setpromisc != NULL);
   1172 
   1173 	if (on) {
   1174 		/*
   1175 		 * Enable promiscuous mode on the device if not yet enabled.
   1176 		 */
   1177 		if (mip->mi_devpromisc++ == 0) {
   1178 			err = mip->mi_setpromisc(mip->mi_driver, B_TRUE);
   1179 			if (err != 0) {
   1180 				mip->mi_devpromisc--;
   1181 				return (err);
   1182 			}
   1183 			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
   1184 		}
   1185 	} else {
   1186 		if (mip->mi_devpromisc == 0)
   1187 			return (EPROTO);
   1188 
   1189 		/*
   1190 		 * Disable promiscuous mode on the device if this is the last
   1191 		 * enabling.
   1192 		 */
   1193 		if (--mip->mi_devpromisc == 0) {
   1194 			err = mip->mi_setpromisc(mip->mi_driver, B_FALSE);
   1195 			if (err != 0) {
   1196 				mip->mi_devpromisc++;
   1197 				return (err);
   1198 			}
   1199 			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
   1200 		}
   1201 	}
   1202 
   1203 	return (0);
   1204 }
   1205 
   1206 /*
   1207  * The promiscuity state can change any time. If the caller needs to take
   1208  * actions that are atomic with the promiscuity state, then the caller needs
   1209  * to bracket the entire sequence with mac_perim_enter/exit
   1210  */
   1211 boolean_t
   1212 mac_promisc_get(mac_handle_t mh)
   1213 {
   1214 	mac_impl_t		*mip = (mac_impl_t *)mh;
   1215 
   1216 	/*
   1217 	 * Return the current promiscuity.
   1218 	 */
   1219 	return (mip->mi_devpromisc != 0);
   1220 }
   1221 
   1222 /*
   1223  * Invoked at MAC instance attach time to initialize the list
   1224  * of factory MAC addresses supported by a MAC instance. This function
   1225  * builds a local cache in the mac_impl_t for the MAC addresses
   1226  * supported by the underlying hardware. The MAC clients themselves
   1227  * use the mac_addr_factory*() functions to query and reserve
   1228  * factory MAC addresses.
   1229  */
   1230 void
   1231 mac_addr_factory_init(mac_impl_t *mip)
   1232 {
   1233 	mac_capab_multifactaddr_t capab;
   1234 	uint8_t *addr;
   1235 	int i;
   1236 
   1237 	/*
   1238 	 * First round to see how many factory MAC addresses are available.
   1239 	 */
   1240 	bzero(&capab, sizeof (capab));
   1241 	if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_MULTIFACTADDR,
   1242 	    &capab) || (capab.mcm_naddr == 0)) {
   1243 		/*
   1244 		 * The MAC instance doesn't support multiple factory
   1245 		 * MAC addresses, we're done here.
   1246 		 */
   1247 		return;
   1248 	}
   1249 
   1250 	/*
   1251 	 * Allocate the space and get all the factory addresses.
   1252 	 */
   1253 	addr = kmem_alloc(capab.mcm_naddr * MAXMACADDRLEN, KM_SLEEP);
   1254 	capab.mcm_getaddr(mip->mi_driver, capab.mcm_naddr, addr);
   1255 
   1256 	mip->mi_factory_addr_num = capab.mcm_naddr;
   1257 	mip->mi_factory_addr = kmem_zalloc(mip->mi_factory_addr_num *
   1258 	    sizeof (mac_factory_addr_t), KM_SLEEP);
   1259 
   1260 	for (i = 0; i < capab.mcm_naddr; i++) {
   1261 		bcopy(addr + i * MAXMACADDRLEN,
   1262 		    mip->mi_factory_addr[i].mfa_addr,
   1263 		    mip->mi_type->mt_addr_length);
   1264 		mip->mi_factory_addr[i].mfa_in_use = B_FALSE;
   1265 	}
   1266 
   1267 	kmem_free(addr, capab.mcm_naddr * MAXMACADDRLEN);
   1268 }
   1269 
   1270 void
   1271 mac_addr_factory_fini(mac_impl_t *mip)
   1272 {
   1273 	if (mip->mi_factory_addr == NULL) {
   1274 		ASSERT(mip->mi_factory_addr_num == 0);
   1275 		return;
   1276 	}
   1277 
   1278 	kmem_free(mip->mi_factory_addr, mip->mi_factory_addr_num *
   1279 	    sizeof (mac_factory_addr_t));
   1280 
   1281 	mip->mi_factory_addr = NULL;
   1282 	mip->mi_factory_addr_num = 0;
   1283 }
   1284 
   1285 /*
   1286  * Reserve a factory MAC address. If *slot is set to -1, the function
   1287  * attempts to reserve any of the available factory MAC addresses and
   1288  * returns the reserved slot id. If no slots are available, the function
   1289  * returns ENOSPC. If *slot is not set to -1, the function reserves
   1290  * the specified slot if it is available, or returns EBUSY is the slot
   1291  * is already used. Returns ENOTSUP if the underlying MAC does not
   1292  * support multiple factory addresses. If the slot number is not -1 but
   1293  * is invalid, returns EINVAL.
   1294  */
   1295 int
   1296 mac_addr_factory_reserve(mac_client_handle_t mch, int *slot)
   1297 {
   1298 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
   1299 	mac_impl_t *mip = mcip->mci_mip;
   1300 	int i, ret = 0;
   1301 
   1302 	i_mac_perim_enter(mip);
   1303 	/*
   1304 	 * Protect against concurrent readers that may need a self-consistent
   1305 	 * view of the factory addresses
   1306 	 */
   1307 	rw_enter(&mip->mi_rw_lock, RW_WRITER);
   1308 
   1309 	if (mip->mi_factory_addr_num == 0) {
   1310 		ret = ENOTSUP;
   1311 		goto bail;
   1312 	}
   1313 
   1314 	if (*slot != -1) {
   1315 		/* check the specified slot */
   1316 		if (*slot < 1 || *slot > mip->mi_factory_addr_num) {
   1317 			ret = EINVAL;
   1318 			goto bail;
   1319 		}
   1320 		if (mip->mi_factory_addr[*slot-1].mfa_in_use) {
   1321 			ret = EBUSY;
   1322 			goto bail;
   1323 		}
   1324 	} else {
   1325 		/* pick the next available slot */
   1326 		for (i = 0; i < mip->mi_factory_addr_num; i++) {
   1327 			if (!mip->mi_factory_addr[i].mfa_in_use)
   1328 				break;
   1329 		}
   1330 
   1331 		if (i == mip->mi_factory_addr_num) {
   1332 			ret = ENOSPC;
   1333 			goto bail;
   1334 		}
   1335 		*slot = i+1;
   1336 	}
   1337 
   1338 	mip->mi_factory_addr[*slot-1].mfa_in_use = B_TRUE;
   1339 	mip->mi_factory_addr[*slot-1].mfa_client = mcip;
   1340 
   1341 bail:
   1342 	rw_exit(&mip->mi_rw_lock);
   1343 	i_mac_perim_exit(mip);
   1344 	return (ret);
   1345 }
   1346 
   1347 /*
   1348  * Release the specified factory MAC address slot.
   1349  */
   1350 void
   1351 mac_addr_factory_release(mac_client_handle_t mch, uint_t slot)
   1352 {
   1353 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
   1354 	mac_impl_t *mip = mcip->mci_mip;
   1355 
   1356 	i_mac_perim_enter(mip);
   1357 	/*
   1358 	 * Protect against concurrent readers that may need a self-consistent
   1359 	 * view of the factory addresses
   1360 	 */
   1361 	rw_enter(&mip->mi_rw_lock, RW_WRITER);
   1362 
   1363 	ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num);
   1364 	ASSERT(mip->mi_factory_addr[slot-1].mfa_in_use);
   1365 
   1366 	mip->mi_factory_addr[slot-1].mfa_in_use = B_FALSE;
   1367 
   1368 	rw_exit(&mip->mi_rw_lock);
   1369 	i_mac_perim_exit(mip);
   1370 }
   1371 
   1372 /*
   1373  * Stores in mac_addr the value of the specified MAC address. Returns
   1374  * 0 on success, or EINVAL if the slot number is not valid for the MAC.
   1375  * The caller must provide a string of at least MAXNAMELEN bytes.
   1376  */
   1377 void
   1378 mac_addr_factory_value(mac_handle_t mh, int slot, uchar_t *mac_addr,
   1379     uint_t *addr_len, char *client_name, boolean_t *in_use_arg)
   1380 {
   1381 	mac_impl_t *mip = (mac_impl_t *)mh;
   1382 	boolean_t in_use;
   1383 
   1384 	ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num);
   1385 
   1386 	/*
   1387 	 * Readers need to hold mi_rw_lock. Writers need to hold mac perimeter
   1388 	 * and mi_rw_lock
   1389 	 */
   1390 	rw_enter(&mip->mi_rw_lock, RW_READER);
   1391 	bcopy(mip->mi_factory_addr[slot-1].mfa_addr, mac_addr, MAXMACADDRLEN);
   1392 	*addr_len = mip->mi_type->mt_addr_length;
   1393 	in_use = mip->mi_factory_addr[slot-1].mfa_in_use;
   1394 	if (in_use && client_name != NULL) {
   1395 		bcopy(mip->mi_factory_addr[slot-1].mfa_client->mci_name,
   1396 		    client_name, MAXNAMELEN);
   1397 	}
   1398 	if (in_use_arg != NULL)
   1399 		*in_use_arg = in_use;
   1400 	rw_exit(&mip->mi_rw_lock);
   1401 }
   1402 
   1403 /*
   1404  * Returns the number of factory MAC addresses (in addition to the
   1405  * primary MAC address), 0 if the underlying MAC doesn't support
   1406  * that feature.
   1407  */
   1408 uint_t
   1409 mac_addr_factory_num(mac_handle_t mh)
   1410 {
   1411 	mac_impl_t *mip = (mac_impl_t *)mh;
   1412 
   1413 	return (mip->mi_factory_addr_num);
   1414 }
   1415 
   1416 
   1417 void
   1418 mac_rx_group_unmark(mac_group_t *grp, uint_t flag)
   1419 {
   1420 	mac_ring_t	*ring;
   1421 
   1422 	for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next)
   1423 		ring->mr_flag &= ~flag;
   1424 }
   1425 
   1426 /*
   1427  * The following mac_hwrings_xxx() functions are private mac client functions
   1428  * used by the aggr driver to access and control the underlying HW Rx group
   1429  * and rings. In this case, the aggr driver has exclusive control of the
   1430  * underlying HW Rx group/rings, it calls the following functions to
   1431  * start/stop the HW Rx rings, disable/enable polling, add/remove mac'
   1432  * addresses, or set up the Rx callback.
   1433  */
   1434 /* ARGSUSED */
   1435 static void
   1436 mac_hwrings_rx_process(void *arg, mac_resource_handle_t srs,
   1437     mblk_t *mp_chain, boolean_t loopback)
   1438 {
   1439 	mac_soft_ring_set_t	*mac_srs = (mac_soft_ring_set_t *)srs;
   1440 	mac_srs_rx_t		*srs_rx = &mac_srs->srs_rx;
   1441 	mac_direct_rx_t		proc;
   1442 	void			*arg1;
   1443 	mac_resource_handle_t	arg2;
   1444 
   1445 	proc = srs_rx->sr_func;
   1446 	arg1 = srs_rx->sr_arg1;
   1447 	arg2 = mac_srs->srs_mrh;
   1448 
   1449 	proc(arg1, arg2, mp_chain, NULL);
   1450 }
   1451 
   1452 /*
   1453  * This function is called to get the list of HW rings that are reserved by
   1454  * an exclusive mac client.
   1455  *
   1456  * Return value: the number of HW rings.
   1457  */
   1458 int
   1459 mac_hwrings_get(mac_client_handle_t mch, mac_group_handle_t *hwgh,
   1460     mac_ring_handle_t *hwrh, mac_ring_type_t rtype)
   1461 {
   1462 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   1463 	int			cnt = 0;
   1464 
   1465 	switch (rtype) {
   1466 	case MAC_RING_TYPE_RX: {
   1467 		flow_entry_t	*flent = mcip->mci_flent;
   1468 		mac_group_t	*grp;
   1469 		mac_ring_t	*ring;
   1470 
   1471 		grp = flent->fe_rx_ring_group;
   1472 		/*
   1473 		 * The mac client did not reserve any RX group, return directly.
   1474 		 * This is probably because the underlying MAC does not support
   1475 		 * any groups.
   1476 		 */
   1477 		*hwgh = NULL;
   1478 		if (grp == NULL)
   1479 			return (0);
   1480 		/*
   1481 		 * This group must be reserved by this mac client.
   1482 		 */
   1483 		ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) &&
   1484 		    (mch == (mac_client_handle_t)
   1485 		    (MAC_RX_GROUP_ONLY_CLIENT(grp))));
   1486 		for (ring = grp->mrg_rings;
   1487 		    ring != NULL; ring = ring->mr_next, cnt++) {
   1488 			ASSERT(cnt < MAX_RINGS_PER_GROUP);
   1489 			hwrh[cnt] = (mac_ring_handle_t)ring;
   1490 		}
   1491 		*hwgh = (mac_group_handle_t)grp;
   1492 		return (cnt);
   1493 	}
   1494 	case MAC_RING_TYPE_TX: {
   1495 		mac_soft_ring_set_t	*tx_srs;
   1496 		mac_srs_tx_t		*tx;
   1497 
   1498 		tx_srs = MCIP_TX_SRS(mcip);
   1499 		tx = &tx_srs->srs_tx;
   1500 		for (; cnt < tx->st_ring_count; cnt++)
   1501 			hwrh[cnt] = tx->st_rings[cnt];
   1502 		return (cnt);
   1503 	}
   1504 	default:
   1505 		ASSERT(B_FALSE);
   1506 		return (-1);
   1507 	}
   1508 }
   1509 
   1510 /*
   1511  * Setup the RX callback of the mac client which exclusively controls HW ring.
   1512  */
   1513 void
   1514 mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh)
   1515 {
   1516 	mac_ring_t		*hw_ring = (mac_ring_t *)hwrh;
   1517 	mac_soft_ring_set_t	*mac_srs = hw_ring->mr_srs;
   1518 
   1519 	mac_srs->srs_mrh = prh;
   1520 	mac_srs->srs_rx.sr_lower_proc = mac_hwrings_rx_process;
   1521 }
   1522 
   1523 void
   1524 mac_hwring_teardown(mac_ring_handle_t hwrh)
   1525 {
   1526 	mac_ring_t		*hw_ring = (mac_ring_t *)hwrh;
   1527 	mac_soft_ring_set_t	*mac_srs = hw_ring->mr_srs;
   1528 
   1529 	mac_srs->srs_rx.sr_lower_proc = mac_rx_srs_process;
   1530 	mac_srs->srs_mrh = NULL;
   1531 }
   1532 
   1533 int
   1534 mac_hwring_disable_intr(mac_ring_handle_t rh)
   1535 {
   1536 	mac_ring_t *rr_ring = (mac_ring_t *)rh;
   1537 	mac_intr_t *intr = &rr_ring->mr_info.mri_intr;
   1538 
   1539 	return (intr->mi_disable(intr->mi_handle));
   1540 }
   1541 
   1542 int
   1543 mac_hwring_enable_intr(mac_ring_handle_t rh)
   1544 {
   1545 	mac_ring_t *rr_ring = (mac_ring_t *)rh;
   1546 	mac_intr_t *intr = &rr_ring->mr_info.mri_intr;
   1547 
   1548 	return (intr->mi_enable(intr->mi_handle));
   1549 }
   1550 
   1551 int
   1552 mac_hwring_start(mac_ring_handle_t rh)
   1553 {
   1554 	mac_ring_t *rr_ring = (mac_ring_t *)rh;
   1555 
   1556 	MAC_RING_UNMARK(rr_ring, MR_QUIESCE);
   1557 	return (0);
   1558 }
   1559 
   1560 void
   1561 mac_hwring_stop(mac_ring_handle_t rh)
   1562 {
   1563 	mac_ring_t *rr_ring = (mac_ring_t *)rh;
   1564 
   1565 	mac_rx_ring_quiesce(rr_ring, MR_QUIESCE);
   1566 }
   1567 
   1568 mblk_t *
   1569 mac_hwring_poll(mac_ring_handle_t rh, int bytes_to_pickup)
   1570 {
   1571 	mac_ring_t *rr_ring = (mac_ring_t *)rh;
   1572 	mac_ring_info_t *info = &rr_ring->mr_info;
   1573 
   1574 	return (info->mri_poll(info->mri_driver, bytes_to_pickup));
   1575 }
   1576 
   1577 /*
   1578  * Send packets through the selected tx ring.
   1579  */
   1580 mblk_t *
   1581 mac_hwring_tx(mac_ring_handle_t rh, mblk_t *mp)
   1582 {
   1583 	mac_ring_t *ring = (mac_ring_t *)rh;
   1584 	mac_ring_info_t *info = &ring->mr_info;
   1585 
   1586 	ASSERT(ring->mr_type == MAC_RING_TYPE_TX &&
   1587 	    ring->mr_state >= MR_INUSE);
   1588 	return (info->mri_tx(info->mri_driver, mp));
   1589 }
   1590 
   1591 int
   1592 mac_hwgroup_addmac(mac_group_handle_t gh, const uint8_t *addr)
   1593 {
   1594 	mac_group_t *group = (mac_group_t *)gh;
   1595 
   1596 	return (mac_group_addmac(group, addr));
   1597 }
   1598 
   1599 int
   1600 mac_hwgroup_remmac(mac_group_handle_t gh, const uint8_t *addr)
   1601 {
   1602 	mac_group_t *group = (mac_group_t *)gh;
   1603 
   1604 	return (mac_group_remmac(group, addr));
   1605 }
   1606 
   1607 /*
   1608  * Set the RX group to be shared/reserved. Note that the group must be
   1609  * started/stopped outside of this function.
   1610  */
   1611 void
   1612 mac_set_rx_group_state(mac_group_t *grp, mac_group_state_t state)
   1613 {
   1614 	/*
   1615 	 * If there is no change in the group state, just return.
   1616 	 */
   1617 	if (grp->mrg_state == state)
   1618 		return;
   1619 
   1620 	switch (state) {
   1621 	case MAC_GROUP_STATE_RESERVED:
   1622 		/*
   1623 		 * Successfully reserved the group.
   1624 		 *
   1625 		 * Given that there is an exclusive client controlling this
   1626 		 * group, we enable the group level polling when available,
   1627 		 * so that SRSs get to turn on/off individual rings they's
   1628 		 * assigned to.
   1629 		 */
   1630 		ASSERT(MAC_PERIM_HELD(grp->mrg_mh));
   1631 
   1632 		if (GROUP_INTR_DISABLE_FUNC(grp) != NULL)
   1633 			GROUP_INTR_DISABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp));
   1634 
   1635 		break;
   1636 
   1637 	case MAC_GROUP_STATE_SHARED:
   1638 		/*
   1639 		 * Set all rings of this group to software classified.
   1640 		 * If the group has an overriding interrupt, then re-enable it.
   1641 		 */
   1642 		ASSERT(MAC_PERIM_HELD(grp->mrg_mh));
   1643 
   1644 		if (GROUP_INTR_ENABLE_FUNC(grp) != NULL)
   1645 			GROUP_INTR_ENABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp));
   1646 
   1647 		/* The ring is not available for reservations any more */
   1648 		break;
   1649 
   1650 	case MAC_GROUP_STATE_REGISTERED:
   1651 		/* Also callable from mac_register, perim is not held */
   1652 		break;
   1653 
   1654 	default:
   1655 		ASSERT(B_FALSE);
   1656 		break;
   1657 	}
   1658 
   1659 	grp->mrg_state = state;
   1660 }
   1661 
   1662 /*
   1663  * Quiesce future hardware classified packets for the specified Rx ring
   1664  */
   1665 static void
   1666 mac_rx_ring_quiesce(mac_ring_t *rx_ring, uint_t ring_flag)
   1667 {
   1668 	ASSERT(rx_ring->mr_classify_type == MAC_HW_CLASSIFIER);
   1669 	ASSERT(ring_flag == MR_CONDEMNED || ring_flag  == MR_QUIESCE);
   1670 
   1671 	mutex_enter(&rx_ring->mr_lock);
   1672 	rx_ring->mr_flag |= ring_flag;
   1673 	while (rx_ring->mr_refcnt != 0)
   1674 		cv_wait(&rx_ring->mr_cv, &rx_ring->mr_lock);
   1675 	mutex_exit(&rx_ring->mr_lock);
   1676 }
   1677 
   1678 /*
   1679  * Please see mac_tx for details about the per cpu locking scheme
   1680  */
   1681 static void
   1682 mac_tx_lock_all(mac_client_impl_t *mcip)
   1683 {
   1684 	int	i;
   1685 
   1686 	for (i = 0; i <= mac_tx_percpu_cnt; i++)
   1687 		mutex_enter(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
   1688 }
   1689 
   1690 static void
   1691 mac_tx_unlock_all(mac_client_impl_t *mcip)
   1692 {
   1693 	int	i;
   1694 
   1695 	for (i = mac_tx_percpu_cnt; i >= 0; i--)
   1696 		mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
   1697 }
   1698 
   1699 static void
   1700 mac_tx_unlock_allbutzero(mac_client_impl_t *mcip)
   1701 {
   1702 	int	i;
   1703 
   1704 	for (i = mac_tx_percpu_cnt; i > 0; i--)
   1705 		mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
   1706 }
   1707 
   1708 static int
   1709 mac_tx_sum_refcnt(mac_client_impl_t *mcip)
   1710 {
   1711 	int	i;
   1712 	int	refcnt = 0;
   1713 
   1714 	for (i = 0; i <= mac_tx_percpu_cnt; i++)
   1715 		refcnt += mcip->mci_tx_pcpu[i].pcpu_tx_refcnt;
   1716 
   1717 	return (refcnt);
   1718 }
   1719 
   1720 /*
   1721  * Stop future Tx packets coming down from the client in preparation for
   1722  * quiescing the Tx side. This is needed for dynamic reclaim and reassignment
   1723  * of rings between clients
   1724  */
   1725 void
   1726 mac_tx_client_block(mac_client_impl_t *mcip)
   1727 {
   1728 	mac_tx_lock_all(mcip);
   1729 	mcip->mci_tx_flag |= MCI_TX_QUIESCE;
   1730 	while (mac_tx_sum_refcnt(mcip) != 0) {
   1731 		mac_tx_unlock_allbutzero(mcip);
   1732 		cv_wait(&mcip->mci_tx_cv, &mcip->mci_tx_pcpu[0].pcpu_tx_lock);
   1733 		mutex_exit(&mcip->mci_tx_pcpu[0].pcpu_tx_lock);
   1734 		mac_tx_lock_all(mcip);
   1735 	}
   1736 	mac_tx_unlock_all(mcip);
   1737 }
   1738 
   1739 void
   1740 mac_tx_client_unblock(mac_client_impl_t *mcip)
   1741 {
   1742 	mac_tx_lock_all(mcip);
   1743 	mcip->mci_tx_flag &= ~MCI_TX_QUIESCE;
   1744 	mac_tx_unlock_all(mcip);
   1745 	/*
   1746 	 * We may fail to disable flow control for the last MAC_NOTE_TX
   1747 	 * notification because the MAC client is quiesced. Send the
   1748 	 * notification again.
   1749 	 */
   1750 	i_mac_notify(mcip->mci_mip, MAC_NOTE_TX);
   1751 }
   1752 
   1753 /*
   1754  * Wait for an SRS to quiesce. The SRS worker will signal us when the
   1755  * quiesce is done.
   1756  */
   1757 static void
   1758 mac_srs_quiesce_wait(mac_soft_ring_set_t *srs, uint_t srs_flag)
   1759 {
   1760 	mutex_enter(&srs->srs_lock);
   1761 	while (!(srs->srs_state & srs_flag))
   1762 		cv_wait(&srs->srs_quiesce_done_cv, &srs->srs_lock);
   1763 	mutex_exit(&srs->srs_lock);
   1764 }
   1765 
   1766 /*
   1767  * Quiescing an Rx SRS is achieved by the following sequence. The protocol
   1768  * works bottom up by cutting off packet flow from the bottommost point in the
   1769  * mac, then the SRS, and then the soft rings. There are 2 use cases of this
   1770  * mechanism. One is a temporary quiesce of the SRS, such as say while changing
   1771  * the Rx callbacks. Another use case is Rx SRS teardown. In the former case
   1772  * the QUIESCE prefix/suffix is used and in the latter the CONDEMNED is used
   1773  * for the SRS and MR flags. In the former case the threads pause waiting for
   1774  * a restart, while in the latter case the threads exit. The Tx SRS teardown
   1775  * is also mostly similar to the above.
   1776  *
   1777  * 1. Stop future hardware classified packets at the lowest level in the mac.
   1778  *    Remove any hardware classification rule (CONDEMNED case) and mark the
   1779  *    rings as CONDEMNED or QUIESCE as appropriate. This prevents the mr_refcnt
   1780  *    from increasing. Upcalls from the driver that come through hardware
   1781  *    classification will be dropped in mac_rx from now on. Then we wait for
   1782  *    the mr_refcnt to drop to zero. When the mr_refcnt reaches zero we are
   1783  *    sure there aren't any upcall threads from the driver through hardware
   1784  *    classification. In the case of SRS teardown we also remove the
   1785  *    classification rule in the driver.
   1786  *
   1787  * 2. Stop future software classified packets by marking the flow entry with
   1788  *    FE_QUIESCE or FE_CONDEMNED as appropriate which prevents the refcnt from
   1789  *    increasing. We also remove the flow entry from the table in the latter
   1790  *    case. Then wait for the fe_refcnt to reach an appropriate quiescent value
   1791  *    that indicates there aren't any active threads using that flow entry.
   1792  *
   1793  * 3. Quiesce the SRS and softrings by signaling the SRS. The SRS poll thread,
   1794  *    SRS worker thread, and the soft ring threads are quiesced in sequence
   1795  *    with the SRS worker thread serving as a master controller. This
   1796  *    mechansim is explained in mac_srs_worker_quiesce().
   1797  *
   1798  * The restart mechanism to reactivate the SRS and softrings is explained
   1799  * in mac_srs_worker_restart(). Here we just signal the SRS worker to start the
   1800  * restart sequence.
   1801  */
   1802 void
   1803 mac_rx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag)
   1804 {
   1805 	flow_entry_t	*flent = srs->srs_flent;
   1806 	uint_t	mr_flag, srs_done_flag;
   1807 
   1808 	ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent)));
   1809 	ASSERT(!(srs->srs_type & SRST_TX));
   1810 
   1811 	if (srs_quiesce_flag == SRS_CONDEMNED) {
   1812 		mr_flag = MR_CONDEMNED;
   1813 		srs_done_flag = SRS_CONDEMNED_DONE;
   1814 		if (srs->srs_type & SRST_CLIENT_POLL_ENABLED)
   1815 			mac_srs_client_poll_disable(srs->srs_mcip, srs);
   1816 	} else {
   1817 		ASSERT(srs_quiesce_flag == SRS_QUIESCE);
   1818 		mr_flag = MR_QUIESCE;
   1819 		srs_done_flag = SRS_QUIESCE_DONE;
   1820 		if (srs->srs_type & SRST_CLIENT_POLL_ENABLED)
   1821 			mac_srs_client_poll_quiesce(srs->srs_mcip, srs);
   1822 	}
   1823 
   1824 	if (srs->srs_ring != NULL) {
   1825 		mac_rx_ring_quiesce(srs->srs_ring, mr_flag);
   1826 	} else {
   1827 		/*
   1828 		 * SRS is driven by software classification. In case
   1829 		 * of CONDEMNED, the top level teardown functions will
   1830 		 * deal with flow removal.
   1831 		 */
   1832 		if (srs_quiesce_flag != SRS_CONDEMNED) {
   1833 			FLOW_MARK(flent, FE_QUIESCE);
   1834 			mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
   1835 		}
   1836 	}
   1837 
   1838 	/*
   1839 	 * Signal the SRS to quiesce itself, and then cv_wait for the
   1840 	 * SRS quiesce to complete. The SRS worker thread will wake us
   1841 	 * up when the quiesce is complete
   1842 	 */
   1843 	mac_srs_signal(srs, srs_quiesce_flag);
   1844 	mac_srs_quiesce_wait(srs, srs_done_flag);
   1845 }
   1846 
   1847 /*
   1848  * Remove an SRS.
   1849  */
   1850 void
   1851 mac_rx_srs_remove(mac_soft_ring_set_t *srs)
   1852 {
   1853 	flow_entry_t *flent = srs->srs_flent;
   1854 	int i;
   1855 
   1856 	mac_rx_srs_quiesce(srs, SRS_CONDEMNED);
   1857 	/*
   1858 	 * Locate and remove our entry in the fe_rx_srs[] array, and
   1859 	 * adjust the fe_rx_srs array entries and array count by
   1860 	 * moving the last entry into the vacated spot.
   1861 	 */
   1862 	mutex_enter(&flent->fe_lock);
   1863 	for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
   1864 		if (flent->fe_rx_srs[i] == srs)
   1865 			break;
   1866 	}
   1867 
   1868 	ASSERT(i != 0 && i < flent->fe_rx_srs_cnt);
   1869 	if (i != flent->fe_rx_srs_cnt - 1) {
   1870 		flent->fe_rx_srs[i] =
   1871 		    flent->fe_rx_srs[flent->fe_rx_srs_cnt - 1];
   1872 		i = flent->fe_rx_srs_cnt - 1;
   1873 	}
   1874 
   1875 	flent->fe_rx_srs[i] = NULL;
   1876 	flent->fe_rx_srs_cnt--;
   1877 	mutex_exit(&flent->fe_lock);
   1878 
   1879 	mac_srs_free(srs);
   1880 }
   1881 
   1882 static void
   1883 mac_srs_clear_flag(mac_soft_ring_set_t *srs, uint_t flag)
   1884 {
   1885 	mutex_enter(&srs->srs_lock);
   1886 	srs->srs_state &= ~flag;
   1887 	mutex_exit(&srs->srs_lock);
   1888 }
   1889 
   1890 void
   1891 mac_rx_srs_restart(mac_soft_ring_set_t *srs)
   1892 {
   1893 	flow_entry_t	*flent = srs->srs_flent;
   1894 	mac_ring_t	*mr;
   1895 
   1896 	ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent)));
   1897 	ASSERT((srs->srs_type & SRST_TX) == 0);
   1898 
   1899 	/*
   1900 	 * This handles a change in the number of SRSs between the quiesce and
   1901 	 * and restart operation of a flow.
   1902 	 */
   1903 	if (!SRS_QUIESCED(srs))
   1904 		return;
   1905 
   1906 	/*
   1907 	 * Signal the SRS to restart itself. Wait for the restart to complete
   1908 	 * Note that we only restart the SRS if it is not marked as
   1909 	 * permanently quiesced.
   1910 	 */
   1911 	if (!SRS_QUIESCED_PERMANENT(srs)) {
   1912 		mac_srs_signal(srs, SRS_RESTART);
   1913 		mac_srs_quiesce_wait(srs, SRS_RESTART_DONE);
   1914 		mac_srs_clear_flag(srs, SRS_RESTART_DONE);
   1915 
   1916 		mac_srs_client_poll_restart(srs->srs_mcip, srs);
   1917 	}
   1918 
   1919 	/* Finally clear the flags to let the packets in */
   1920 	mr = srs->srs_ring;
   1921 	if (mr != NULL) {
   1922 		MAC_RING_UNMARK(mr, MR_QUIESCE);
   1923 		/* In case the ring was stopped, safely restart it */
   1924 		(void) mac_start_ring(mr);
   1925 	} else {
   1926 		FLOW_UNMARK(flent, FE_QUIESCE);
   1927 	}
   1928 }
   1929 
   1930 /*
   1931  * Temporary quiesce of a flow and associated Rx SRS.
   1932  * Please see block comment above mac_rx_classify_flow_rem.
   1933  */
   1934 /* ARGSUSED */
   1935 int
   1936 mac_rx_classify_flow_quiesce(flow_entry_t *flent, void *arg)
   1937 {
   1938 	int		i;
   1939 
   1940 	for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
   1941 		mac_rx_srs_quiesce((mac_soft_ring_set_t *)flent->fe_rx_srs[i],
   1942 		    SRS_QUIESCE);
   1943 	}
   1944 	return (0);
   1945 }
   1946 
   1947 /*
   1948  * Restart a flow and associated Rx SRS that has been quiesced temporarily
   1949  * Please see block comment above mac_rx_classify_flow_rem
   1950  */
   1951 /* ARGSUSED */
   1952 int
   1953 mac_rx_classify_flow_restart(flow_entry_t *flent, void *arg)
   1954 {
   1955 	int		i;
   1956 
   1957 	for (i = 0; i < flent->fe_rx_srs_cnt; i++)
   1958 		mac_rx_srs_restart((mac_soft_ring_set_t *)flent->fe_rx_srs[i]);
   1959 
   1960 	return (0);
   1961 }
   1962 
   1963 void
   1964 mac_srs_perm_quiesce(mac_client_handle_t mch, boolean_t on)
   1965 {
   1966 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   1967 	flow_entry_t		*flent = mcip->mci_flent;
   1968 	mac_impl_t		*mip = mcip->mci_mip;
   1969 	mac_soft_ring_set_t	*mac_srs;
   1970 	int			i;
   1971 
   1972 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   1973 
   1974 	if (flent == NULL)
   1975 		return;
   1976 
   1977 	for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
   1978 		mac_srs = flent->fe_rx_srs[i];
   1979 		mutex_enter(&mac_srs->srs_lock);
   1980 		if (on)
   1981 			mac_srs->srs_state |= SRS_QUIESCE_PERM;
   1982 		else
   1983 			mac_srs->srs_state &= ~SRS_QUIESCE_PERM;
   1984 		mutex_exit(&mac_srs->srs_lock);
   1985 	}
   1986 }
   1987 
   1988 void
   1989 mac_rx_client_quiesce(mac_client_handle_t mch)
   1990 {
   1991 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   1992 	mac_impl_t		*mip = mcip->mci_mip;
   1993 
   1994 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   1995 
   1996 	if (MCIP_DATAPATH_SETUP(mcip)) {
   1997 		(void) mac_rx_classify_flow_quiesce(mcip->mci_flent,
   1998 		    NULL);
   1999 		(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
   2000 		    mac_rx_classify_flow_quiesce, NULL);
   2001 	}
   2002 }
   2003 
   2004 void
   2005 mac_rx_client_restart(mac_client_handle_t mch)
   2006 {
   2007 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   2008 	mac_impl_t		*mip = mcip->mci_mip;
   2009 
   2010 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   2011 
   2012 	if (MCIP_DATAPATH_SETUP(mcip)) {
   2013 		(void) mac_rx_classify_flow_restart(mcip->mci_flent, NULL);
   2014 		(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
   2015 		    mac_rx_classify_flow_restart, NULL);
   2016 	}
   2017 }
   2018 
   2019 /*
   2020  * This function only quiesces the Tx SRS and softring worker threads. Callers
   2021  * need to make sure that there aren't any mac client threads doing current or
   2022  * future transmits in the mac before calling this function.
   2023  */
   2024 void
   2025 mac_tx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag)
   2026 {
   2027 	mac_client_impl_t	*mcip = srs->srs_mcip;
   2028 
   2029 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
   2030 
   2031 	ASSERT(srs->srs_type & SRST_TX);
   2032 	ASSERT(srs_quiesce_flag == SRS_CONDEMNED ||
   2033 	    srs_quiesce_flag == SRS_QUIESCE);
   2034 
   2035 	/*
   2036 	 * Signal the SRS to quiesce itself, and then cv_wait for the
   2037 	 * SRS quiesce to complete. The SRS worker thread will wake us
   2038 	 * up when the quiesce is complete
   2039 	 */
   2040 	mac_srs_signal(srs, srs_quiesce_flag);
   2041 	mac_srs_quiesce_wait(srs, srs_quiesce_flag == SRS_QUIESCE ?
   2042 	    SRS_QUIESCE_DONE : SRS_CONDEMNED_DONE);
   2043 }
   2044 
   2045 void
   2046 mac_tx_srs_restart(mac_soft_ring_set_t *srs)
   2047 {
   2048 	/*
   2049 	 * Resizing the fanout could result in creation of new SRSs.
   2050 	 * They may not necessarily be in the quiesced state in which
   2051 	 * case it need be restarted
   2052 	 */
   2053 	if (!SRS_QUIESCED(srs))
   2054 		return;
   2055 
   2056 	mac_srs_signal(srs, SRS_RESTART);
   2057 	mac_srs_quiesce_wait(srs, SRS_RESTART_DONE);
   2058 	mac_srs_clear_flag(srs, SRS_RESTART_DONE);
   2059 }
   2060 
   2061 /*
   2062  * Temporary quiesce of a flow and associated Rx SRS.
   2063  * Please see block comment above mac_rx_srs_quiesce
   2064  */
   2065 /* ARGSUSED */
   2066 int
   2067 mac_tx_flow_quiesce(flow_entry_t *flent, void *arg)
   2068 {
   2069 	/*
   2070 	 * The fe_tx_srs is null for a subflow on an interface that is
   2071 	 * not plumbed
   2072 	 */
   2073 	if (flent->fe_tx_srs != NULL)
   2074 		mac_tx_srs_quiesce(flent->fe_tx_srs, SRS_QUIESCE);
   2075 	return (0);
   2076 }
   2077 
   2078 /* ARGSUSED */
   2079 int
   2080 mac_tx_flow_restart(flow_entry_t *flent, void *arg)
   2081 {
   2082 	/*
   2083 	 * The fe_tx_srs is null for a subflow on an interface that is
   2084 	 * not plumbed
   2085 	 */
   2086 	if (flent->fe_tx_srs != NULL)
   2087 		mac_tx_srs_restart(flent->fe_tx_srs);
   2088 	return (0);
   2089 }
   2090 
   2091 void
   2092 mac_tx_client_quiesce(mac_client_impl_t *mcip, uint_t srs_quiesce_flag)
   2093 {
   2094 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
   2095 
   2096 	mac_tx_client_block(mcip);
   2097 	if (MCIP_TX_SRS(mcip) != NULL) {
   2098 		mac_tx_srs_quiesce(MCIP_TX_SRS(mcip), srs_quiesce_flag);
   2099 		(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
   2100 		    mac_tx_flow_quiesce, NULL);
   2101 	}
   2102 }
   2103 
   2104 void
   2105 mac_tx_client_restart(mac_client_impl_t *mcip)
   2106 {
   2107 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
   2108 
   2109 	mac_tx_client_unblock(mcip);
   2110 	if (MCIP_TX_SRS(mcip) != NULL) {
   2111 		mac_tx_srs_restart(MCIP_TX_SRS(mcip));
   2112 		(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
   2113 		    mac_tx_flow_restart, NULL);
   2114 	}
   2115 }
   2116 
/*
 * Quiesce the client's Tx side with SRS_QUIESCE and then immediately
 * restart it. The caller must hold the mac perimeter.
 */
void
mac_tx_client_flush(mac_client_impl_t *mcip)
{
	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));

	mac_tx_client_quiesce(mcip, SRS_QUIESCE);
	mac_tx_client_restart(mcip);
}
   2125 
/*
 * Temporarily quiesce a MAC client: the Rx side first, then the Tx side
 * (with SRS_QUIESCE).
 */
void
mac_client_quiesce(mac_client_impl_t *mcip)
{
	mac_rx_client_quiesce((mac_client_handle_t)mcip);
	mac_tx_client_quiesce(mcip, SRS_QUIESCE);
}
   2132 
/*
 * Restart a MAC client that was quiesced: the Rx side first, then the
 * Tx side.
 */
void
mac_client_restart(mac_client_impl_t *mcip)
{
	mac_rx_client_restart((mac_client_handle_t)mcip);
	mac_tx_client_restart(mcip);
}
   2139 
   2140 /*
   2141  * Allocate a minor number.
   2142  */
   2143 minor_t
   2144 mac_minor_hold(boolean_t sleep)
   2145 {
   2146 	minor_t	minor;
   2147 
   2148 	/*
   2149 	 * Grab a value from the arena.
   2150 	 */
   2151 	atomic_add_32(&minor_count, 1);
   2152 
   2153 	if (sleep)
   2154 		minor = (uint_t)id_alloc(minor_ids);
   2155 	else
   2156 		minor = (uint_t)id_alloc_nosleep(minor_ids);
   2157 
   2158 	if (minor == 0) {
   2159 		atomic_add_32(&minor_count, -1);
   2160 		return (0);
   2161 	}
   2162 
   2163 	return (minor);
   2164 }
   2165 
/*
 * Release a previously allocated minor number: the id goes back to the
 * minor_ids arena and minor_count is decremented by one.
 */
void
mac_minor_rele(minor_t minor)
{
	/*
	 * Return the value to the arena.
	 */
	id_free(minor_ids, minor);
	atomic_add_32(&minor_count, -1);
}
   2178 
   2179 uint32_t
   2180 mac_no_notification(mac_handle_t mh)
   2181 {
   2182 	mac_impl_t *mip = (mac_impl_t *)mh;
   2183 
   2184 	return (((mip->mi_state_flags & MIS_LEGACY) != 0) ?
   2185 	    mip->mi_capab_legacy.ml_unsup_note : 0);
   2186 }
   2187 
   2188 /*
   2189  * Prevent any new opens of this mac in preparation for unregister
   2190  */
   2191 int
   2192 i_mac_disable(mac_impl_t *mip)
   2193 {
   2194 	mac_client_impl_t	*mcip;
   2195 
   2196 	rw_enter(&i_mac_impl_lock, RW_WRITER);
   2197 	if (mip->mi_state_flags & MIS_DISABLED) {
   2198 		/* Already disabled, return success */
   2199 		rw_exit(&i_mac_impl_lock);
   2200 		return (0);
   2201 	}
   2202 	/*
   2203 	 * See if there are any other references to this mac_t (e.g., VLAN's).
   2204 	 * If so return failure. If all the other checks below pass, then
   2205 	 * set mi_disabled atomically under the i_mac_impl_lock to prevent
   2206 	 * any new VLAN's from being created or new mac client opens of this
   2207 	 * mac end point.
   2208 	 */
   2209 	if (mip->mi_ref > 0) {
   2210 		rw_exit(&i_mac_impl_lock);
   2211 		return (EBUSY);
   2212 	}
   2213 
   2214 	/*
   2215 	 * mac clients must delete all multicast groups they join before
   2216 	 * closing. bcast groups are reference counted, the last client
   2217 	 * to delete the group will wait till the group is physically
   2218 	 * deleted. Since all clients have closed this mac end point
   2219 	 * mi_bcast_ngrps must be zero at this point
   2220 	 */
   2221 	ASSERT(mip->mi_bcast_ngrps == 0);
   2222 
   2223 	/*
   2224 	 * Don't let go of this if it has some flows.
   2225 	 * All other code guarantees no flows are added to a disabled
   2226 	 * mac, therefore it is sufficient to check for the flow table
   2227 	 * only here.
   2228 	 */
   2229 	mcip = mac_primary_client_handle(mip);
   2230 	if ((mcip != NULL) && mac_link_has_flows((mac_client_handle_t)mcip)) {
   2231 		rw_exit(&i_mac_impl_lock);
   2232 		return (ENOTEMPTY);
   2233 	}
   2234 
   2235 	mip->mi_state_flags |= MIS_DISABLED;
   2236 	rw_exit(&i_mac_impl_lock);
   2237 	return (0);
   2238 }
   2239 
   2240 int
   2241 mac_disable_nowait(mac_handle_t mh)
   2242 {
   2243 	mac_impl_t	*mip = (mac_impl_t *)mh;
   2244 	int err;
   2245 
   2246 	if ((err = i_mac_perim_enter_nowait(mip)) != 0)
   2247 		return (err);
   2248 	err = i_mac_disable(mip);
   2249 	i_mac_perim_exit(mip);
   2250 	return (err);
   2251 }
   2252 
   2253 int
   2254 mac_disable(mac_handle_t mh)
   2255 {
   2256 	mac_impl_t	*mip = (mac_impl_t *)mh;
   2257 	int err;
   2258 
   2259 	i_mac_perim_enter(mip);
   2260 	err = i_mac_disable(mip);
   2261 	i_mac_perim_exit(mip);
   2262 
   2263 	/*
   2264 	 * Clean up notification thread and wait for it to exit.
   2265 	 */
   2266 	if (err == 0)
   2267 		i_mac_notify_exit(mip);
   2268 
   2269 	return (err);
   2270 }
   2271 
   2272 /*
   2273  * Called when the MAC instance has a non empty flow table, to de-multiplex
   2274  * incoming packets to the right flow.
   2275  * The MAC's rw lock is assumed held as a READER.
   2276  */
   2277 /* ARGSUSED */
   2278 static mblk_t *
   2279 mac_rx_classify(mac_impl_t *mip, mac_resource_handle_t mrh, mblk_t *mp)
   2280 {
   2281 	flow_entry_t	*flent = NULL;
   2282 	uint_t		flags = FLOW_INBOUND;
   2283 	int		err;
   2284 
   2285 	/*
   2286 	 * If the mac is a port of an aggregation, pass FLOW_IGNORE_VLAN
   2287 	 * to mac_flow_lookup() so that the VLAN packets can be successfully
   2288 	 * passed to the non-VLAN aggregation flows.
   2289 	 *
   2290 	 * Note that there is possibly a race between this and
   2291 	 * mac_unicast_remove/add() and VLAN packets could be incorrectly
   2292 	 * classified to non-VLAN flows of non-aggregation mac clients. These
   2293 	 * VLAN packets will be then filtered out by the mac module.
   2294 	 */
   2295 	if ((mip->mi_state_flags & MIS_EXCLUSIVE) != 0)
   2296 		flags |= FLOW_IGNORE_VLAN;
   2297 
   2298 	err = mac_flow_lookup(mip->mi_flow_tab, mp, flags, &flent);
   2299 	if (err != 0) {
   2300 		/* no registered receive function */
   2301 		return (mp);
   2302 	} else {
   2303 		mac_client_impl_t	*mcip;
   2304 
   2305 		/*
   2306 		 * This flent might just be an additional one on the MAC client,
   2307 		 * i.e. for classification purposes (different fdesc), however
   2308 		 * the resources, SRS et. al., are in the mci_flent, so if
   2309 		 * this isn't the mci_flent, we need to get it.
   2310 		 */
   2311 		if ((mcip = flent->fe_mcip) != NULL &&
   2312 		    mcip->mci_flent != flent) {
   2313 			FLOW_REFRELE(flent);
   2314 			flent = mcip->mci_flent;
   2315 			FLOW_TRY_REFHOLD(flent, err);
   2316 			if (err != 0)
   2317 				return (mp);
   2318 		}
   2319 		(flent->fe_cb_fn)(flent->fe_cb_arg1, flent->fe_cb_arg2, mp,
   2320 		    B_FALSE);
   2321 		FLOW_REFRELE(flent);
   2322 	}
   2323 	return (NULL);
   2324 }
   2325 
   2326 mblk_t *
   2327 mac_rx_flow(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
   2328 {
   2329 	mac_impl_t	*mip = (mac_impl_t *)mh;
   2330 	mblk_t		*bp, *bp1, **bpp, *list = NULL;
   2331 
   2332 	/*
   2333 	 * We walk the chain and attempt to classify each packet.
   2334 	 * The packets that couldn't be classified will be returned
   2335 	 * back to the caller.
   2336 	 */
   2337 	bp = mp_chain;
   2338 	bpp = &list;
   2339 	while (bp != NULL) {
   2340 		bp1 = bp;
   2341 		bp = bp->b_next;
   2342 		bp1->b_next = NULL;
   2343 
   2344 		if (mac_rx_classify(mip, mrh, bp1) != NULL) {
   2345 			*bpp = bp1;
   2346 			bpp = &bp1->b_next;
   2347 		}
   2348 	}
   2349 	return (list);
   2350 }
   2351 
   2352 static int
   2353 mac_tx_flow_srs_wakeup(flow_entry_t *flent, void *arg)
   2354 {
   2355 	mac_ring_handle_t ring = arg;
   2356 
   2357 	if (flent->fe_tx_srs)
   2358 		mac_tx_srs_wakeup(flent->fe_tx_srs, ring);
   2359 	return (0);
   2360 }
   2361 
   2362 void
   2363 i_mac_tx_srs_notify(mac_impl_t *mip, mac_ring_handle_t ring)
   2364 {
   2365 	mac_client_impl_t	*cclient;
   2366 	mac_soft_ring_set_t	*mac_srs;
   2367 
   2368 	/*
   2369 	 * After grabbing the mi_rw_lock, the list of clients can't change.
   2370 	 * If there are any clients mi_disabled must be B_FALSE and can't
   2371 	 * get set since there are clients. If there aren't any clients we
   2372 	 * don't do anything. In any case the mip has to be valid. The driver
   2373 	 * must make sure that it goes single threaded (with respect to mac
   2374 	 * calls) and wait for all pending mac calls to finish before calling
   2375 	 * mac_unregister.
   2376 	 */
   2377 	rw_enter(&i_mac_impl_lock, RW_READER);
   2378 	if (mip->mi_state_flags & MIS_DISABLED) {
   2379 		rw_exit(&i_mac_impl_lock);
   2380 		return;
   2381 	}
   2382 
   2383 	/*
   2384 	 * Get MAC tx srs from walking mac_client_handle list.
   2385 	 */
   2386 	rw_enter(&mip->mi_rw_lock, RW_READER);
   2387 	for (cclient = mip->mi_clients_list; cclient != NULL;
   2388 	    cclient = cclient->mci_client_next) {
   2389 		if ((mac_srs = MCIP_TX_SRS(cclient)) != NULL)
   2390 			mac_tx_srs_wakeup(mac_srs, ring);
   2391 		(void) mac_flow_walk(cclient->mci_subflow_tab,
   2392 		    mac_tx_flow_srs_wakeup, ring);
   2393 	}
   2394 	rw_exit(&mip->mi_rw_lock);
   2395 	rw_exit(&i_mac_impl_lock);
   2396 }
   2397 
   2398 /* ARGSUSED */
   2399 void
   2400 mac_multicast_refresh(mac_handle_t mh, mac_multicst_t refresh, void *arg,
   2401     boolean_t add)
   2402 {
   2403 	mac_impl_t *mip = (mac_impl_t *)mh;
   2404 
   2405 	i_mac_perim_enter((mac_impl_t *)mh);
   2406 	/*
   2407 	 * If no specific refresh function was given then default to the
   2408 	 * driver's m_multicst entry point.
   2409 	 */
   2410 	if (refresh == NULL) {
   2411 		refresh = mip->mi_multicst;
   2412 		arg = mip->mi_driver;
   2413 	}
   2414 
   2415 	mac_bcast_refresh(mip, refresh, arg, add);
   2416 	i_mac_perim_exit((mac_impl_t *)mh);
   2417 }
   2418 
   2419 void
   2420 mac_promisc_refresh(mac_handle_t mh, mac_setpromisc_t refresh, void *arg)
   2421 {
   2422 	mac_impl_t	*mip = (mac_impl_t *)mh;
   2423 
   2424 	/*
   2425 	 * If no specific refresh function was given then default to the
   2426 	 * driver's m_promisc entry point.
   2427 	 */
   2428 	if (refresh == NULL) {
   2429 		refresh = mip->mi_setpromisc;
   2430 		arg = mip->mi_driver;
   2431 	}
   2432 	ASSERT(refresh != NULL);
   2433 
   2434 	/*
   2435 	 * Call the refresh function with the current promiscuity.
   2436 	 */
   2437 	refresh(arg, (mip->mi_devpromisc != 0));
   2438 }
   2439 
   2440 /*
   2441  * The mac client requests that the mac not to change its margin size to
   2442  * be less than the specified value.  If "current" is B_TRUE, then the client
   2443  * requests the mac not to change its margin size to be smaller than the
   2444  * current size. Further, return the current margin size value in this case.
   2445  *
   2446  * We keep every requested size in an ordered list from largest to smallest.
   2447  */
   2448 int
   2449 mac_margin_add(mac_handle_t mh, uint32_t *marginp, boolean_t current)
   2450 {
   2451 	mac_impl_t		*mip = (mac_impl_t *)mh;
   2452 	mac_margin_req_t	**pp, *p;
   2453 	int			err = 0;
   2454 
   2455 	rw_enter(&(mip->mi_rw_lock), RW_WRITER);
   2456 	if (current)
   2457 		*marginp = mip->mi_margin;
   2458 
   2459 	/*
   2460 	 * If the current margin value cannot satisfy the margin requested,
   2461 	 * return ENOTSUP directly.
   2462 	 */
   2463 	if (*marginp > mip->mi_margin) {
   2464 		err = ENOTSUP;
   2465 		goto done;
   2466 	}
   2467 
   2468 	/*
   2469 	 * Check whether the given margin is already in the list. If so,
   2470 	 * bump the reference count.
   2471 	 */
   2472 	for (pp = &mip->mi_mmrp; (p = *pp) != NULL; pp = &p->mmr_nextp) {
   2473 		if (p->mmr_margin == *marginp) {
   2474 			/*
   2475 			 * The margin requested is already in the list,
   2476 			 * so just bump the reference count.
   2477 			 */
   2478 			p->mmr_ref++;
   2479 			goto done;
   2480 		}
   2481 		if (p->mmr_margin < *marginp)
   2482 			break;
   2483 	}
   2484 
   2485 
   2486 	p = kmem_zalloc(sizeof (mac_margin_req_t), KM_SLEEP);
   2487 	p->mmr_margin = *marginp;
   2488 	p->mmr_ref++;
   2489 	p->mmr_nextp = *pp;
   2490 	*pp = p;
   2491 
   2492 done:
   2493 	rw_exit(&(mip->mi_rw_lock));
   2494 	return (err);
   2495 }
   2496 
   2497 /*
   2498  * The mac client requests to cancel its previous mac_margin_add() request.
   2499  * We remove the requested margin size from the list.
   2500  */
   2501 int
   2502 mac_margin_remove(mac_handle_t mh, uint32_t margin)
   2503 {
   2504 	mac_impl_t		*mip = (mac_impl_t *)mh;
   2505 	mac_margin_req_t	**pp, *p;
   2506 	int			err = 0;
   2507 
   2508 	rw_enter(&(mip->mi_rw_lock), RW_WRITER);
   2509 	/*
   2510 	 * Find the entry in the list for the given margin.
   2511 	 */
   2512 	for (pp = &(mip->mi_mmrp); (p = *pp) != NULL; pp = &(p->mmr_nextp)) {
   2513 		if (p->mmr_margin == margin) {
   2514 			if (--p->mmr_ref == 0)
   2515 				break;
   2516 
   2517 			/*
   2518 			 * There is still a reference to this address so
   2519 			 * there's nothing more to do.
   2520 			 */
   2521 			goto done;
   2522 		}
   2523 	}
   2524 
   2525 	/*
   2526 	 * We did not find an entry for the given margin.
   2527 	 */
   2528 	if (p == NULL) {
   2529 		err = ENOENT;
   2530 		goto done;
   2531 	}
   2532 
   2533 	ASSERT(p->mmr_ref == 0);
   2534 
   2535 	/*
   2536 	 * Remove it from the list.
   2537 	 */
   2538 	*pp = p->mmr_nextp;
   2539 	kmem_free(p, sizeof (mac_margin_req_t));
   2540 done:
   2541 	rw_exit(&(mip->mi_rw_lock));
   2542 	return (err);
   2543 }
   2544 
   2545 boolean_t
   2546 mac_margin_update(mac_handle_t mh, uint32_t margin)
   2547 {
   2548 	mac_impl_t	*mip = (mac_impl_t *)mh;
   2549 	uint32_t	margin_needed = 0;
   2550 
   2551 	rw_enter(&(mip->mi_rw_lock), RW_WRITER);
   2552 
   2553 	if (mip->mi_mmrp != NULL)
   2554 		margin_needed = mip->mi_mmrp->mmr_margin;
   2555 
   2556 	if (margin_needed <= margin)
   2557 		mip->mi_margin = margin;
   2558 
   2559 	rw_exit(&(mip->mi_rw_lock));
   2560 
   2561 	if (margin_needed <= margin)
   2562 		i_mac_notify(mip, MAC_NOTE_MARGIN);
   2563 
   2564 	return (margin_needed <= margin);
   2565 }
   2566 
   2567 /*
   2568  * MAC Type Plugin functions.
   2569  */
   2570 
   2571 mactype_t *
   2572 mactype_getplugin(const char *pname)
   2573 {
   2574 	mactype_t	*mtype = NULL;
   2575 	boolean_t	tried_modload = B_FALSE;
   2576 
   2577 	mutex_enter(&i_mactype_lock);
   2578 
   2579 find_registered_mactype:
   2580 	if (mod_hash_find(i_mactype_hash, (mod_hash_key_t)pname,
   2581 	    (mod_hash_val_t *)&mtype) != 0) {
   2582 		if (!tried_modload) {
   2583 			/*
   2584 			 * If the plugin has not yet been loaded, then
   2585 			 * attempt to load it now.  If modload() succeeds,
   2586 			 * the plugin should have registered using
   2587 			 * mactype_register(), in which case we can go back
   2588 			 * and attempt to find it again.
   2589 			 */
   2590 			if (modload(MACTYPE_KMODDIR, (char *)pname) != -1) {
   2591 				tried_modload = B_TRUE;
   2592 				goto find_registered_mactype;
   2593 			}
   2594 		}
   2595 	} else {
   2596 		/*
   2597 		 * Note that there's no danger that the plugin we've loaded
   2598 		 * could be unloaded between the modload() step and the
   2599 		 * reference count bump here, as we're holding
   2600 		 * i_mactype_lock, which mactype_unregister() also holds.
   2601 		 */
   2602 		atomic_inc_32(&mtype->mt_ref);
   2603 	}
   2604 
   2605 	mutex_exit(&i_mactype_lock);
   2606 	return (mtype);
   2607 }
   2608 
   2609 mactype_register_t *
   2610 mactype_alloc(uint_t mactype_version)
   2611 {
   2612 	mactype_register_t *mtrp;
   2613 
   2614 	/*
   2615 	 * Make sure there isn't a version mismatch between the plugin and
   2616 	 * the framework.  In the future, if multiple versions are
   2617 	 * supported, this check could become more sophisticated.
   2618 	 */
   2619 	if (mactype_version != MACTYPE_VERSION)
   2620 		return (NULL);
   2621 
   2622 	mtrp = kmem_zalloc(sizeof (mactype_register_t), KM_SLEEP);
   2623 	mtrp->mtr_version = mactype_version;
   2624 	return (mtrp);
   2625 }
   2626 
/*
 * Free a mactype_register_t. The structure is released with the size
 * mactype_alloc() allocates it with.
 */
void
mactype_free(mactype_register_t *mtrp)
{
	kmem_free(mtrp, sizeof (mactype_register_t));
}
   2632 
   2633 int
   2634 mactype_register(mactype_register_t *mtrp)
   2635 {
   2636 	mactype_t	*mtp;
   2637 	mactype_ops_t	*ops = mtrp->mtr_ops;
   2638 
   2639 	/* Do some sanity checking before we register this MAC type. */
   2640 	if (mtrp->mtr_ident == NULL || ops == NULL)
   2641 		return (EINVAL);
   2642 
   2643 	/*
   2644 	 * Verify that all mandatory callbacks are set in the ops
   2645 	 * vector.
   2646 	 */
   2647 	if (ops->mtops_unicst_verify == NULL ||
   2648 	    ops->mtops_multicst_verify == NULL ||
   2649 	    ops->mtops_sap_verify == NULL ||
   2650 	    ops->mtops_header == NULL ||
   2651 	    ops->mtops_header_info == NULL) {
   2652 		return (EINVAL);
   2653 	}
   2654 
   2655 	mtp = kmem_zalloc(sizeof (*mtp), KM_SLEEP);
   2656 	mtp->mt_ident = mtrp->mtr_ident;
   2657 	mtp->mt_ops = *ops;
   2658 	mtp->mt_type = mtrp->mtr_mactype;
   2659 	mtp->mt_nativetype = mtrp->mtr_nativetype;
   2660 	mtp->mt_addr_length = mtrp->mtr_addrlen;
   2661 	if (mtrp->mtr_brdcst_addr != NULL) {
   2662 		mtp->mt_brdcst_addr = kmem_alloc(mtrp->mtr_addrlen, KM_SLEEP);
   2663 		bcopy(mtrp->mtr_brdcst_addr, mtp->mt_brdcst_addr,
   2664 		    mtrp->mtr_addrlen);
   2665 	}
   2666 
   2667 	mtp->mt_stats = mtrp->mtr_stats;
   2668 	mtp->mt_statcount = mtrp->mtr_statcount;
   2669 
   2670 	mtp->mt_mapping = mtrp->mtr_mapping;
   2671 	mtp->mt_mappingcount = mtrp->mtr_mappingcount;
   2672 
   2673 	if (mod_hash_insert(i_mactype_hash,
   2674 	    (mod_hash_key_t)mtp->mt_ident, (mod_hash_val_t)mtp) != 0) {
   2675 		kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length);
   2676 		kmem_free(mtp, sizeof (*mtp));
   2677 		return (EEXIST);
   2678 	}
   2679 	return (0);
   2680 }
   2681 
   2682 int
   2683 mactype_unregister(const char *ident)
   2684 {
   2685 	mactype_t	*mtp;
   2686 	mod_hash_val_t	val;
   2687 	int 		err;
   2688 
   2689 	/*
   2690 	 * Let's not allow MAC drivers to use this plugin while we're
   2691 	 * trying to unregister it.  Holding i_mactype_lock also prevents a
   2692 	 * plugin from unregistering while a MAC driver is attempting to
   2693 	 * hold a reference to it in i_mactype_getplugin().
   2694 	 */
   2695 	mutex_enter(&i_mactype_lock);
   2696 
   2697 	if ((err = mod_hash_find(i_mactype_hash, (mod_hash_key_t)ident,
   2698 	    (mod_hash_val_t *)&mtp)) != 0) {
   2699 		/* A plugin is trying to unregister, but it never registered. */
   2700 		err = ENXIO;
   2701 		goto done;
   2702 	}
   2703 
   2704 	if (mtp->mt_ref != 0) {
   2705 		err = EBUSY;
   2706 		goto done;
   2707 	}
   2708 
   2709 	err = mod_hash_remove(i_mactype_hash, (mod_hash_key_t)ident, &val);
   2710 	ASSERT(err == 0);
   2711 	if (err != 0) {
   2712 		/* This should never happen, thus the ASSERT() above. */
   2713 		err = EINVAL;
   2714 		goto done;
   2715 	}
   2716 	ASSERT(mtp == (mactype_t *)val);
   2717 
   2718 	if (mtp->mt_brdcst_addr != NULL)
   2719 		kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length);
   2720 	kmem_free(mtp, sizeof (mactype_t));
   2721 done:
   2722 	mutex_exit(&i_mactype_lock);
   2723 	return (err);
   2724 }
   2725 
   2726 /*
   2727  * mac_set_prop() sets mac or hardware driver properties:
   2728  * 	MAC resource properties include maxbw, priority, and cpu binding list.
   2729  *	Driver properties are private properties to the hardware, such as mtu
   2730  *	and speed.  There's one other MAC property -- the PVID.
   2731  * If the property is a driver property, mac_set_prop() calls driver's callback
   2732  * function to set it.
   2733  * If the property is a mac resource property, mac_set_prop() invokes
   2734  * mac_set_resources() which will cache the property value in mac_impl_t and
   2735  * may call mac_client_set_resource() to update property value of the primary
   2736  * mac client, if it exists.
   2737  */
   2738 int
   2739 mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize)
   2740 {
   2741 	int err = ENOTSUP;
   2742 	mac_impl_t *mip = (mac_impl_t *)mh;
   2743 
   2744 	ASSERT(MAC_PERIM_HELD(mh));
   2745 
   2746 	switch (macprop->mp_id) {
   2747 	case MAC_PROP_MAXBW:
   2748 	case MAC_PROP_PRIO:
   2749 	case MAC_PROP_PROTECT:
   2750 	case MAC_PROP_BIND_CPU: {
   2751 		mac_resource_props_t mrp;
   2752 
   2753 		/* If it is mac property, call mac_set_resources() */
   2754 		if (valsize < sizeof (mac_resource_props_t))
   2755 			return (EINVAL);
   2756 		bcopy(val, &mrp, sizeof (mrp));
   2757 		err = mac_set_resources(mh, &mrp);
   2758 		break;
   2759 	}
   2760 
   2761 	case MAC_PROP_PVID:
   2762 		if (valsize < sizeof (uint16_t) ||
   2763 		    (mip->mi_state_flags & MIS_IS_VNIC))
   2764 			return (EINVAL);
   2765 		err = mac_set_pvid(mh, *(uint16_t *)val);
   2766 		break;
   2767 
   2768 	case MAC_PROP_MTU: {
   2769 		uint32_t mtu;
   2770 
   2771 		if (valsize < sizeof (mtu))
   2772 			return (EINVAL);
   2773 		bcopy(val, &mtu, sizeof (mtu));
   2774 		err = mac_set_mtu(mh, mtu, NULL);
   2775 		break;
   2776 	}
   2777 
   2778 	case MAC_PROP_LLIMIT:
   2779 	case MAC_PROP_LDECAY: {
   2780 		uint32_t learnval;
   2781 
   2782 		if (valsize < sizeof (learnval) ||
   2783 		    (mip->mi_state_flags & MIS_IS_VNIC))
   2784 			return (EINVAL);
   2785 		bcopy(val, &learnval, sizeof (learnval));
   2786 		if (learnval == 0 && macprop->mp_id == MAC_PROP_LDECAY)
   2787 			return (EINVAL);
   2788 		if (macprop->mp_id == MAC_PROP_LLIMIT)
   2789 			mip->mi_llimit = learnval;
   2790 		else
   2791 			mip->mi_ldecay = learnval;
   2792 		err = 0;
   2793 		break;
   2794 	}
   2795 
   2796 	default:
   2797 		/* For other driver properties, call driver's callback */
   2798 		if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) {
   2799 			err = mip->mi_callbacks->mc_setprop(mip->mi_driver,
   2800 			    macprop->mp_name, macprop->mp_id, valsize, val);
   2801 		}
   2802 	}
   2803 	return (err);
   2804 }
   2805 
   2806 /*
   2807  * mac_get_prop() gets mac or hardware driver properties.
   2808  *
   2809  * If the property is a driver property, mac_get_prop() calls driver's callback
   2810  * function to get it.
   2811  * If the property is a mac property, mac_get_prop() invokes mac_get_resources()
   2812  * which returns the cached value in mac_impl_t.
   2813  */
   2814 int
   2815 mac_get_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize,
   2816     uint_t *perm)
   2817 {
   2818 	int err = ENOTSUP;
   2819 	mac_impl_t *mip = (mac_impl_t *)mh;
   2820 	link_state_t link_state;
   2821 	boolean_t is_getprop, is_setprop;
   2822 
   2823 	is_getprop = (mip->mi_callbacks->mc_callbacks & MC_GETPROP);
   2824 	is_setprop = (mip->mi_callbacks->mc_callbacks & MC_SETPROP);
   2825 
   2826 	switch (macprop->mp_id) {
   2827 	case MAC_PROP_MAXBW:
   2828 	case MAC_PROP_PRIO:
   2829 	case MAC_PROP_PROTECT:
   2830 	case MAC_PROP_BIND_CPU: {
   2831 		mac_resource_props_t mrp;
   2832 
   2833 		/* If mac property, read from cache */
   2834 		if (valsize < sizeof (mac_resource_props_t))
   2835 			return (EINVAL);
   2836 		mac_get_resources(mh, &mrp);
   2837 		bcopy(&mrp, val, sizeof (mac_resource_props_t));
   2838 		return (0);
   2839 	}
   2840 
   2841 	case MAC_PROP_PVID:
   2842 		if (valsize < sizeof (uint16_t) ||
   2843 		    (mip->mi_state_flags & MIS_IS_VNIC))
   2844 			return (EINVAL);
   2845 		*(uint16_t *)val = mac_get_pvid(mh);
   2846 		return (0);
   2847 
   2848 	case MAC_PROP_LLIMIT:
   2849 	case MAC_PROP_LDECAY:
   2850 		if (valsize < sizeof (uint32_t) ||
   2851 		    (mip->mi_state_flags & MIS_IS_VNIC))
   2852 			return (EINVAL);
   2853 		if (macprop->mp_id == MAC_PROP_LLIMIT)
   2854 			bcopy(&mip->mi_llimit, val, sizeof (mip->mi_llimit));
   2855 		else
   2856 			bcopy(&mip->mi_ldecay, val, sizeof (mip->mi_ldecay));
   2857 		return (0);
   2858 
   2859 	case MAC_PROP_MTU: {
   2860 		uint32_t sdu;
   2861 		mac_propval_range_t range;
   2862 
   2863 		if ((macprop->mp_flags & MAC_PROP_POSSIBLE) != 0) {
   2864 			if (valsize < sizeof (mac_propval_range_t))
   2865 				return (EINVAL);
   2866 			if (is_getprop) {
   2867 				err = mip->mi_callbacks->mc_getprop(mip->
   2868 				    mi_driver, macprop->mp_name, macprop->mp_id,
   2869 				    macprop->mp_flags, valsize, val, perm);
   2870 			}
   2871 			/*
   2872 			 * If the driver doesn't have *_m_getprop defined or
   2873 			 * if the driver doesn't support setting MTU then
   2874 			 * return the CURRENT value as POSSIBLE value.
   2875 			 */
   2876 			if (!is_getprop || err == ENOTSUP) {
   2877 				mac_sdu_get(mh, NULL, &sdu);
   2878 				range.mpr_count = 1;
   2879 				range.mpr_type = MAC_PROPVAL_UINT32;
   2880 				range.range_uint32[0].mpur_min =
   2881 				    range.range_uint32[0].mpur_max = sdu;
   2882 				bcopy(&range, val, sizeof (range));
   2883 				err = 0;
   2884 			}
   2885 			return (err);
   2886 		}
   2887 		if (valsize < sizeof (sdu))
   2888 			return (EINVAL);
   2889 		if ((macprop->mp_flags & MAC_PROP_DEFAULT) == 0) {
   2890 			mac_sdu_get(mh, NULL, &sdu);
   2891 			bcopy(&sdu, val, sizeof (sdu));
   2892 			if (is_setprop && (mip->mi_callbacks->mc_setprop(mip->
   2893 			    mi_driver, macprop->mp_name, macprop->mp_id,
   2894 			    valsize, val) == 0)) {
   2895 				*perm = MAC_PROP_PERM_RW;
   2896 			} else {
   2897 				*perm = MAC_PROP_PERM_READ;
   2898 			}
   2899 			return (0);
   2900 		} else {
   2901 			if (mip->mi_info.mi_media == DL_ETHER) {
   2902 				sdu = ETHERMTU;
   2903 				bcopy(&sdu, val, sizeof (sdu));
   2904 
   2905 				return (0);
   2906 			}
   2907 			/*
   2908 			 * ask driver for its default.
   2909 			 */
   2910 			break;
   2911 		}
   2912 	}
   2913 	case MAC_PROP_STATUS:
   2914 		if (valsize < sizeof (link_state))
   2915 			return (EINVAL);
   2916 		*perm = MAC_PROP_PERM_READ;
   2917 		link_state = mac_link_get(mh);
   2918 		bcopy(&link_state, val, sizeof (link_state));
   2919 		return (0);
   2920 	default:
   2921 		break;
   2922 
   2923 	}
   2924 	/* If driver property, request from driver */
   2925 	if (is_getprop) {
   2926 		err = mip->mi_callbacks->mc_getprop(mip->mi_driver,
   2927 		    macprop->mp_name, macprop->mp_id, macprop->mp_flags,
   2928 		    valsize, val, perm);
   2929 	}
   2930 	return (err);
   2931 }
   2932 
   2933 int
   2934 mac_fastpath_disable(mac_handle_t mh)
   2935 {
   2936 	mac_impl_t	*mip = (mac_impl_t *)mh;
   2937 
   2938 	if ((mip->mi_state_flags & MIS_LEGACY) == 0)
   2939 		return (0);
   2940 
   2941 	return (mip->mi_capab_legacy.ml_fastpath_disable(mip->mi_driver));
   2942 }
   2943 
   2944 void
   2945 mac_fastpath_enable(mac_handle_t mh)
   2946 {
   2947 	mac_impl_t	*mip = (mac_impl_t *)mh;
   2948 
   2949 	if ((mip->mi_state_flags & MIS_LEGACY) == 0)
   2950 		return;
   2951 
   2952 	mip->mi_capab_legacy.ml_fastpath_enable(mip->mi_driver);
   2953 }
   2954 
   2955 void
   2956 mac_register_priv_prop(mac_impl_t *mip, mac_priv_prop_t *mpp, uint_t nprop)
   2957 {
   2958 	mac_priv_prop_t *mpriv;
   2959 
   2960 	if (mpp == NULL)
   2961 		return;
   2962 
   2963 	mpriv = kmem_zalloc(nprop * sizeof (*mpriv), KM_SLEEP);
   2964 	(void) memcpy(mpriv, mpp, nprop * sizeof (*mpriv));
   2965 	mip->mi_priv_prop = mpriv;
   2966 	mip->mi_priv_prop_count = nprop;
   2967 }
   2968 
   2969 void
   2970 mac_unregister_priv_prop(mac_impl_t *mip)
   2971 {
   2972 	mac_priv_prop_t	*mpriv;
   2973 
   2974 	mpriv = mip->mi_priv_prop;
   2975 	if (mpriv != NULL) {
   2976 		kmem_free(mpriv, mip->mi_priv_prop_count * sizeof (*mpriv));
   2977 		mip->mi_priv_prop = NULL;
   2978 	}
   2979 	mip->mi_priv_prop_count = 0;
   2980 }
   2981 
   2982 /*
   2983  * mac_ring_t 'mr' macros. Some rogue drivers may access ring structure
   2984  * (by invoking mac_rx()) even after processing mac_stop_ring(). In such
   2985  * cases if MAC free's the ring structure after mac_stop_ring(), any
   2986  * illegal access to the ring structure coming from the driver will panic
   2987  * the system. In order to protect the system from such inadverent access,
   2988  * we maintain a cache of rings in the mac_impl_t after they get free'd up.
   2989  * When packets are received on free'd up rings, MAC (through the generation
   2990  * count mechanism) will drop such packets.
   2991  */
   2992 static mac_ring_t *
   2993 mac_ring_alloc(mac_impl_t *mip, mac_capab_rings_t *cap_rings)
   2994 {
   2995 	mac_ring_t *ring;
   2996 
   2997 	if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
   2998 		mutex_enter(&mip->mi_ring_lock);
   2999 		if (mip->mi_ring_freelist != NULL) {
   3000 			ring = mip->mi_ring_freelist;
   3001 			mip->mi_ring_freelist = ring->mr_next;
   3002 			bzero(ring, sizeof (mac_ring_t));
   3003 		} else {
   3004 			ring = kmem_cache_alloc(mac_ring_cache, KM_SLEEP);
   3005 		}
   3006 		mutex_exit(&mip->mi_ring_lock);
   3007 	} else {
   3008 		ring = kmem_zalloc(sizeof (mac_ring_t), KM_SLEEP);
   3009 	}
   3010 	ASSERT((ring != NULL) && (ring->mr_state == MR_FREE));
   3011 	return (ring);
   3012 }
   3013 
   3014 static void
   3015 mac_ring_free(mac_impl_t *mip, mac_ring_t *ring)
   3016 {
   3017 	if (ring->mr_type == MAC_RING_TYPE_RX) {
   3018 		mutex_enter(&mip->mi_ring_lock);
   3019 		ring->mr_state = MR_FREE;
   3020 		ring->mr_flag = 0;
   3021 		ring->mr_next = mip->mi_ring_freelist;
   3022 		mip->mi_ring_freelist = ring;
   3023 		mutex_exit(&mip->mi_ring_lock);
   3024 	} else {
   3025 		kmem_free(ring, sizeof (mac_ring_t));
   3026 	}
   3027 }
   3028 
   3029 static void
   3030 mac_ring_freeall(mac_impl_t *mip)
   3031 {
   3032 	mac_ring_t *ring_next;
   3033 	mutex_enter(&mip->mi_ring_lock);
   3034 	mac_ring_t *ring = mip->mi_ring_freelist;
   3035 	while (ring != NULL) {
   3036 		ring_next = ring->mr_next;
   3037 		kmem_cache_free(mac_ring_cache, ring);
   3038 		ring = ring_next;
   3039 	}
   3040 	mip->mi_ring_freelist = NULL;
   3041 	mutex_exit(&mip->mi_ring_lock);
   3042 }
   3043 
   3044 int
   3045 mac_start_ring(mac_ring_t *ring)
   3046 {
   3047 	int rv = 0;
   3048 
   3049 	if (ring->mr_start != NULL)
   3050 		rv = ring->mr_start(ring->mr_driver, ring->mr_gen_num);
   3051 
   3052 	return (rv);
   3053 }
   3054 
   3055 void
   3056 mac_stop_ring(mac_ring_t *ring)
   3057 {
   3058 	if (ring->mr_stop != NULL)
   3059 		ring->mr_stop(ring->mr_driver);
   3060 
   3061 	/*
   3062 	 * Increment the ring generation number for this ring.
   3063 	 */
   3064 	ring->mr_gen_num++;
   3065 }
   3066 
   3067 int
   3068 mac_start_group(mac_group_t *group)
   3069 {
   3070 	int rv = 0;
   3071 
   3072 	if (group->mrg_start != NULL)
   3073 		rv = group->mrg_start(group->mrg_driver);
   3074 
   3075 	return (rv);
   3076 }
   3077 
   3078 void
   3079 mac_stop_group(mac_group_t *group)
   3080 {
   3081 	if (group->mrg_stop != NULL)
   3082 		group->mrg_stop(group->mrg_driver);
   3083 }
   3084 
   3085 /*
   3086  * Called from mac_start() on the default Rx group. Broadcast and multicast
   3087  * packets are received only on the default group. Hence the default group
   3088  * needs to be up even if the primary client is not up, for the other groups
   3089  * to be functional. We do this by calling this function at mac_start time
   3090  * itself. However the broadcast packets that are received can't make their
   3091  * way beyond mac_rx until a mac client creates a broadcast flow.
   3092  */
   3093 static int
   3094 mac_start_group_and_rings(mac_group_t *group)
   3095 {
   3096 	mac_ring_t	*ring;
   3097 	int		rv = 0;
   3098 
   3099 	ASSERT(group->mrg_state == MAC_GROUP_STATE_REGISTERED);
   3100 	if ((rv = mac_start_group(group)) != 0)
   3101 		return (rv);
   3102 
   3103 	for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) {
   3104 		ASSERT(ring->mr_state == MR_FREE);
   3105 		if ((rv = mac_start_ring(ring)) != 0)
   3106 			goto error;
   3107 		ring->mr_state = MR_INUSE;
   3108 		ring->mr_classify_type = MAC_SW_CLASSIFIER;
   3109 	}
   3110 	return (0);
   3111 
   3112 error:
   3113 	mac_stop_group_and_rings(group);
   3114 	return (rv);
   3115 }
   3116 
   3117 /* Called from mac_stop on the default Rx group */
   3118 static void
   3119 mac_stop_group_and_rings(mac_group_t *group)
   3120 {
   3121 	mac_ring_t	*ring;
   3122 
   3123 	for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) {
   3124 		if (ring->mr_state != MR_FREE) {
   3125 			mac_stop_ring(ring);
   3126 			ring->mr_state = MR_FREE;
   3127 			ring->mr_flag = 0;
   3128 			ring->mr_classify_type = MAC_NO_CLASSIFIER;
   3129 		}
   3130 	}
   3131 	mac_stop_group(group);
   3132 }
   3133 
   3134 
   3135 static mac_ring_t *
   3136 mac_init_ring(mac_impl_t *mip, mac_group_t *group, int index,
   3137     mac_capab_rings_t *cap_rings)
   3138 {
   3139 	mac_ring_t *ring;
   3140 	mac_ring_info_t ring_info;
   3141 
   3142 	ring = mac_ring_alloc(mip, cap_rings);
   3143 
   3144 	/* Prepare basic information of ring */
   3145 	ring->mr_index = index;
   3146 	ring->mr_type = group->mrg_type;
   3147 	ring->mr_gh = (mac_group_handle_t)group;
   3148 
   3149 	/* Insert the new ring to the list. */
   3150 	ring->mr_next = group->mrg_rings;
   3151 	group->mrg_rings = ring;
   3152 
   3153 	/* Zero to reuse the info data structure */
   3154 	bzero(&ring_info, sizeof (ring_info));
   3155 
   3156 	/* Query ring information from driver */
   3157 	cap_rings->mr_rget(mip->mi_driver, group->mrg_type, group->mrg_index,
   3158 	    index, &ring_info, (mac_ring_handle_t)ring);
   3159 
   3160 	ring->mr_info = ring_info;
   3161 
   3162 	/* Update ring's status */
   3163 	ring->mr_state = MR_FREE;
   3164 	ring->mr_flag = 0;
   3165 
   3166 	/* Update the ring count of the group */
   3167 	group->mrg_cur_count++;
   3168 	return (ring);
   3169 }
   3170 
   3171 /*
   3172  * Rings are chained together for easy regrouping.
   3173  */
   3174 static void
   3175 mac_init_group(mac_impl_t *mip, mac_group_t *group, int size,
   3176     mac_capab_rings_t *cap_rings)
   3177 {
   3178 	int index;
   3179 
   3180 	/*
   3181 	 * Initialize all ring members of this group. Size of zero will not
   3182 	 * enter the loop, so it's safe for initializing an empty group.
   3183 	 */
   3184 	for (index = size - 1; index >= 0; index--)
   3185 		(void) mac_init_ring(mip, group, index, cap_rings);
   3186 }
   3187 
   3188 int
   3189 mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype)
   3190 {
   3191 	mac_capab_rings_t *cap_rings;
   3192 	mac_group_t *group, *groups;
   3193 	mac_group_info_t group_info;
   3194 	uint_t group_free = 0;
   3195 	uint_t ring_left;
   3196 	mac_ring_t *ring;
   3197 	int g, err = 0;
   3198 
   3199 	switch (rtype) {
   3200 	case MAC_RING_TYPE_RX:
   3201 		ASSERT(mip->mi_rx_groups == NULL);
   3202 
   3203 		cap_rings = &mip->mi_rx_rings_cap;
   3204 		cap_rings->mr_type = MAC_RING_TYPE_RX;
   3205 		break;
   3206 	case MAC_RING_TYPE_TX:
   3207 		ASSERT(mip->mi_tx_groups == NULL);
   3208 
   3209 		cap_rings = &mip->mi_tx_rings_cap;
   3210 		cap_rings->mr_type = MAC_RING_TYPE_TX;
   3211 		break;
   3212 	default:
   3213 		ASSERT(B_FALSE);
   3214 	}
   3215 
   3216 	if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_RINGS,
   3217 	    cap_rings))
   3218 		return (0);
   3219 
   3220 	/*
   3221 	 * Allocate a contiguous buffer for all groups.
   3222 	 */
   3223 	groups = kmem_zalloc(sizeof (mac_group_t) * (cap_rings->mr_gnum + 1),
   3224 	    KM_SLEEP);
   3225 
   3226 	ring_left = cap_rings->mr_rnum;
   3227 
   3228 	/*
   3229 	 * Get all ring groups if any, and get their ring members
   3230 	 * if any.
   3231 	 */
   3232 	for (g = 0; g < cap_rings->mr_gnum; g++) {
   3233 		group = groups + g;
   3234 
   3235 		/* Prepare basic information of the group */
   3236 		group->mrg_index = g;
   3237 		group->mrg_type = rtype;
   3238 		group->mrg_state = MAC_GROUP_STATE_UNINIT;
   3239 		group->mrg_mh = (mac_handle_t)mip;
   3240 		group->mrg_next = group + 1;
   3241 
   3242 		/* Zero to reuse the info data structure */
   3243 		bzero(&group_info, sizeof (group_info));
   3244 
   3245 		/* Query group information from driver */
   3246 		cap_rings->mr_gget(mip->mi_driver, rtype, g, &group_info,
   3247 		    (mac_group_handle_t)group);
   3248 
   3249 		switch (cap_rings->mr_group_type) {
   3250 		case MAC_GROUP_TYPE_DYNAMIC:
   3251 			if (cap_rings->mr_gaddring == NULL ||
   3252 			    cap_rings->mr_gremring == NULL) {
   3253 				DTRACE_PROBE3(
   3254 				    mac__init__rings_no_addremring,
   3255 				    char *, mip->mi_name,
   3256 				    mac_group_add_ring_t,
   3257 				    cap_rings->mr_gaddring,
   3258 				    mac_group_add_ring_t,
   3259 				    cap_rings->mr_gremring);
   3260 				err = EINVAL;
   3261 				goto bail;
   3262 			}
   3263 
   3264 			switch (rtype) {
   3265 			case MAC_RING_TYPE_RX:
   3266 				/*
   3267 				 * The first RX group must have non-zero
   3268 				 * rings, and the following groups must
   3269 				 * have zero rings.
   3270 				 */
   3271 				if (g == 0 && group_info.mgi_count == 0) {
   3272 					DTRACE_PROBE1(
   3273 					    mac__init__rings__rx__def__zero,
   3274 					    char *, mip->mi_name);
   3275 					err = EINVAL;
   3276 					goto bail;
   3277 				}
   3278 				if (g > 0 && group_info.mgi_count != 0) {
   3279 					DTRACE_PROBE3(
   3280 					    mac__init__rings__rx__nonzero,
   3281 					    char *, mip->mi_name,
   3282 					    int, g, int, group_info.mgi_count);
   3283 					err = EINVAL;
   3284 					goto bail;
   3285 				}
   3286 				break;
   3287 			case MAC_RING_TYPE_TX:
   3288 				/*
   3289 				 * All TX ring groups must have zero rings.
   3290 				 */
   3291 				if (group_info.mgi_count != 0) {
   3292 					DTRACE_PROBE3(
   3293 					    mac__init__rings__tx__nonzero,
   3294 					    char *, mip->mi_name,
   3295 					    int, g, int, group_info.mgi_count);
   3296 					err = EINVAL;
   3297 					goto bail;
   3298 				}
   3299 				break;
   3300 			}
   3301 			break;
   3302 		case MAC_GROUP_TYPE_STATIC:
   3303 			/*
   3304 			 * Note that an empty group is allowed, e.g., an aggr
   3305 			 * would start with an empty group.
   3306 			 */
   3307 			break;
   3308 		default:
   3309 			/* unknown group type */
   3310 			DTRACE_PROBE2(mac__init__rings__unknown__type,
   3311 			    char *, mip->mi_name,
   3312 			    int, cap_rings->mr_group_type);
   3313 			err = EINVAL;
   3314 			goto bail;
   3315 		}
   3316 
   3317 
   3318 		/*
   3319 		 * Driver must register group->mgi_addmac/remmac() for rx groups
   3320 		 * to support multiple MAC addresses.
   3321 		 */
   3322 		if (rtype == MAC_RING_TYPE_RX) {
   3323 			if ((group_info.mgi_addmac == NULL) ||
   3324 			    (group_info.mgi_addmac == NULL))
   3325 				goto bail;
   3326 		}
   3327 
   3328 		/* Cache driver-supplied information */
   3329 		group->mrg_info = group_info;
   3330 
   3331 		/* Update the group's status and group count. */
   3332 		mac_set_rx_group_state(group, MAC_GROUP_STATE_REGISTERED);
   3333 		group_free++;
   3334 
   3335 		group->mrg_rings = NULL;
   3336 		group->mrg_cur_count = 0;
   3337 		mac_init_group(mip, group, group_info.mgi_count, cap_rings);
   3338 		ring_left -= group_info.mgi_count;
   3339 
   3340 		/* The current group size should be equal to default value */
   3341 		ASSERT(group->mrg_cur_count == group_info.mgi_count);
   3342 	}
   3343 
   3344 	/* Build up a dummy group for free resources as a pool */
   3345 	group = groups + cap_rings->mr_gnum;
   3346 
   3347 	/* Prepare basic information of the group */
   3348 	group->mrg_index = -1;
   3349 	group->mrg_type = rtype;
   3350 	group->mrg_state = MAC_GROUP_STATE_UNINIT;
   3351 	group->mrg_mh = (mac_handle_t)mip;
   3352 	group->mrg_next = NULL;
   3353 
   3354 	/*
   3355 	 * If there are ungrouped rings, allocate a continuous buffer for
   3356 	 * remaining resources.
   3357 	 */
   3358 	if (ring_left != 0) {
   3359 		group->mrg_rings = NULL;
   3360 		group->mrg_cur_count = 0;
   3361 		mac_init_group(mip, group, ring_left, cap_rings);
   3362 
   3363 		/* The current group size should be equal to ring_left */
   3364 		ASSERT(group->mrg_cur_count == ring_left);
   3365 
   3366 		ring_left = 0;
   3367 
   3368 		/* Update this group's status */
   3369 		mac_set_rx_group_state(group, MAC_GROUP_STATE_REGISTERED);
   3370 	} else
   3371 		group->mrg_rings = NULL;
   3372 
   3373 	ASSERT(ring_left == 0);
   3374 
   3375 bail:
   3376 	/* Cache other important information to finalize the initialization */
   3377 	switch (rtype) {
   3378 	case MAC_RING_TYPE_RX:
   3379 		mip->mi_rx_group_type = cap_rings->mr_group_type;
   3380 		mip->mi_rx_group_count = cap_rings->mr_gnum;
   3381 		mip->mi_rx_groups = groups;
   3382 		break;
   3383 	case MAC_RING_TYPE_TX:
   3384 		mip->mi_tx_group_type = cap_rings->mr_group_type;
   3385 		mip->mi_tx_group_count = cap_rings->mr_gnum;
   3386 		mip->mi_tx_group_free = group_free;
   3387 		mip->mi_tx_groups = groups;
   3388 
   3389 		/*
   3390 		 * Ring 0 is used as the default one and it could be assigned
   3391 		 * to a client as well.
   3392 		 */
   3393 		group = groups + cap_rings->mr_gnum;
   3394 		ring = group->mrg_rings;
   3395 		while ((ring->mr_index != 0) && (ring->mr_next != NULL))
   3396 			ring = ring->mr_next;
   3397 		ASSERT(ring->mr_index == 0);
   3398 		mip->mi_default_tx_ring = (mac_ring_handle_t)ring;
   3399 		break;
   3400 	default:
   3401 		ASSERT(B_FALSE);
   3402 	}
   3403 
   3404 	if (err != 0)
   3405 		mac_free_rings(mip, rtype);
   3406 
   3407 	return (err);
   3408 }
   3409 
   3410 /*
   3411  * Called to free all ring groups with particular type. It's supposed all groups
   3412  * have been released by clinet.
   3413  */
   3414 void
   3415 mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype)
   3416 {
   3417 	mac_group_t *group, *groups;
   3418 	uint_t group_count;
   3419 
   3420 	switch (rtype) {
   3421 	case MAC_RING_TYPE_RX:
   3422 		if (mip->mi_rx_groups == NULL)
   3423 			return;
   3424 
   3425 		groups = mip->mi_rx_groups;
   3426 		group_count = mip->mi_rx_group_count;
   3427 
   3428 		mip->mi_rx_groups = NULL;
   3429 		mip->mi_rx_group_count = 0;
   3430 		break;
   3431 	case MAC_RING_TYPE_TX:
   3432 		ASSERT(mip->mi_tx_group_count == mip->mi_tx_group_free);
   3433 
   3434 		if (mip->mi_tx_groups == NULL)
   3435 			return;
   3436 
   3437 		groups = mip->mi_tx_groups;
   3438 		group_count = mip->mi_tx_group_count;
   3439 
   3440 		mip->mi_tx_groups = NULL;
   3441 		mip->mi_tx_group_count = 0;
   3442 		mip->mi_tx_group_free = 0;
   3443 		mip->mi_default_tx_ring = NULL;
   3444 		break;
   3445 	default:
   3446 		ASSERT(B_FALSE);
   3447 	}
   3448 
   3449 	for (group = groups; group != NULL; group = group->mrg_next) {
   3450 		mac_ring_t *ring;
   3451 
   3452 		if (group->mrg_cur_count == 0)
   3453 			continue;
   3454 
   3455 		ASSERT(group->mrg_rings != NULL);
   3456 
   3457 		while ((ring = group->mrg_rings) != NULL) {
   3458 			group->mrg_rings = ring->mr_next;
   3459 			mac_ring_free(mip, ring);
   3460 		}
   3461 	}
   3462 
   3463 	/* Free all the cached rings */
   3464 	mac_ring_freeall(mip);
   3465 	/* Free the block of group data strutures */
   3466 	kmem_free(groups, sizeof (mac_group_t) * (group_count + 1));
   3467 }
   3468 
   3469 /*
   3470  * Associate a MAC address with a receive group.
   3471  *
   3472  * The return value of this function should always be checked properly, because
   3473  * any type of failure could cause unexpected results. A group can be added
   3474  * or removed with a MAC address only after it has been reserved. Ideally,
   3475  * a successful reservation always leads to calling mac_group_addmac() to
   3476  * steer desired traffic. Failure of adding an unicast MAC address doesn't
   3477  * always imply that the group is functioning abnormally.
   3478  *
   3479  * Currently this function is called everywhere, and it reflects assumptions
   3480  * about MAC addresses in the implementation. CR 6735196.
   3481  */
   3482 int
   3483 mac_group_addmac(mac_group_t *group, const uint8_t *addr)
   3484 {
   3485 	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
   3486 	ASSERT(group->mrg_info.mgi_addmac != NULL);
   3487 
   3488 	return (group->mrg_info.mgi_addmac(group->mrg_info.mgi_driver, addr));
   3489 }
   3490 
   3491 /*
   3492  * Remove the association between MAC address and receive group.
   3493  */
   3494 int
   3495 mac_group_remmac(mac_group_t *group, const uint8_t *addr)
   3496 {
   3497 	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
   3498 	ASSERT(group->mrg_info.mgi_remmac != NULL);
   3499 
   3500 	return (group->mrg_info.mgi_remmac(group->mrg_info.mgi_driver, addr));
   3501 }
   3502 
   3503 /*
   3504  * Release a ring in use by marking it MR_FREE.
   3505  * Any other client may reserve it for its use.
   3506  */
   3507 void
   3508 mac_release_tx_ring(mac_ring_handle_t rh)
   3509 {
   3510 	mac_ring_t *ring = (mac_ring_t *)rh;
   3511 	mac_group_t *group = (mac_group_t *)ring->mr_gh;
   3512 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
   3513 
   3514 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   3515 	ASSERT(ring->mr_state != MR_FREE);
   3516 
   3517 	/*
   3518 	 * Default tx ring will be released by mac_stop().
   3519 	 */
   3520 	if (rh == mip->mi_default_tx_ring)
   3521 		return;
   3522 
   3523 	mac_stop_ring(ring);
   3524 
   3525 	ring->mr_state = MR_FREE;
   3526 	ring->mr_flag = 0;
   3527 }
   3528 
   3529 /*
   3530  * This is the entry point for packets transmitted through the bridging code.
   3531  * If no bridge is in place, MAC_RING_TX transmits using tx ring. The 'rh'
   3532  * pointer may be NULL to select the default ring.
   3533  */
   3534 mblk_t *
   3535 mac_bridge_tx(mac_impl_t *mip, mac_ring_handle_t rh, mblk_t *mp)
   3536 {
   3537 	mac_handle_t mh;
   3538 
   3539 	/*
   3540 	 * Once we take a reference on the bridge link, the bridge
   3541 	 * module itself can't unload, so the callback pointers are
   3542 	 * stable.
   3543 	 */
   3544 	mutex_enter(&mip->mi_bridge_lock);
   3545 	if ((mh = mip->mi_bridge_link) != NULL)
   3546 		mac_bridge_ref_cb(mh, B_TRUE);
   3547 	mutex_exit(&mip->mi_bridge_lock);
   3548 	if (mh == NULL) {
   3549 		MAC_RING_TX(mip, rh, mp, mp);
   3550 	} else {
   3551 		mp = mac_bridge_tx_cb(mh, rh, mp);
   3552 		mac_bridge_ref_cb(mh, B_FALSE);
   3553 	}
   3554 
   3555 	return (mp);
   3556 }
   3557 
   3558 /*
   3559  * Find a ring from its index.
   3560  */
   3561 mac_ring_t *
   3562 mac_find_ring(mac_group_t *group, int index)
   3563 {
   3564 	mac_ring_t *ring = group->mrg_rings;
   3565 
   3566 	for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next)
   3567 		if (ring->mr_index == index)
   3568 			break;
   3569 
   3570 	return (ring);
   3571 }
   3572 /*
   3573  * Add a ring to an existing group.
   3574  *
   3575  * The ring must be either passed directly (for example if the ring
   3576  * movement is initiated by the framework), or specified through a driver
 * index (for example when the ring is added by the driver).
   3578  *
   3579  * The caller needs to call mac_perim_enter() before calling this function.
   3580  */
   3581 int
   3582 i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index)
   3583 {
   3584 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
   3585 	mac_capab_rings_t *cap_rings;
   3586 	boolean_t driver_call = (ring == NULL);
   3587 	mac_group_type_t group_type;
   3588 	int ret = 0;
   3589 
   3590 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   3591 
   3592 	switch (group->mrg_type) {
   3593 	case MAC_RING_TYPE_RX:
   3594 		cap_rings = &mip->mi_rx_rings_cap;
   3595 		group_type = mip->mi_rx_group_type;
   3596 		break;
   3597 	case MAC_RING_TYPE_TX:
   3598 		cap_rings = &mip->mi_tx_rings_cap;
   3599 		group_type = mip->mi_tx_group_type;
   3600 		break;
   3601 	default:
   3602 		ASSERT(B_FALSE);
   3603 	}
   3604 
   3605 	/*
   3606 	 * There should be no ring with the same ring index in the target
   3607 	 * group.
   3608 	 */
   3609 	ASSERT(mac_find_ring(group, driver_call ? index : ring->mr_index) ==
   3610 	    NULL);
   3611 
   3612 	if (driver_call) {
   3613 		/*
   3614 		 * The function is called as a result of a request from
   3615 		 * a driver to add a ring to an existing group, for example
   3616 		 * from the aggregation driver. Allocate a new mac_ring_t
   3617 		 * for that ring.
   3618 		 */
   3619 		ring = mac_init_ring(mip, group, index, cap_rings);
   3620 		ASSERT(group->mrg_state > MAC_GROUP_STATE_UNINIT);
   3621 	} else {
   3622 		/*
   3623 		 * The function is called as a result of a MAC layer request
   3624 		 * to add a ring to an existing group. In this case the
   3625 		 * ring is being moved between groups, which requires
   3626 		 * the underlying driver to support dynamic grouping,
   3627 		 * and the mac_ring_t already exists.
   3628 		 */
   3629 		ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC);
   3630 		ASSERT(cap_rings->mr_gaddring != NULL);
   3631 		ASSERT(ring->mr_gh == NULL);
   3632 	}
   3633 
   3634 	/*
   3635 	 * At this point the ring should not be in use, and it should be
   3636 	 * of the right for the target group.
   3637 	 */
   3638 	ASSERT(ring->mr_state < MR_INUSE);
   3639 	ASSERT(ring->mr_srs == NULL);
   3640 	ASSERT(ring->mr_type == group->mrg_type);
   3641 
   3642 	if (!driver_call) {
   3643 		/*
   3644 		 * Add the driver level hardware ring if the process was not
   3645 		 * initiated by the driver, and the target group is not the
   3646 		 * group.
   3647 		 */
   3648 		if (group->mrg_driver != NULL) {
   3649 			cap_rings->mr_gaddring(group->mrg_driver,
   3650 			    ring->mr_driver, ring->mr_type);
   3651 		}
   3652 
   3653 		/*
   3654 		 * Insert the ring ahead existing rings.
   3655 		 */
   3656 		ring->mr_next = group->mrg_rings;
   3657 		group->mrg_rings = ring;
   3658 		ring->mr_gh = (mac_group_handle_t)group;
   3659 		group->mrg_cur_count++;
   3660 	}
   3661 
   3662 	/*
   3663 	 * If the group has not been actively used, we're done.
   3664 	 */
   3665 	if (group->mrg_index != -1 &&
   3666 	    group->mrg_state < MAC_GROUP_STATE_RESERVED)
   3667 		return (0);
   3668 
   3669 	/*
   3670 	 * Set up SRS/SR according to the ring type.
   3671 	 */
   3672 	switch (ring->mr_type) {
   3673 	case MAC_RING_TYPE_RX:
   3674 		/*
   3675 		 * Setup SRS on top of the new ring if the group is
   3676 		 * reserved for someones exclusive use.
   3677 		 */
   3678 		if (group->mrg_state == MAC_GROUP_STATE_RESERVED) {
   3679 			flow_entry_t *flent;
   3680 			mac_client_impl_t *mcip;
   3681 
   3682 			mcip = MAC_RX_GROUP_ONLY_CLIENT(group);
   3683 			ASSERT(mcip != NULL);
   3684 			flent = mcip->mci_flent;
   3685 			ASSERT(flent->fe_rx_srs_cnt > 0);
   3686 			mac_srs_group_setup(mcip, flent, group, SRST_LINK);
   3687 		}
   3688 		break;
   3689 	case MAC_RING_TYPE_TX:
   3690 		/*
   3691 		 * For TX this function is only invoked during the
   3692 		 * initial creation of a group when a share is
   3693 		 * associated with a MAC client. So the datapath is not
   3694 		 * yet setup, and will be setup later after the
   3695 		 * group has been reserved and populated.
   3696 		 */
   3697 		break;
   3698 	default:
   3699 		ASSERT(B_FALSE);
   3700 	}
   3701 
   3702 	/*
   3703 	 * Start the ring if needed. Failure causes to undo the grouping action.
   3704 	 */
   3705 	if ((ret = mac_start_ring(ring)) != 0) {
   3706 		if (ring->mr_type == MAC_RING_TYPE_RX) {
   3707 			if (ring->mr_srs != NULL) {
   3708 				mac_rx_srs_remove(ring->mr_srs);
   3709 				ring->mr_srs = NULL;
   3710 			}
   3711 		}
   3712 		if (!driver_call) {
   3713 			cap_rings->mr_gremring(group->mrg_driver,
   3714 			    ring->mr_driver, ring->mr_type);
   3715 		}
   3716 		group->mrg_cur_count--;
   3717 		group->mrg_rings = ring->mr_next;
   3718 
   3719 		ring->mr_gh = NULL;
   3720 
   3721 		if (driver_call)
   3722 			mac_ring_free(mip, ring);
   3723 
   3724 		return (ret);
   3725 	}
   3726 
   3727 	/*
   3728 	 * Update the ring's state.
   3729 	 */
   3730 	ring->mr_state = MR_INUSE;
   3731 	MAC_RING_UNMARK(ring, MR_INCIPIENT);
   3732 	return (0);
   3733 }
   3734 
   3735 /*
   3736  * Remove a ring from it's current group. MAC internal function for dynamic
   3737  * grouping.
   3738  *
   3739  * The caller needs to call mac_perim_enter() before calling this function.
   3740  */
   3741 void
   3742 i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring,
   3743     boolean_t driver_call)
   3744 {
   3745 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
   3746 	mac_capab_rings_t *cap_rings = NULL;
   3747 	mac_group_type_t group_type;
   3748 
   3749 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   3750 
   3751 	ASSERT(mac_find_ring(group, ring->mr_index) == ring);
   3752 	ASSERT((mac_group_t *)ring->mr_gh == group);
   3753 	ASSERT(ring->mr_type == group->mrg_type);
   3754 
   3755 	switch (ring->mr_type) {
   3756 	case MAC_RING_TYPE_RX:
   3757 		group_type = mip->mi_rx_group_type;
   3758 		cap_rings = &mip->mi_rx_rings_cap;
   3759 
   3760 		if (group->mrg_state >= MAC_GROUP_STATE_RESERVED)
   3761 			mac_stop_ring(ring);
   3762 
   3763 		/*
   3764 		 * Only hardware classified packets hold a reference to the
   3765 		 * ring all the way up the Rx path. mac_rx_srs_remove()
   3766 		 * will take care of quiescing the Rx path and removing the
   3767 		 * SRS. The software classified path neither holds a reference
   3768 		 * nor any association with the ring in mac_rx.
   3769 		 */
   3770 		if (ring->mr_srs != NULL) {
   3771 			mac_rx_srs_remove(ring->mr_srs);
   3772 			ring->mr_srs = NULL;
   3773 		}
   3774 		ring->mr_state = MR_FREE;
   3775 		ring->mr_flag = 0;
   3776 
   3777 		break;
   3778 	case MAC_RING_TYPE_TX:
   3779 		/*
   3780 		 * For TX this function is only invoked in two
   3781 		 * cases:
   3782 		 *
   3783 		 * 1) In the case of a failure during the
   3784 		 * initial creation of a group when a share is
   3785 		 * associated with a MAC client. So the SRS is not
   3786 		 * yet setup, and will be setup later after the
   3787 		 * group has been reserved and populated.
   3788 		 *
   3789 		 * 2) From mac_release_tx_group() when freeing
   3790 		 * a TX SRS.
   3791 		 *
   3792 		 * In both cases the SRS and its soft rings are
   3793 		 * already quiesced.
   3794 		 */
   3795 		ASSERT(!driver_call);
   3796 		group_type = mip->mi_tx_group_type;
   3797 		cap_rings = &mip->mi_tx_rings_cap;
   3798 		break;
   3799 	default:
   3800 		ASSERT(B_FALSE);
   3801 	}
   3802 
   3803 	/*
   3804 	 * Remove the ring from the group.
   3805 	 */
   3806 	if (ring == group->mrg_rings)
   3807 		group->mrg_rings = ring->mr_next;
   3808 	else {
   3809 		mac_ring_t *pre;
   3810 
   3811 		pre = group->mrg_rings;
   3812 		while (pre->mr_next != ring)
   3813 			pre = pre->mr_next;
   3814 		pre->mr_next = ring->mr_next;
   3815 	}
   3816 	group->mrg_cur_count--;
   3817 
   3818 	if (!driver_call) {
   3819 		ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC);
   3820 		ASSERT(cap_rings->mr_gremring != NULL);
   3821 
   3822 		/*
   3823 		 * Remove the driver level hardware ring.
   3824 		 */
   3825 		if (group->mrg_driver != NULL) {
   3826 			cap_rings->mr_gremring(group->mrg_driver,
   3827 			    ring->mr_driver, ring->mr_type);
   3828 		}
   3829 	}
   3830 
   3831 	ring->mr_gh = NULL;
   3832 	if (driver_call) {
   3833 		mac_ring_free(mip, ring);
   3834 	} else {
   3835 		ring->mr_state = MR_FREE;
   3836 		ring->mr_flag = 0;
   3837 	}
   3838 }
   3839 
   3840 /*
   3841  * Move a ring to the target group. If needed, remove the ring from the group
   3842  * that it currently belongs to.
   3843  *
   3844  * The caller need to enter MAC's perimeter by calling mac_perim_enter().
   3845  */
   3846 static int
   3847 mac_group_mov_ring(mac_impl_t *mip, mac_group_t *d_group, mac_ring_t *ring)
   3848 {
   3849 	mac_group_t *s_group = (mac_group_t *)ring->mr_gh;
   3850 	int rv;
   3851 
   3852 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   3853 	ASSERT(d_group != NULL);
   3854 	ASSERT(s_group->mrg_mh == d_group->mrg_mh);
   3855 
   3856 	if (s_group == d_group)
   3857 		return (0);
   3858 
   3859 	/*
   3860 	 * Remove it from current group first.
   3861 	 */
   3862 	if (s_group != NULL)
   3863 		i_mac_group_rem_ring(s_group, ring, B_FALSE);
   3864 
   3865 	/*
   3866 	 * Add it to the new group.
   3867 	 */
   3868 	rv = i_mac_group_add_ring(d_group, ring, 0);
   3869 	if (rv != 0) {
   3870 		/*
   3871 		 * Failed to add ring back to source group. If
   3872 		 * that fails, the ring is stuck in limbo, log message.
   3873 		 */
   3874 		if (i_mac_group_add_ring(s_group, ring, 0)) {
   3875 			cmn_err(CE_WARN, "%s: failed to move ring %p\n",
   3876 			    mip->mi_name, (void *)ring);
   3877 		}
   3878 	}
   3879 
   3880 	return (rv);
   3881 }
   3882 
   3883 /*
   3884  * Find a MAC address according to its value.
   3885  */
   3886 mac_address_t *
   3887 mac_find_macaddr(mac_impl_t *mip, uint8_t *mac_addr)
   3888 {
   3889 	mac_address_t *map;
   3890 
   3891 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   3892 
   3893 	for (map = mip->mi_addresses; map != NULL; map = map->ma_next) {
   3894 		if (bcmp(mac_addr, map->ma_addr, map->ma_len) == 0)
   3895 			break;
   3896 	}
   3897 
   3898 	return (map);
   3899 }
   3900 
   3901 /*
   3902  * Check whether the MAC address is shared by multiple clients.
   3903  */
   3904 boolean_t
   3905 mac_check_macaddr_shared(mac_address_t *map)
   3906 {
   3907 	ASSERT(MAC_PERIM_HELD((mac_handle_t)map->ma_mip));
   3908 
   3909 	return (map->ma_nusers > 1);
   3910 }
   3911 
   3912 /*
   3913  * Remove the specified MAC address from the MAC address list and free it.
   3914  */
   3915 static void
   3916 mac_free_macaddr(mac_address_t *map)
   3917 {
   3918 	mac_impl_t *mip = map->ma_mip;
   3919 
   3920 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   3921 	ASSERT(mip->mi_addresses != NULL);
   3922 
   3923 	map = mac_find_macaddr(mip, map->ma_addr);
   3924 
   3925 	ASSERT(map != NULL);
   3926 	ASSERT(map->ma_nusers == 0);
   3927 
   3928 	if (map == mip->mi_addresses) {
   3929 		mip->mi_addresses = map->ma_next;
   3930 	} else {
   3931 		mac_address_t *pre;
   3932 
   3933 		pre = mip->mi_addresses;
   3934 		while (pre->ma_next != map)
   3935 			pre = pre->ma_next;
   3936 		pre->ma_next = map->ma_next;
   3937 	}
   3938 
   3939 	kmem_free(map, sizeof (mac_address_t));
   3940 }
   3941 
   3942 /*
   3943  * Add a MAC address reference for a client. If the desired MAC address
   3944  * exists, add a reference to it. Otherwise, add the new address by adding
   3945  * it to a reserved group or setting promiscuous mode. Won't try different
   3946  * group is the group is non-NULL, so the caller must explictly share
   3947  * default group when needed.
   3948  *
   3949  * Note, the primary MAC address is initialized at registration time, so
   3950  * to add it to default group only need to activate it if its reference
   3951  * count is still zero. Also, some drivers may not have advertised RINGS
   3952  * capability.
   3953  */
   3954 int
   3955 mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr,
   3956     boolean_t use_hw)
   3957 {
   3958 	mac_address_t *map;
   3959 	int err = 0;
   3960 	boolean_t allocated_map = B_FALSE;
   3961 
   3962 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   3963 
   3964 	map = mac_find_macaddr(mip, mac_addr);
   3965 
   3966 	/*
   3967 	 * If the new MAC address has not been added. Allocate a new one
   3968 	 * and set it up.
   3969 	 */
   3970 	if (map == NULL) {
   3971 		map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP);
   3972 		map->ma_len = mip->mi_type->mt_addr_length;
   3973 		bcopy(mac_addr, map->ma_addr, map->ma_len);
   3974 		map->ma_nusers = 0;
   3975 		map->ma_group = group;
   3976 		map->ma_mip = mip;
   3977 
   3978 		/* add the new MAC address to the head of the address list */
   3979 		map->ma_next = mip->mi_addresses;
   3980 		mip->mi_addresses = map;
   3981 
   3982 		allocated_map = B_TRUE;
   3983 	}
   3984 
   3985 	ASSERT(map->ma_group == group);
   3986 
   3987 	/*
   3988 	 * If the MAC address is already in use, simply account for the
   3989 	 * new client.
   3990 	 */
   3991 	if (map->ma_nusers++ > 0)
   3992 		return (0);
   3993 
   3994 	/*
   3995 	 * Activate this MAC address by adding it to the reserved group.
   3996 	 */
   3997 	if (group != NULL) {
   3998 		err = mac_group_addmac(group, (const uint8_t *)mac_addr);
   3999 		if (err == 0) {
   4000 			map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED;
   4001 			return (0);
   4002 		}
   4003 	}
   4004 
   4005 	/*
   4006 	 * The MAC address addition failed. If the client requires a
   4007 	 * hardware classified MAC address, fail the operation.
   4008 	 */
   4009 	if (use_hw) {
   4010 		err = ENOSPC;
   4011 		goto bail;
   4012 	}
   4013 
   4014 	/*
   4015 	 * Try promiscuous mode.
   4016 	 *
   4017 	 * For drivers that don't advertise RINGS capability, do
   4018 	 * nothing for the primary address.
   4019 	 */
   4020 	if ((group == NULL) &&
   4021 	    (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) == 0)) {
   4022 		map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED;
   4023 		return (0);
   4024 	}
   4025 
   4026 	/*
   4027 	 * Enable promiscuous mode in order to receive traffic
   4028 	 * to the new MAC address.
   4029 	 */
   4030 	if ((err = i_mac_promisc_set(mip, B_TRUE)) == 0) {
   4031 		map->ma_type = MAC_ADDRESS_TYPE_UNICAST_PROMISC;
   4032 		return (0);
   4033 	}
   4034 
   4035 	/*
   4036 	 * Free the MAC address that could not be added. Don't free
   4037 	 * a pre-existing address, it could have been the entry
   4038 	 * for the primary MAC address which was pre-allocated by
   4039 	 * mac_init_macaddr(), and which must remain on the list.
   4040 	 */
   4041 bail:
   4042 	map->ma_nusers--;
   4043 	if (allocated_map)
   4044 		mac_free_macaddr(map);
   4045 	return (err);
   4046 }
   4047 
   4048 /*
   4049  * Remove a reference to a MAC address. This may cause to remove the MAC
   4050  * address from an associated group or to turn off promiscuous mode.
   4051  * The caller needs to handle the failure properly.
   4052  */
   4053 int
   4054 mac_remove_macaddr(mac_address_t *map)
   4055 {
   4056 	mac_impl_t *mip = map->ma_mip;
   4057 	int err = 0;
   4058 
   4059 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   4060 
   4061 	ASSERT(map == mac_find_macaddr(mip, map->ma_addr));
   4062 
   4063 	/*
   4064 	 * If it's not the last client using this MAC address, only update
   4065 	 * the MAC clients count.
   4066 	 */
   4067 	if (--map->ma_nusers > 0)
   4068 		return (0);
   4069 
   4070 	/*
   4071 	 * The MAC address is no longer used by any MAC client, so remove
   4072 	 * it from its associated group, or turn off promiscuous mode
   4073 	 * if it was enabled for the MAC address.
   4074 	 */
   4075 	switch (map->ma_type) {
   4076 	case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED:
   4077 		/*
   4078 		 * Don't free the preset primary address for drivers that
   4079 		 * don't advertise RINGS capability.
   4080 		 */
   4081 		if (map->ma_group == NULL)
   4082 			return (0);
   4083 
   4084 		err = mac_group_remmac(map->ma_group, map->ma_addr);
   4085 		break;
   4086 	case MAC_ADDRESS_TYPE_UNICAST_PROMISC:
   4087 		err = i_mac_promisc_set(mip, B_FALSE);
   4088 		break;
   4089 	default:
   4090 		ASSERT(B_FALSE);
   4091 	}
   4092 
   4093 	if (err != 0)
   4094 		return (err);
   4095 
   4096 	/*
   4097 	 * We created MAC address for the primary one at registration, so we
   4098 	 * won't free it here. mac_fini_macaddr() will take care of it.
   4099 	 */
   4100 	if (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) != 0)
   4101 		mac_free_macaddr(map);
   4102 
   4103 	return (0);
   4104 }
   4105 
   4106 /*
   4107  * Update an existing MAC address. The caller need to make sure that the new
   4108  * value has not been used.
   4109  */
   4110 int
   4111 mac_update_macaddr(mac_address_t *map, uint8_t *mac_addr)
   4112 {
   4113 	mac_impl_t *mip = map->ma_mip;
   4114 	int err = 0;
   4115 
   4116 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   4117 	ASSERT(mac_find_macaddr(mip, mac_addr) == NULL);
   4118 
   4119 	switch (map->ma_type) {
   4120 	case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED:
   4121 		/*
   4122 		 * Update the primary address for drivers that are not
   4123 		 * RINGS capable.
   4124 		 */
   4125 		if (map->ma_group == NULL) {
   4126 			err = mip->mi_unicst(mip->mi_driver, (const uint8_t *)
   4127 			    mac_addr);
   4128 			if (err != 0)
   4129 				return (err);
   4130 			break;
   4131 		}
   4132 
   4133 		/*
   4134 		 * If this MAC address is not currently in use,
   4135 		 * simply break out and update the value.
   4136 		 */
   4137 		if (map->ma_nusers == 0)
   4138 			break;
   4139 
   4140 		/*
   4141 		 * Need to replace the MAC address associated with a group.
   4142 		 */
   4143 		err = mac_group_remmac(map->ma_group, map->ma_addr);
   4144 		if (err != 0)
   4145 			return (err);
   4146 
   4147 		err = mac_group_addmac(map->ma_group, mac_addr);
   4148 
   4149 		/*
   4150 		 * Failure hints hardware error. The MAC layer needs to
   4151 		 * have error notification facility to handle this.
   4152 		 * Now, simply try to restore the value.
   4153 		 */
   4154 		if (err != 0)
   4155 			(void) mac_group_addmac(map->ma_group, map->ma_addr);
   4156 
   4157 		break;
   4158 	case MAC_ADDRESS_TYPE_UNICAST_PROMISC:
   4159 		/*
   4160 		 * Need to do nothing more if in promiscuous mode.
   4161 		 */
   4162 		break;
   4163 	default:
   4164 		ASSERT(B_FALSE);
   4165 	}
   4166 
   4167 	/*
   4168 	 * Successfully replaced the MAC address.
   4169 	 */
   4170 	if (err == 0)
   4171 		bcopy(mac_addr, map->ma_addr, map->ma_len);
   4172 
   4173 	return (err);
   4174 }
   4175 
   4176 /*
   4177  * Freshen the MAC address with new value. Its caller must have updated the
   4178  * hardware MAC address before calling this function.
   4179  * This funcitons is supposed to be used to handle the MAC address change
   4180  * notification from underlying drivers.
   4181  */
   4182 void
   4183 mac_freshen_macaddr(mac_address_t *map, uint8_t *mac_addr)
   4184 {
   4185 	mac_impl_t *mip = map->ma_mip;
   4186 
   4187 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   4188 	ASSERT(mac_find_macaddr(mip, mac_addr) == NULL);
   4189 
   4190 	/*
   4191 	 * Freshen the MAC address with new value.
   4192 	 */
   4193 	bcopy(mac_addr, map->ma_addr, map->ma_len);
   4194 	bcopy(mac_addr, mip->mi_addr, map->ma_len);
   4195 
   4196 	/*
   4197 	 * Update all MAC clients that share this MAC address.
   4198 	 */
   4199 	mac_unicast_update_clients(mip, map);
   4200 }
   4201 
   4202 /*
   4203  * Set up the primary MAC address.
   4204  */
   4205 void
   4206 mac_init_macaddr(mac_impl_t *mip)
   4207 {
   4208 	mac_address_t *map;
   4209 
   4210 	/*
   4211 	 * The reference count is initialized to zero, until it's really
   4212 	 * activated.
   4213 	 */
   4214 	map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP);
   4215 	map->ma_len = mip->mi_type->mt_addr_length;
   4216 	bcopy(mip->mi_addr, map->ma_addr, map->ma_len);
   4217 
   4218 	/*
   4219 	 * If driver advertises RINGS capability, it shouldn't have initialized
   4220 	 * its primary MAC address. For other drivers, including VNIC, the
   4221 	 * primary address must work after registration.
   4222 	 */
   4223 	if (mip->mi_rx_groups == NULL)
   4224 		map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED;
   4225 
   4226 	/*
   4227 	 * The primary MAC address is reserved for default group according
   4228 	 * to current design.
   4229 	 */
   4230 	map->ma_group = mip->mi_rx_groups;
   4231 	map->ma_mip = mip;
   4232 
   4233 	mip->mi_addresses = map;
   4234 }
   4235 
   4236 /*
   4237  * Clean up the primary MAC address. Note, only one primary MAC address
   4238  * is allowed. All other MAC addresses must have been freed appropriately.
   4239  */
   4240 void
   4241 mac_fini_macaddr(mac_impl_t *mip)
   4242 {
   4243 	mac_address_t *map = mip->mi_addresses;
   4244 
   4245 	if (map == NULL)
   4246 		return;
   4247 
   4248 	/*
   4249 	 * If mi_addresses is initialized, there should be exactly one
   4250 	 * entry left on the list with no users.
   4251 	 */
   4252 	ASSERT(map->ma_nusers == 0);
   4253 	ASSERT(map->ma_next == NULL);
   4254 
   4255 	kmem_free(map, sizeof (mac_address_t));
   4256 	mip->mi_addresses = NULL;
   4257 }
   4258 
   4259 /*
   4260  * Logging related functions.
   4261  */
   4262 
   4263 /* Write the Flow description to the log file */
   4264 int
   4265 mac_write_flow_desc(flow_entry_t *flent, mac_client_impl_t *mcip)
   4266 {
   4267 	flow_desc_t		*fdesc;
   4268 	mac_resource_props_t	*mrp;
   4269 	net_desc_t		ndesc;
   4270 
   4271 	bzero(&ndesc, sizeof (net_desc_t));
   4272 
   4273 	/*
   4274 	 * Grab the fe_lock to see a self-consistent fe_flow_desc.
   4275 	 * Updates to the fe_flow_desc are done under the fe_lock
   4276 	 */
   4277 	mutex_enter(&flent->fe_lock);
   4278 	fdesc = &flent->fe_flow_desc;
   4279 	mrp = &flent->fe_resource_props;
   4280 
   4281 	ndesc.nd_name = flent->fe_flow_name;
   4282 	ndesc.nd_devname = mcip->mci_name;
   4283 	bcopy(fdesc->fd_src_mac, ndesc.nd_ehost, ETHERADDRL);
   4284 	bcopy(fdesc->fd_dst_mac, ndesc.nd_edest, ETHERADDRL);
   4285 	ndesc.nd_sap = htonl(fdesc->fd_sap);
   4286 	ndesc.nd_isv4 = (uint8_t)fdesc->fd_ipversion == IPV4_VERSION;
   4287 	ndesc.nd_bw_limit = mrp->mrp_maxbw;
   4288 	if (ndesc.nd_isv4) {
   4289 		ndesc.nd_saddr[3] = htonl(fdesc->fd_local_addr.s6_addr32[3]);
   4290 		ndesc.nd_daddr[3] = htonl(fdesc->fd_remote_addr.s6_addr32[3]);
   4291 	} else {
   4292 		bcopy(&fdesc->fd_local_addr, ndesc.nd_saddr, IPV6_ADDR_LEN);
   4293 		bcopy(&fdesc->fd_remote_addr, ndesc.nd_daddr, IPV6_ADDR_LEN);
   4294 	}
   4295 	ndesc.nd_sport = htons(fdesc->fd_local_port);
   4296 	ndesc.nd_dport = htons(fdesc->fd_remote_port);
   4297 	ndesc.nd_protocol = (uint8_t)fdesc->fd_protocol;
   4298 	mutex_exit(&flent->fe_lock);
   4299 
   4300 	return (exacct_commit_netinfo((void *)&ndesc, EX_NET_FLDESC_REC));
   4301 }
   4302 
   4303 /* Write the Flow statistics to the log file */
   4304 int
   4305 mac_write_flow_stats(flow_entry_t *flent)
   4306 {
   4307 	flow_stats_t	*fl_stats;
   4308 	net_stat_t	nstat;
   4309 
   4310 	fl_stats = &flent->fe_flowstats;
   4311 	nstat.ns_name = flent->fe_flow_name;
   4312 	nstat.ns_ibytes = fl_stats->fs_rbytes;
   4313 	nstat.ns_obytes = fl_stats->fs_obytes;
   4314 	nstat.ns_ipackets = fl_stats->fs_ipackets;
   4315 	nstat.ns_opackets = fl_stats->fs_opackets;
   4316 	nstat.ns_ierrors = fl_stats->fs_ierrors;
   4317 	nstat.ns_oerrors = fl_stats->fs_oerrors;
   4318 
   4319 	return (exacct_commit_netinfo((void *)&nstat, EX_NET_FLSTAT_REC));
   4320 }
   4321 
   4322 /* Write the Link Description to the log file */
   4323 int
   4324 mac_write_link_desc(mac_client_impl_t *mcip)
   4325 {
   4326 	net_desc_t		ndesc;
   4327 	flow_entry_t		*flent = mcip->mci_flent;
   4328 
   4329 	bzero(&ndesc, sizeof (net_desc_t));
   4330 
   4331 	ndesc.nd_name = mcip->mci_name;
   4332 	ndesc.nd_devname = mcip->mci_name;
   4333 	ndesc.nd_isv4 = B_TRUE;
   4334 	/*
   4335 	 * Grab the fe_lock to see a self-consistent fe_flow_desc.
   4336 	 * Updates to the fe_flow_desc are done under the fe_lock
   4337 	 * after removing the flent from the flow table.
   4338 	 */
   4339 	mutex_enter(&flent->fe_lock);
   4340 	bcopy(flent->fe_flow_desc.fd_src_mac, ndesc.nd_ehost, ETHERADDRL);
   4341 	mutex_exit(&flent->fe_lock);
   4342 
   4343 	return (exacct_commit_netinfo((void *)&ndesc, EX_NET_LNDESC_REC));
   4344 }
   4345 
   4346 /* Write the Link statistics to the log file */
   4347 int
   4348 mac_write_link_stats(mac_client_impl_t *mcip)
   4349 {
   4350 	net_stat_t	nstat;
   4351 
   4352 	nstat.ns_name = mcip->mci_name;
   4353 	nstat.ns_ibytes = mcip->mci_stat_ibytes;
   4354 	nstat.ns_obytes = mcip->mci_stat_obytes;
   4355 	nstat.ns_ipackets = mcip->mci_stat_ipackets;
   4356 	nstat.ns_opackets = mcip->mci_stat_opackets;
   4357 	nstat.ns_ierrors = mcip->mci_stat_ierrors;
   4358 	nstat.ns_oerrors = mcip->mci_stat_oerrors;
   4359 
   4360 	return (exacct_commit_netinfo((void *)&nstat, EX_NET_LNSTAT_REC));
   4361 }
   4362 
   4363 /*
   4364  * For a given flow, if the descrition has not been logged before, do it now.
   4365  * If it is a VNIC, then we have collected information about it from the MAC
   4366  * table, so skip it.
   4367  */
   4368 /*ARGSUSED*/
   4369 static int
   4370 mac_log_flowinfo(flow_entry_t *flent, void *args)
   4371 {
   4372 	mac_client_impl_t	*mcip = flent->fe_mcip;
   4373 
   4374 	if (mcip == NULL)
   4375 		return (0);
   4376 
   4377 	/*
   4378 	 * If the name starts with "vnic", and fe_user_generated is true (to
   4379 	 * exclude the mcast and active flow entries created implicitly for
   4380 	 * a vnic, it is a VNIC flow.  i.e. vnic1 is a vnic flow,
   4381 	 * vnic/bge1/mcast1 is not and neither is vnic/bge1/active.
   4382 	 */
   4383 	if (strncasecmp(flent->fe_flow_name, "vnic", 4) == 0 &&
   4384 	    (flent->fe_type & FLOW_USER) != 0) {
   4385 		return (0);
   4386 	}
   4387 
   4388 	if (!flent->fe_desc_logged) {
   4389 		/*
   4390 		 * We don't return error because we want to continu the
   4391 		 * walk in case this is the last walk which means we
   4392 		 * need to reset fe_desc_logged in all the flows.
   4393 		 */
   4394 		if (mac_write_flow_desc(flent, mcip) != 0)
   4395 			return (0);
   4396 		flent->fe_desc_logged = B_TRUE;
   4397 	}
   4398 
   4399 	/*
   4400 	 * Regardless of the error, we want to proceed in case we have to
   4401 	 * reset fe_desc_logged.
   4402 	 */
   4403 	(void) mac_write_flow_stats(flent);
   4404 
   4405 	if (mcip != NULL && !(mcip->mci_state_flags & MCIS_DESC_LOGGED))
   4406 		flent->fe_desc_logged = B_FALSE;
   4407 
   4408 	return (0);
   4409 }
   4410 
/*
 * State handed to i_mac_log_walker() for one walk of the MAC hash.
 */
typedef struct i_mac_log_state_s {
	boolean_t	mi_last;	/* B_TRUE for the final walk */
	int		mi_fenable;	/* copy of mac_flow_log_enable */
	int		mi_lenable;	/* copy of mac_link_log_enable */
} i_mac_log_state_t;
   4416 
   4417 /*
   4418  * Walk the mac_impl_ts and log the description for each mac client of this mac,
   4419  * if it hasn't already been done. Additionally, log statistics for the link as
   4420  * well. Walk the flow table and log information for each flow as well.
   4421  * If it is the last walk (mci_last), then we turn off mci_desc_logged (and
   4422  * also fe_desc_logged, if flow logging is on) since we want to log the
   4423  * description if and when logging is restarted.
   4424  */
   4425 /*ARGSUSED*/
   4426 static uint_t
   4427 i_mac_log_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
   4428 {
   4429 	mac_impl_t		*mip = (mac_impl_t *)val;
   4430 	i_mac_log_state_t	*lstate = (i_mac_log_state_t *)arg;
   4431 	int			ret;
   4432 	mac_client_impl_t	*mcip;
   4433 
   4434 	/*
   4435 	 * Only walk the client list for NIC and etherstub
   4436 	 */
   4437 	if ((mip->mi_state_flags & MIS_DISABLED) ||
   4438 	    ((mip->mi_state_flags & MIS_IS_VNIC) &&
   4439 	    (mac_get_lower_mac_handle((mac_handle_t)mip) != NULL)))
   4440 		return (MH_WALK_CONTINUE);
   4441 
   4442 	for (mcip = mip->mi_clients_list; mcip != NULL;
   4443 	    mcip = mcip->mci_client_next) {
   4444 		if (!MCIP_DATAPATH_SETUP(mcip))
   4445 			continue;
   4446 		if (lstate->mi_lenable) {
   4447 			if (!(mcip->mci_state_flags & MCIS_DESC_LOGGED)) {
   4448 				ret = mac_write_link_desc(mcip);
   4449 				if (ret != 0) {
   4450 				/*
   4451 				 * We can't terminate it if this is the last
   4452 				 * walk, else there might be some links with
   4453 				 * mi_desc_logged set to true, which means
   4454 				 * their description won't be logged the next
   4455 				 * time logging is started (similarly for the
   4456 				 * flows within such links). We can continue
   4457 				 * without walking the flow table (i.e. to
   4458 				 * set fe_desc_logged to false) because we
   4459 				 * won't have written any flow stuff for this
   4460 				 * link as we haven't logged the link itself.
   4461 				 */
   4462 					if (lstate->mi_last)
   4463 						return (MH_WALK_CONTINUE);
   4464 					else
   4465 						return (MH_WALK_TERMINATE);
   4466 				}
   4467 				mcip->mci_state_flags |= MCIS_DESC_LOGGED;
   4468 			}
   4469 		}
   4470 
   4471 		if (mac_write_link_stats(mcip) != 0 && !lstate->mi_last)
   4472 			return (MH_WALK_TERMINATE);
   4473 
   4474 		if (lstate->mi_last)
   4475 			mcip->mci_state_flags &= ~MCIS_DESC_LOGGED;
   4476 
   4477 		if (lstate->mi_fenable) {
   4478 			if (mcip->mci_subflow_tab != NULL) {
   4479 				(void) mac_flow_walk(mcip->mci_subflow_tab,
   4480 				    mac_log_flowinfo, mip);
   4481 			}
   4482 		}
   4483 	}
   4484 	return (MH_WALK_CONTINUE);
   4485 }
   4486 
   4487 /*
   4488  * The timer thread that runs every mac_logging_interval seconds and logs
   4489  * link and/or flow information.
   4490  */
   4491 /* ARGSUSED */
   4492 void
   4493 mac_log_linkinfo(void *arg)
   4494 {
   4495 	i_mac_log_state_t	lstate;
   4496 
   4497 	rw_enter(&i_mac_impl_lock, RW_READER);
   4498 	if (!mac_flow_log_enable && !mac_link_log_enable) {
   4499 		rw_exit(&i_mac_impl_lock);
   4500 		return;
   4501 	}
   4502 	lstate.mi_fenable = mac_flow_log_enable;
   4503 	lstate.mi_lenable = mac_link_log_enable;
   4504 	lstate.mi_last = B_FALSE;
   4505 	rw_exit(&i_mac_impl_lock);
   4506 
   4507 	mod_hash_walk(i_mac_impl_hash, i_mac_log_walker, &lstate);
   4508 
   4509 	rw_enter(&i_mac_impl_lock, RW_WRITER);
   4510 	if (mac_flow_log_enable || mac_link_log_enable) {
   4511 		mac_logging_timer = timeout(mac_log_linkinfo, NULL,
   4512 		    SEC_TO_TICK(mac_logging_interval));
   4513 	}
   4514 	rw_exit(&i_mac_impl_lock);
   4515 }
   4516 
/*
 * State handed to i_mac_fastpath_disable_walker() for one walk of the
 * MAC hash.
 */
typedef struct i_mac_fastpath_state_s {
	boolean_t	mf_disable;	/* B_TRUE: disable, else enable */
	int		mf_err;		/* result of mac_fastpath_disable() */
} i_mac_fastpath_state_t;
   4521 
   4522 /*ARGSUSED*/
   4523 static uint_t
   4524 i_mac_fastpath_disable_walker(mod_hash_key_t key, mod_hash_val_t *val,
   4525     void *arg)
   4526 {
   4527 	i_mac_fastpath_state_t	*state = arg;
   4528 	mac_handle_t		mh = (mac_handle_t)val;
   4529 
   4530 	if (state->mf_disable)
   4531 		state->mf_err = mac_fastpath_disable(mh);
   4532 	else
   4533 		mac_fastpath_enable(mh);
   4534 
   4535 	return (state->mf_err == 0 ? MH_WALK_CONTINUE : MH_WALK_TERMINATE);
   4536 }
   4537 
   4538 /*
   4539  * Start the logging timer.
   4540  */
   4541 int
   4542 mac_start_logusage(mac_logtype_t type, uint_t interval)
   4543 {
   4544 	i_mac_fastpath_state_t state = {B_TRUE, 0};
   4545 	int err;
   4546 
   4547 	rw_enter(&i_mac_impl_lock, RW_WRITER);
   4548 	switch (type) {
   4549 	case MAC_LOGTYPE_FLOW:
   4550 		if (mac_flow_log_enable) {
   4551 			rw_exit(&i_mac_impl_lock);
   4552 			return (0);
   4553 		}
   4554 		/* FALLTHRU */
   4555 	case MAC_LOGTYPE_LINK:
   4556 		if (mac_link_log_enable) {
   4557 			rw_exit(&i_mac_impl_lock);
   4558 			return (0);
   4559 		}
   4560 		break;
   4561 	default:
   4562 		ASSERT(0);
   4563 	}
   4564 
   4565 	/* Disable fastpath */
   4566 	mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_disable_walker, &state);
   4567 	if ((err = state.mf_err) != 0) {
   4568 		/* Reenable fastpath  */
   4569 		state.mf_disable = B_FALSE;
   4570 		state.mf_err = 0;
   4571 		mod_hash_walk(i_mac_impl_hash,
   4572 		    i_mac_fastpath_disable_walker, &state);
   4573 		rw_exit(&i_mac_impl_lock);
   4574 		return (err);
   4575 	}
   4576 
   4577 	switch (type) {
   4578 	case MAC_LOGTYPE_FLOW:
   4579 		mac_flow_log_enable = B_TRUE;
   4580 		/* FALLTHRU */
   4581 	case MAC_LOGTYPE_LINK:
   4582 		mac_link_log_enable = B_TRUE;
   4583 		break;
   4584 	}
   4585 
   4586 	mac_logging_interval = interval;
   4587 	rw_exit(&i_mac_impl_lock);
   4588 	mac_log_linkinfo(NULL);
   4589 	return (0);
   4590 }
   4591 
   4592 /*
   4593  * Stop the logging timer if both Link and Flow logging are turned off.
   4594  */
   4595 void
   4596 mac_stop_logusage(mac_logtype_t type)
   4597 {
   4598 	i_mac_log_state_t	lstate;
   4599 	i_mac_fastpath_state_t	state = {B_FALSE, 0};
   4600 
   4601 	rw_enter(&i_mac_impl_lock, RW_WRITER);
   4602 	lstate.mi_fenable = mac_flow_log_enable;
   4603 	lstate.mi_lenable = mac_link_log_enable;
   4604 
   4605 	/* Last walk */
   4606 	lstate.mi_last = B_TRUE;
   4607 
   4608 	switch (type) {
   4609 	case MAC_LOGTYPE_FLOW:
   4610 		if (lstate.mi_fenable) {
   4611 			ASSERT(mac_link_log_enable);
   4612 			mac_flow_log_enable = B_FALSE;
   4613 			mac_link_log_enable = B_FALSE;
   4614 			break;
   4615 		}
   4616 		/* FALLTHRU */
   4617 	case MAC_LOGTYPE_LINK:
   4618 		if (!lstate.mi_lenable || mac_flow_log_enable) {
   4619 			rw_exit(&i_mac_impl_lock);
   4620 			return;
   4621 		}
   4622 		mac_link_log_enable = B_FALSE;
   4623 		break;
   4624 	default:
   4625 		ASSERT(0);
   4626 	}
   4627 
   4628 	/* Reenable fastpath */
   4629 	mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_disable_walker, &state);
   4630 
   4631 	rw_exit(&i_mac_impl_lock);
   4632 	(void) untimeout(mac_logging_timer);
   4633 	mac_logging_timer = 0;
   4634 
   4635 	/* Last walk */
   4636 	mod_hash_walk(i_mac_impl_hash, i_mac_log_walker, &lstate);
   4637 }
   4638 
   4639 /*
   4640  * Walk the rx and tx SRS/SRs for a flow and update the priority value.
   4641  */
   4642 void
   4643 mac_flow_update_priority(mac_client_impl_t *mcip, flow_entry_t *flent)
   4644 {
   4645 	pri_t			pri;
   4646 	int			count;
   4647 	mac_soft_ring_set_t	*mac_srs;
   4648 
   4649 	if (flent->fe_rx_srs_cnt <= 0)
   4650 		return;
   4651 
   4652 	if (((mac_soft_ring_set_t *)flent->fe_rx_srs[0])->srs_type ==
   4653 	    SRST_FLOW) {
   4654 		pri = FLOW_PRIORITY(mcip->mci_min_pri,
   4655 		    mcip->mci_max_pri,
   4656 		    flent->fe_resource_props.mrp_priority);
   4657 	} else {
   4658 		pri = mcip->mci_max_pri;
   4659 	}
   4660 
   4661 	for (count = 0; count < flent->fe_rx_srs_cnt; count++) {
   4662 		mac_srs = flent->fe_rx_srs[count];
   4663 		mac_update_srs_priority(mac_srs, pri);
   4664 	}
   4665 	/*
   4666 	 * If we have a Tx SRS, we need to modify all the threads associated
   4667 	 * with it.
   4668 	 */
   4669 	if (flent->fe_tx_srs != NULL)
   4670 		mac_update_srs_priority(flent->fe_tx_srs, pri);
   4671 }
   4672 
   4673 /*
   4674  * RX and TX rings are reserved according to different semantics depending
   4675  * on the requests from the MAC clients and type of rings:
   4676  *
   4677  * On the Tx side, by default we reserve individual rings, independently from
   4678  * the groups.
   4679  *
   4680  * On the Rx side, the reservation is at the granularity of the group
   4681  * of rings, and used for v12n level 1 only. It has a special case for the
   4682  * primary client.
   4683  *
   4684  * If a share is allocated to a MAC client, we allocate a TX group and an
   4685  * RX group to the client, and assign TX rings and RX rings to these
   4686  * groups according to information gathered from the driver through
   4687  * the share capability.
   4688  *
   4689  * The foreseeable evolution of Rx rings will handle v12n level 2 and higher
   4690  * to allocate individual rings out of a group and program the hw classifier
   4691  * based on IP address or higher level criteria.
   4692  */
   4693 
   4694 /*
   4695  * mac_reserve_tx_ring()
   4696  * Reserve a unused ring by marking it with MR_INUSE state.
   4697  * As reserved, the ring is ready to function.
   4698  *
   4699  * Notes for Hybrid I/O:
   4700  *
   4701  * If a specific ring is needed, it is specified through the desired_ring
   4702  * argument. Otherwise that argument is set to NULL.
   4703  * If the desired ring was previous allocated to another client, this
   4704  * function swaps it with a new ring from the group of unassigned rings.
   4705  */
   4706 mac_ring_t *
   4707 mac_reserve_tx_ring(mac_impl_t *mip, mac_ring_t *desired_ring)
   4708 {
   4709 	mac_group_t *group;
   4710 	mac_ring_t *ring;
   4711 
   4712 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   4713 
   4714 	if (mip->mi_tx_groups == NULL)
   4715 		return (NULL);
   4716 
   4717 	/*
   4718 	 * Find an available ring and start it before changing its status.
   4719 	 * The unassigned rings are at the end of the mi_tx_groups
   4720 	 * array.
   4721 	 */
   4722 	group = mip->mi_tx_groups + mip->mi_tx_group_count;
   4723 
   4724 	for (ring = group->mrg_rings; ring != NULL;
   4725 	    ring = ring->mr_next) {
   4726 		if (desired_ring == NULL) {
   4727 			if (ring->mr_state == MR_FREE)
   4728 				/* wanted any free ring and found one */
   4729 				break;
   4730 		} else {
   4731 			mac_ring_t *sring;
   4732 			mac_client_impl_t *client;
   4733 			mac_soft_ring_set_t *srs;
   4734 
   4735 			if (ring != desired_ring)
   4736 				/* wants a desired ring but this one ain't it */
   4737 				continue;
   4738 
   4739 			if (ring->mr_state == MR_FREE)
   4740 				break;
   4741 
   4742 			/*
   4743 			 * Found the desired ring but it's already in use.
   4744 			 * Swap it with a new ring.
   4745 			 */
   4746 
   4747 			/* find the client which owns that ring */
   4748 			for (client = mip->mi_clients_list; client != NULL;
   4749 			    client = client->mci_client_next) {
   4750 				srs = MCIP_TX_SRS(client);
   4751 				if (srs != NULL && mac_tx_srs_ring_present(srs,
   4752 				    desired_ring)) {
   4753 					/* found our ring */
   4754 					break;
   4755 				}
   4756 			}
   4757 			if (client == NULL) {
   4758 				/*
   4759 				 * The TX ring is in use, but it's not
   4760 				 * associated with any clients, so it
   4761 				 * has to be the default ring. In that
   4762 				 * case we can simply assign a new ring
   4763 				 * as the default ring, and we're done.
   4764 				 */
   4765 				ASSERT(mip->mi_default_tx_ring ==
   4766 				    (mac_ring_handle_t)desired_ring);
   4767 
   4768 				/*
   4769 				 * Quiesce all clients on top of
   4770 				 * the NIC to make sure there are no
   4771 				 * pending threads still relying on
   4772 				 * that default ring, for example
   4773 				 * the multicast path.
   4774 				 */
   4775 				for (client = mip->mi_clients_list;
   4776 				    client != NULL;
   4777 				    client = client->mci_client_next) {
   4778 					mac_tx_client_quiesce(client,
   4779 					    SRS_QUIESCE);
   4780 				}
   4781 
   4782 				mip->mi_default_tx_ring = (mac_ring_handle_t)
   4783 				    mac_reserve_tx_ring(mip, NULL);
   4784 
   4785 				/* resume the clients */
   4786 				for (client = mip->mi_clients_list;
   4787 				    client != NULL;
   4788 				    client = client->mci_client_next)
   4789 					mac_tx_client_restart(client);
   4790 
   4791 				break;
   4792 			}
   4793 
   4794 			/*
   4795 			 * Note that we cannot simply invoke the group
   4796 			 * add/rem routines since the client doesn't have a
   4797 			 * TX group. So we need to instead add/remove
   4798 			 * the rings from the SRS.
   4799 			 */
   4800 			ASSERT(client->mci_share == NULL);
   4801 
   4802 			/* first quiece the client */
   4803 			mac_tx_client_quiesce(client, SRS_QUIESCE);
   4804 
   4805 			/* give a new ring to the client... */
   4806 			sring = mac_reserve_tx_ring(mip, NULL);
   4807 			if (sring != NULL) {
   4808 				/*
   4809 				 * There are no other available ring
   4810 				 * on that MAC instance. The client
   4811 				 * will fallback to the shared TX
   4812 				 * ring.
   4813 				 */
   4814 				mac_tx_srs_add_ring(srs, sring);
   4815 			}
   4816 
   4817 			/* ... in exchange for our desired ring */
   4818 			mac_tx_srs_del_ring(srs, desired_ring);
   4819 
   4820 			/* restart the client */
   4821 			mac_tx_client_restart(client);
   4822 
   4823 			if (mip->mi_default_tx_ring ==
   4824 			    (mac_ring_handle_t)desired_ring) {
   4825 				/*
   4826 				 * The desired ring is the default ring,
   4827 				 * and there are one or more clients
   4828 				 * using that default ring directly.
   4829 				 */
   4830 				mip->mi_default_tx_ring =
   4831 				    (mac_ring_handle_t)sring;
   4832 				/*
   4833 				 * Find clients using default ring and
   4834 				 * swap it with the new default ring.
   4835 				 */
   4836 				for (client = mip->mi_clients_list;
   4837 				    client != NULL;
   4838 				    client = client->mci_client_next) {
   4839 					srs = MCIP_TX_SRS(client);
   4840 					if (srs != NULL &&
   4841 					    mac_tx_srs_ring_present(srs,
   4842 					    desired_ring)) {
   4843 						/* first quiece the client */
   4844 						mac_tx_client_quiesce(client,
   4845 						    SRS_QUIESCE);
   4846 
   4847 						/*
   4848 						 * Give it the new default
   4849 						 * ring, and remove the old
   4850 						 * one.
   4851 						 */
   4852 						if (sring != NULL) {
   4853 							mac_tx_srs_add_ring(srs,
   4854 							    sring);
   4855 						}
   4856 						mac_tx_srs_del_ring(srs,
   4857 						    desired_ring);
   4858 
   4859 						/* restart the client */
   4860 						mac_tx_client_restart(client);
   4861 					}
   4862 				}
   4863 			}
   4864 			break;
   4865 		}
   4866 	}
   4867 
   4868 	if (ring != NULL) {
   4869 		if (mac_start_ring(ring) != 0)
   4870 			return (NULL);
   4871 		ring->mr_state = MR_INUSE;
   4872 	}
   4873 
   4874 	return (ring);
   4875 }
   4876 
   4877 /*
   4878  * Minimum number of rings to leave in the default TX group when allocating
   4879  * rings to new clients.
   4880  */
   4881 static uint_t mac_min_rx_default_rings = 1;
   4882 
   4883 /*
   4884  * Populate a zero-ring group with rings. If the share is non-NULL,
   4885  * the rings are chosen according to that share.
   4886  * Invoked after allocating a new RX or TX group through
   4887  * mac_reserve_rx_group() or mac_reserve_tx_group(), respectively.
   4888  * Returns zero on success, an errno otherwise.
   4889  */
   4890 int
   4891 i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type,
   4892     mac_group_t *src_group, mac_group_t *new_group, mac_share_handle_t share)
   4893 {
   4894 	mac_ring_t **rings, *tmp_ring[1], *ring;
   4895 	uint_t nrings;
   4896 	int rv, i, j;
   4897 
   4898 	ASSERT(mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC &&
   4899 	    mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC);
   4900 	ASSERT(new_group->mrg_cur_count == 0);
   4901 
   4902 	/*
   4903 	 * First find the rings to allocate to the group.
   4904 	 */
   4905 	if (share != NULL) {
   4906 		/* get rings through ms_squery() */
   4907 		mip->mi_share_capab.ms_squery(share, ring_type, NULL, &nrings);
   4908 		ASSERT(nrings != 0);
   4909 		rings = kmem_alloc(nrings * sizeof (mac_ring_handle_t),
   4910 		    KM_SLEEP);
   4911 		mip->mi_share_capab.ms_squery(share, ring_type,
   4912 		    (mac_ring_handle_t *)rings, &nrings);
   4913 	} else {
   4914 		/* this function is called for TX only with a share */
   4915 		ASSERT(ring_type == MAC_RING_TYPE_RX);
   4916 		/*
   4917 		 * Pick one ring from default group.
   4918 		 *
   4919 		 * for now pick the second ring which requires the first ring
   4920 		 * at index 0 to stay in the default group, since it is the
   4921 		 * ring which carries the multicast traffic.
   4922 		 * We need a better way for a driver to indicate this,
   4923 		 * for example a per-ring flag.
   4924 		 */
   4925 		for (ring = src_group->mrg_rings; ring != NULL;
   4926 		    ring = ring->mr_next) {
   4927 			if (ring->mr_index != 0)
   4928 				break;
   4929 		}
   4930 		ASSERT(ring != NULL);
   4931 		nrings = 1;
   4932 		tmp_ring[0] = ring;
   4933 		rings = tmp_ring;
   4934 	}
   4935 
   4936 	switch (ring_type) {
   4937 	case MAC_RING_TYPE_RX:
   4938 		if (src_group->mrg_cur_count - nrings <
   4939 		    mac_min_rx_default_rings) {
   4940 			/* we ran out of rings */
   4941 			return (ENOSPC);
   4942 		}
   4943 
   4944 		/* move receive rings to new group */
   4945 		for (i = 0; i < nrings; i++) {
   4946 			rv = mac_group_mov_ring(mip, new_group, rings[i]);
   4947 			if (rv != 0) {
   4948 				/* move rings back on failure */
   4949 				for (j = 0; j < i; j++) {
   4950 					(void) mac_group_mov_ring(mip,
   4951 					    src_group, rings[j]);
   4952 				}
   4953 				return (rv);
   4954 			}
   4955 		}
   4956 		break;
   4957 
   4958 	case MAC_RING_TYPE_TX: {
   4959 		mac_ring_t *tmp_ring;
   4960 
   4961 		/* move the TX rings to the new group */
   4962 		ASSERT(src_group == NULL);
   4963 		for (i = 0; i < nrings; i++) {
   4964 			/* get the desired ring */
   4965 			tmp_ring = mac_reserve_tx_ring(mip, rings[i]);
   4966 			ASSERT(tmp_ring == rings[i]);
   4967 			rv = mac_group_mov_ring(mip, new_group, rings[i]);
   4968 			if (rv != 0) {
   4969 				/* cleanup on failure */
   4970 				for (j = 0; j < i; j++) {
   4971 					(void) mac_group_mov_ring(mip,
   4972 					    mip->mi_tx_groups +
   4973 					    mip->mi_tx_group_count, rings[j]);
   4974 				}
   4975 			}
   4976 		}
   4977 		break;
   4978 	}
   4979 	}
   4980 
   4981 	if (share != NULL) {
   4982 		/* add group to share */
   4983 		mip->mi_share_capab.ms_sadd(share, new_group->mrg_driver);
   4984 		/* free temporary array of rings */
   4985 		kmem_free(rings, nrings * sizeof (mac_ring_handle_t));
   4986 	}
   4987 
   4988 	return (0);
   4989 }
   4990 
   4991 void
   4992 mac_rx_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip)
   4993 {
   4994 	mac_grp_client_t *mgcp;
   4995 
   4996 	for (mgcp = grp->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) {
   4997 		if (mgcp->mgc_client == mcip)
   4998 			break;
   4999 	}
   5000 
   5001 	VERIFY(mgcp == NULL);
   5002 
   5003 	mgcp = kmem_zalloc(sizeof (mac_grp_client_t), KM_SLEEP);
   5004 	mgcp->mgc_client = mcip;
   5005 	mgcp->mgc_next = grp->mrg_clients;
   5006 	grp->mrg_clients = mgcp;
   5007 
   5008 }
   5009 
   5010 void
   5011 mac_rx_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip)
   5012 {
   5013 	mac_grp_client_t *mgcp, **pprev;
   5014 
   5015 	for (pprev = &grp->mrg_clients, mgcp = *pprev; mgcp != NULL;
   5016 	    pprev = &mgcp->mgc_next, mgcp = *pprev) {
   5017 		if (mgcp->mgc_client == mcip)
   5018 			break;
   5019 	}
   5020 
   5021 	ASSERT(mgcp != NULL);
   5022 
   5023 	*pprev = mgcp->mgc_next;
   5024 	kmem_free(mgcp, sizeof (mac_grp_client_t));
   5025 }
   5026 
   5027 /*
   5028  * mac_reserve_rx_group()
   5029  *
   5030  * Finds an available group and exclusively reserves it for a client.
   5031  * The group is chosen to suit the flow's resource controls (bandwidth and
   5032  * fanout requirements) and the address type.
   5033  * If the requestor is the pimary MAC then return the group with the
   5034  * largest number of rings, otherwise the default ring when available.
   5035  */
   5036 mac_group_t *
   5037 mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr,
   5038     mac_rx_group_reserve_type_t rtype)
   5039 {
   5040 	mac_share_handle_t	share = mcip->mci_share;
   5041 	mac_impl_t		*mip = mcip->mci_mip;
   5042 	mac_group_t		*grp = NULL;
   5043 	int			i, start, loopcount;
   5044 	int			err;
   5045 	mac_address_t		*map;
   5046 
   5047 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   5048 
   5049 	/* Check if a group already has this mac address (case of VLANs) */
   5050 	if ((map = mac_find_macaddr(mip, mac_addr)) != NULL)
   5051 		return (map->ma_group);
   5052 
   5053 	if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0 ||
   5054 	    rtype == MAC_RX_NO_RESERVE)
   5055 		return (NULL);
   5056 
   5057 	/*
   5058 	 * Try to exclusively reserve a RX group.
   5059 	 *
   5060 	 * For flows requires SW_RING it always goes to the default group
   5061 	 * (Until we can explicitely call out default groups (CR 6695600),
   5062 	 * we assume that the default group is always at position zero);
   5063 	 *
   5064 	 * For flows requires HW_DEFAULT_RING (unicast flow of the primary
   5065 	 * client), try to reserve the default RX group only.
   5066 	 *
   5067 	 * For flows requires HW_RING (unicast flow of other clients), try
   5068 	 * to reserve non-default RX group then the default group.
   5069 	 */
   5070 	switch (rtype) {
   5071 	case MAC_RX_RESERVE_DEFAULT:
   5072 		start = 0;
   5073 		loopcount = 1;
   5074 		break;
   5075 	case MAC_RX_RESERVE_NONDEFAULT:
   5076 		start = 1;
   5077 		loopcount = mip->mi_rx_group_count;
   5078 	}
   5079 
   5080 	for (i = start; i < start + loopcount; i++) {
   5081 		grp = &mip->mi_rx_groups[i % mip->mi_rx_group_count];
   5082 
   5083 		DTRACE_PROBE3(rx__group__trying, char *, mip->mi_name,
   5084 		    int, grp->mrg_index, mac_group_state_t, grp->mrg_state);
   5085 
   5086 		/*
   5087 		 * Check to see whether this mac client is the only client
   5088 		 * on this RX group. If not, we cannot exclusively reserve
   5089 		 * this RX group.
   5090 		 */
   5091 		if (!MAC_RX_GROUP_NO_CLIENT(grp) &&
   5092 		    (MAC_RX_GROUP_ONLY_CLIENT(grp) != mcip)) {
   5093 			continue;
   5094 		}
   5095 
   5096 		/*
   5097 		 * This group could already be SHARED by other multicast
   5098 		 * flows on this client. In that case, the group would
   5099 		 * be shared and has already been started.
   5100 		 */
   5101 		ASSERT(grp->mrg_state != MAC_GROUP_STATE_UNINIT);
   5102 
   5103 		if ((grp->mrg_state == MAC_GROUP_STATE_REGISTERED) &&
   5104 		    (mac_start_group(grp) != 0)) {
   5105 			continue;
   5106 		}
   5107 
   5108 		if ((i % mip->mi_rx_group_count) == 0 ||
   5109 		    mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) {
   5110 			break;
   5111 		}
   5112 
   5113 		ASSERT(grp->mrg_cur_count == 0);
   5114 
   5115 		/*
   5116 		 * Populate the group. Rings should be taken
   5117 		 * from the default group at position 0 for now.
   5118 		 */
   5119 
   5120 		err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX,
   5121 		    &mip->mi_rx_groups[0], grp, share);
   5122 		if (err == 0)
   5123 			break;
   5124 
   5125 		DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *,
   5126 		    mip->mi_name, int, grp->mrg_index, int, err);
   5127 
   5128 		/*
   5129 		 * It's a dynamic group but the grouping operation failed.
   5130 		 */
   5131 		mac_stop_group(grp);
   5132 	}
   5133 
   5134 	if (i == start + loopcount)
   5135 		return (NULL);
   5136 
   5137 	ASSERT(grp != NULL);
   5138 
   5139 	DTRACE_PROBE2(rx__group__reserved,
   5140 	    char *, mip->mi_name, int, grp->mrg_index);
   5141 	return (grp);
   5142 }
   5143 
   5144 /*
   5145  * mac_rx_release_group()
   5146  *
   5147  * This is called when there are no clients left for the group.
   5148  * The group is stopped and marked MAC_GROUP_STATE_REGISTERED,
   5149  * and if it is a non default group, the shares are removed and
   5150  * all rings are assigned back to default group.
   5151  */
   5152 void
   5153 mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group)
   5154 {
   5155 	mac_impl_t	*mip = mcip->mci_mip;
   5156 	mac_ring_t	*ring;
   5157 
   5158 	ASSERT(group != &mip->mi_rx_groups[0]);
   5159 
   5160 	/*
   5161 	 * This is the case where there are no clients left. Any
   5162 	 * SRS etc on this group have also be quiesced.
   5163 	 */
   5164 	for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) {
   5165 		if (ring->mr_classify_type == MAC_HW_CLASSIFIER) {
   5166 			ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED);
   5167 			/*
   5168 			 * Remove the SRS associated with the HW ring.
   5169 			 * As a result, polling will be disabled.
   5170 			 */
   5171 			ring->mr_srs = NULL;
   5172 		}
   5173 		ASSERT(ring->mr_state == MR_INUSE);
   5174 		mac_stop_ring(ring);
   5175 		ring->mr_state = MR_FREE;
   5176 		ring->mr_flag = 0;
   5177 	}
   5178 
   5179 	/* remove group from share */
   5180 	if (mcip->mci_share != NULL) {
   5181 		mip->mi_share_capab.ms_sremove(mcip->mci_share,
   5182 		    group->mrg_driver);
   5183 	}
   5184 
   5185 	if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
   5186 		mac_ring_t *ring;
   5187 
   5188 		/*
   5189 		 * Rings were dynamically allocated to group.
   5190 		 * Move rings back to default group.
   5191 		 */
   5192 		while ((ring = group->mrg_rings) != NULL) {
   5193 			(void) mac_group_mov_ring(mip,
   5194 			    &mip->mi_rx_groups[0], ring);
   5195 		}
   5196 	}
   5197 	mac_stop_group(group);
   5198 	/*
   5199 	 * Possible improvement: See if we can assign the group just released
   5200 	 * to a another client of the mip
   5201 	 */
   5202 }
   5203 
   5204 /*
   5205  * Reserves a TX group for the specified share. Invoked by mac_tx_srs_setup()
   5206  * when a share was allocated to the client.
   5207  */
   5208 mac_group_t *
   5209 mac_reserve_tx_group(mac_impl_t *mip, mac_share_handle_t share)
   5210 {
   5211 	mac_group_t *grp;
   5212 	int rv, i;
   5213 
   5214 	/*
   5215 	 * TX groups are currently allocated only to MAC clients
   5216 	 * which are associated with a share. Since we have a fixed
   5217 	 * number of share and groups, and we already successfully
   5218 	 * allocated a share, find an available TX group.
   5219 	 */
   5220 	ASSERT(share != NULL);
   5221 	ASSERT(mip->mi_tx_group_free > 0);
   5222 
   5223 	for (i = 0; i <  mip->mi_tx_group_count; i++) {
   5224 		grp = &mip->mi_tx_groups[i];
   5225 
   5226 		if ((grp->mrg_state == MAC_GROUP_STATE_RESERVED) ||
   5227 		    (grp->mrg_state == MAC_GROUP_STATE_UNINIT))
   5228 			continue;
   5229 
   5230 		rv = mac_start_group(grp);
   5231 		ASSERT(rv == 0);
   5232 
   5233 		grp->mrg_state = MAC_GROUP_STATE_RESERVED;
   5234 		break;
   5235 	}
   5236 
   5237 	ASSERT(grp != NULL);
   5238 
   5239 	/*
   5240 	 * Populate the group. Rings should be taken from the group
   5241 	 * of unassigned rings, which is past the array of TX
   5242 	 * groups adversized by the driver.
   5243 	 */
   5244 	rv = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, NULL,
   5245 	    grp, share);
   5246 	if (rv != 0) {
   5247 		DTRACE_PROBE3(tx__group__reserve__alloc__rings,
   5248 		    char *, mip->mi_name, int, grp->mrg_index, int, rv);
   5249 
   5250 		mac_stop_group(grp);
   5251 		grp->mrg_state = MAC_GROUP_STATE_UNINIT;
   5252 
   5253 		return (NULL);
   5254 	}
   5255 
   5256 	mip->mi_tx_group_free--;
   5257 
   5258 	return (grp);
   5259 }
   5260 
   5261 void
   5262 mac_release_tx_group(mac_impl_t *mip, mac_group_t *grp)
   5263 {
   5264 	mac_client_impl_t *mcip = grp->mrg_tx_client;
   5265 	mac_share_handle_t share = mcip->mci_share;
   5266 	mac_ring_t *ring;
   5267 
   5268 	ASSERT(mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC);
   5269 	ASSERT(share != NULL);
   5270 	ASSERT(grp->mrg_state == MAC_GROUP_STATE_RESERVED);
   5271 
   5272 	mip->mi_share_capab.ms_sremove(share, grp->mrg_driver);
   5273 	while ((ring = grp->mrg_rings) != NULL) {
   5274 		/* move the ring back to the pool */
   5275 		(void) mac_group_mov_ring(mip, mip->mi_tx_groups +
   5276 		    mip->mi_tx_group_count, ring);
   5277 	}
   5278 	mac_stop_group(grp);
   5279 	mac_set_rx_group_state(grp, MAC_GROUP_STATE_REGISTERED);
   5280 	grp->mrg_tx_client = NULL;
   5281 	mip->mi_tx_group_free++;
   5282 }
   5283 
   5284 /*
   5285  * This is a 1-time control path activity initiated by the client (IP).
   5286  * The mac perimeter protects against other simultaneous control activities,
   5287  * for example an ioctl that attempts to change the degree of fanout and
   5288  * increase or decrease the number of softrings associated with this Tx SRS.
   5289  */
   5290 static mac_tx_notify_cb_t *
   5291 mac_client_tx_notify_add(mac_client_impl_t *mcip,
   5292     mac_tx_notify_t notify, void *arg)
   5293 {
   5294 	mac_cb_info_t *mcbi;
   5295 	mac_tx_notify_cb_t *mtnfp;
   5296 
   5297 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
   5298 
   5299 	mtnfp = kmem_zalloc(sizeof (mac_tx_notify_cb_t), KM_SLEEP);
   5300 	mtnfp->mtnf_fn = notify;
   5301 	mtnfp->mtnf_arg = arg;
   5302 	mtnfp->mtnf_link.mcb_objp = mtnfp;
   5303 	mtnfp->mtnf_link.mcb_objsize = sizeof (mac_tx_notify_cb_t);
   5304 	mtnfp->mtnf_link.mcb_flags = MCB_TX_NOTIFY_CB_T;
   5305 
   5306 	mcbi = &mcip->mci_tx_notify_cb_info;
   5307 	mutex_enter(mcbi->mcbi_lockp);
   5308 	mac_callback_add(mcbi, &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link);
   5309 	mutex_exit(mcbi->mcbi_lockp);
   5310 	return (mtnfp);
   5311 }
   5312 
   5313 static void
   5314 mac_client_tx_notify_remove(mac_client_impl_t *mcip, mac_tx_notify_cb_t *mtnfp)
   5315 {
   5316 	mac_cb_info_t	*mcbi;
   5317 	mac_cb_t	**cblist;
   5318 
   5319 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
   5320 
   5321 	if (!mac_callback_find(&mcip->mci_tx_notify_cb_info,
   5322 	    &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link)) {
   5323 		cmn_err(CE_WARN,
   5324 		    "mac_client_tx_notify_remove: callback not "
   5325 		    "found, mcip 0x%p mtnfp 0x%p", (void *)mcip, (void *)mtnfp);
   5326 		return;
   5327 	}
   5328 
   5329 	mcbi = &mcip->mci_tx_notify_cb_info;
   5330 	cblist = &mcip->mci_tx_notify_cb_list;
   5331 	mutex_enter(mcbi->mcbi_lockp);
   5332 	if (mac_callback_remove(mcbi, cblist, &mtnfp->mtnf_link))
   5333 		kmem_free(mtnfp, sizeof (mac_tx_notify_cb_t));
   5334 	else
   5335 		mac_callback_remove_wait(&mcip->mci_tx_notify_cb_info);
   5336 	mutex_exit(mcbi->mcbi_lockp);
   5337 }
   5338 
   5339 /*
   5340  * mac_client_tx_notify():
   5341  * call to add and remove flow control callback routine.
   5342  */
   5343 mac_tx_notify_handle_t
   5344 mac_client_tx_notify(mac_client_handle_t mch, mac_tx_notify_t callb_func,
   5345     void *ptr)
   5346 {
   5347 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   5348 	mac_tx_notify_cb_t	*mtnfp = NULL;
   5349 
   5350 	i_mac_perim_enter(mcip->mci_mip);
   5351 
   5352 	if (callb_func != NULL) {
   5353 		/* Add a notify callback */
   5354 		mtnfp = mac_client_tx_notify_add(mcip, callb_func, ptr);
   5355 	} else {
   5356 		mac_client_tx_notify_remove(mcip, (mac_tx_notify_cb_t *)ptr);
   5357 	}
   5358 	i_mac_perim_exit(mcip->mci_mip);
   5359 
   5360 	return ((mac_tx_notify_handle_t)mtnfp);
   5361 }
   5362 
   5363 void
   5364 mac_bridge_vectors(mac_bridge_tx_t txf, mac_bridge_rx_t rxf,
   5365     mac_bridge_ref_t reff, mac_bridge_ls_t lsf)
   5366 {
   5367 	mac_bridge_tx_cb = txf;
   5368 	mac_bridge_rx_cb = rxf;
   5369 	mac_bridge_ref_cb = reff;
   5370 	mac_bridge_ls_cb = lsf;
   5371 }
   5372 
   5373 int
   5374 mac_bridge_set(mac_handle_t mh, mac_handle_t link)
   5375 {
   5376 	mac_impl_t *mip = (mac_impl_t *)mh;
   5377 	int retv;
   5378 
   5379 	mutex_enter(&mip->mi_bridge_lock);
   5380 	if (mip->mi_bridge_link == NULL) {
   5381 		mip->mi_bridge_link = link;
   5382 		retv = 0;
   5383 	} else {
   5384 		retv = EBUSY;
   5385 	}
   5386 	mutex_exit(&mip->mi_bridge_lock);
   5387 	if (retv == 0) {
   5388 		mac_poll_state_change(mh, B_FALSE);
   5389 		mac_capab_update(mh);
   5390 	}
   5391 	return (retv);
   5392 }
   5393 
   5394 /*
   5395  * Disable bridging on the indicated link.
   5396  */
   5397 void
   5398 mac_bridge_clear(mac_handle_t mh, mac_handle_t link)
   5399 {
   5400 	mac_impl_t *mip = (mac_impl_t *)mh;
   5401 
   5402 	mutex_enter(&mip->mi_bridge_lock);
   5403 	ASSERT(mip->mi_bridge_link == link);
   5404 	mip->mi_bridge_link = NULL;
   5405 	mutex_exit(&mip->mi_bridge_lock);
   5406 	mac_poll_state_change(mh, B_TRUE);
   5407 	mac_capab_update(mh);
   5408 }
   5409 
   5410 void
   5411 mac_no_active(mac_handle_t mh)
   5412 {
   5413 	mac_impl_t *mip = (mac_impl_t *)mh;
   5414 
   5415 	i_mac_perim_enter(mip);
   5416 	mip->mi_state_flags |= MIS_NO_ACTIVE;
   5417 	i_mac_perim_exit(mip);
   5418 }
   5419