Home | History | Annotate | Download | only in mac
      1      0     stevel /*
      2      0     stevel  * CDDL HEADER START
      3      0     stevel  *
      4      0     stevel  * The contents of this file are subject to the terms of the
      5   1852   yz147064  * Common Development and Distribution License (the "License").
      6   1852   yz147064  * You may not use this file except in compliance with the License.
      7      0     stevel  *
      8      0     stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9      0     stevel  * or http://www.opensolaris.org/os/licensing.
     10      0     stevel  * See the License for the specific language governing permissions
     11      0     stevel  * and limitations under the License.
     12      0     stevel  *
     13      0     stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14      0     stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15      0     stevel  * If applicable, add the following below this CDDL HEADER, with the
     16      0     stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17      0     stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18      0     stevel  *
     19      0     stevel  * CDDL HEADER END
     20      0     stevel  */
     21   5084    johnlev 
     22      0     stevel /*
     23   8603     Girish  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24      0     stevel  * Use is subject to license terms.
     25      0     stevel  */
     26      0     stevel 
     27      0     stevel /*
     28      0     stevel  * MAC Services Module
     29   8275       Eric  *
     30   8275       Eric  * The GLDv3 framework locking -  The MAC layer
     31   8275       Eric  * --------------------------------------------
     32   8275       Eric  *
     33   8275       Eric  * The MAC layer is central to the GLD framework and can provide the locking
     34   8275       Eric  * framework needed for itself and for the use of MAC clients. MAC end points
     35   8275       Eric  * are fairly disjoint and don't share a lot of state. So a coarse grained
     36   8275       Eric  * multi-threading scheme is to single thread all create/modify/delete or set
     37   8275       Eric  * type of control operations on a per mac end point while allowing data threads
     38   8275       Eric  * concurrently.
     39   8275       Eric  *
     40   8275       Eric  * Control operations (set) that modify a mac end point are always serialized on
     41   8275       Eric  * a per mac end point basis. We have at most 1 such thread per mac end point
     42   8275       Eric  * at a time.
     43   8275       Eric  *
     44   8275       Eric  * All other operations that are not serialized are essentially multi-threaded.
     45   8275       Eric  * For example a control operation (get) like getting statistics which may not
     46   8275       Eric  * care about reading values atomically or data threads sending or receiving
     47   8275       Eric  * data. Mostly these type of operations don't modify the control state. Any
     48   8275       Eric  * state these operations care about are protected using traditional locks.
     49   8275       Eric  *
     50   8275       Eric  * The perimeter only serializes serial operations. It does not imply there
     51   8275       Eric  * aren't any other concurrent operations. However a serialized operation may
     52   8275       Eric  * sometimes need to make sure it is the only thread. In this case it needs
     53   8275       Eric  * to use reference counting mechanisms to cv_wait until any current data
     54   8275       Eric  * threads are done.
     55   8275       Eric  *
     56   8275       Eric  * The mac layer itself does not hold any locks across a call to another layer.
     57   8275       Eric  * The perimeter is however held across a down call to the driver to make the
     58   8275       Eric  * whole control operation atomic with respect to other control operations.
     59   8275       Eric  * Also the data path and get type control operations may proceed concurrently.
     60   8275       Eric  * These operations synchronize with the single serial operation on a given mac
     61   8275       Eric  * end point using regular locks. The perimeter ensures that conflicting
     62   8275       Eric  * operations like say a mac_multicast_add and a mac_multicast_remove on the
     63   8275       Eric  * same mac end point don't interfere with each other and also ensures that the
     64   8275       Eric  * changes in the mac layer and the call to the underlying driver to say add a
     65   8275       Eric  * multicast address are done atomically without interference from a thread
     66   8275       Eric  * trying to delete the same address.
     67   8275       Eric  *
     68   8275       Eric  * For example, consider
     69   8275       Eric  * mac_multicast_add()
     70   8275       Eric  * {
     71   8275       Eric  *	mac_perimeter_enter();	serialize all control operations
     72   8275       Eric  *
     73   8275       Eric  *	grab list lock		protect against access by data threads
     74   8275       Eric  *	add to list
     75   8275       Eric  *	drop list lock
     76   8275       Eric  *
     77   8275       Eric  *	call driver's mi_multicst
     78   8275       Eric  *
     79   8275       Eric  *	mac_perimeter_exit();
     80   8275       Eric  * }
     81   8275       Eric  *
     82   8275       Eric  * To lessen the number of serialization locks and simplify the lock hierarchy,
     83   8275       Eric  * we serialize all the control operations on a per mac end point by using a
     84   8275       Eric  * single serialization lock called the perimeter. We allow recursive entry into
     85   8275       Eric  * the perimeter to facilitate use of this mechanism by both the mac client and
     86   8275       Eric  * the MAC layer itself.
     87   8275       Eric  *
     88   8275       Eric  * MAC client means an entity that does an operation on a mac handle
     89   8275       Eric  * obtained from a mac_open/mac_client_open. Similarly MAC driver means
     90   8275       Eric  * an entity that does an operation on a mac handle obtained from a
     91   8275       Eric  * mac_register. An entity could be both client and driver but on different
     92   8275       Eric  * handles eg. aggr. and should only make the corresponding mac interface calls
     93   8275       Eric  * i.e. mac driver interface or mac client interface as appropriate for that
     94   8275       Eric  * mac handle.
     95   8275       Eric  *
     96   8275       Eric  * General rules.
     97   8275       Eric  * -------------
     98   8275       Eric  *
     99   8275       Eric  * R1. The lock order of upcall threads is naturally opposite to downcall
    100   8275       Eric  * threads. Hence upcalls must not hold any locks across layers for fear of
    101   8275       Eric  * recursive lock enter and lock order violation. This applies to all layers.
    102   8275       Eric  *
    103   8275       Eric  * R2. The perimeter is just another lock. Since it is held in the down
    104   8275       Eric  * direction, acquiring the perimeter in an upcall is prohibited as it would
    105   8275       Eric  * cause a deadlock. This applies to all layers.
    106   8275       Eric  *
    107   8275       Eric  * Note that upcalls that need to grab the mac perimeter (for example
    108   8275       Eric  * mac_notify upcalls) can still achieve that by posting the request to a
    109   8275       Eric  * thread, which can then grab all the required perimeters and locks in the
    110   8275       Eric  * right global order. Note that in the above example the mac layer itself
    111   8275       Eric  * won't grab the mac perimeter in the mac_notify upcall, instead the upcall
    112   8275       Eric  * to the client must do that. Please see the aggr code for an example.
    113   8275       Eric  *
    114   8275       Eric  * MAC client rules
    115   8275       Eric  * ----------------
    116   8275       Eric  *
    117   8275       Eric  * R3. A MAC client may use the MAC provided perimeter facility to serialize
    118   8275       Eric  * control operations on a per mac end point. It does this by acquiring
    119   8275       Eric  * and holding the perimeter across a sequence of calls to the mac layer.
    120   8275       Eric  * This ensures atomicity across the entire block of mac calls. In this
    121   8275       Eric  * model the MAC client must not hold any client locks across the calls to
    122   8275       Eric  * the mac layer. This model is the preferred solution.
    123   8275       Eric  *
    124   8275       Eric  * R4. However if a MAC client has a lot of global state across all mac end
    125   8275       Eric  * points the per mac end point serialization may not be sufficient. In this
    126   8275       Eric  * case the client may choose to use global locks or use its own serialization.
    127   8275       Eric  * To avoid deadlocks, these client layer locks held across the mac calls
    128   8275       Eric  * in the control path must never be acquired by the data path for the reason
    129   8275       Eric  * mentioned below.
    130   8275       Eric  *
    131   8275       Eric  * (Assume that a control operation that holds a client lock blocks in the
    132   8275       Eric  * mac layer waiting for upcall reference counts to drop to zero. If an upcall
    133   8275       Eric  * data thread that holds this reference count, tries to acquire the same
    134   8275       Eric  * client lock subsequently it will deadlock).
    135   8275       Eric  *
    136   8275       Eric  * A MAC client may follow either the R3 model or the R4 model, but can't
    137   8275       Eric  * mix both. In the former, the hierarchy is Perim -> client locks, but in
    138   8275       Eric  * the latter it is client locks -> Perim.
    139   8275       Eric  *
    140   8275       Eric  * R5. MAC clients must make MAC calls (excluding data calls) in a cv_wait'able
    141   8275       Eric  * context since they may block while trying to acquire the perimeter.
    142   8275       Eric  * In addition some calls may block waiting for upcall refcnts to come down to
    143   8275       Eric  * zero.
    144   8275       Eric  *
    145   8275       Eric  * R6. MAC clients must make sure that they are single threaded and all threads
    146   8275       Eric  * from the top (in particular data threads) have finished before calling
    147   8275       Eric  * mac_client_close. The MAC framework does not track the number of client
    148   8275       Eric  * threads using the mac client handle. Also mac clients must make sure
    149   8275       Eric  * they have undone all the control operations before calling mac_client_close.
    150   8275       Eric  * For example mac_unicast_remove/mac_multicast_remove to undo the corresponding
    151   8275       Eric  * mac_unicast_add/mac_multicast_add.
    152   8275       Eric  *
    153   8275       Eric  * MAC framework rules
    154   8275       Eric  * -------------------
    155   8275       Eric  *
    156   8275       Eric  * R7. The mac layer itself must not hold any mac layer locks (except the mac
    157   8275       Eric  * perimeter) across a call to any other layer from the mac layer. The call to
    158   8275       Eric  * any other layer could be via mi_* entry points, classifier entry points into
    159   8275       Eric  * the driver or via upcall pointers into layers above. The mac perimeter may
    160   8275       Eric  * be acquired or held only in the down direction, for e.g. when calling into
    161   8275       Eric  * a mi_* driver entry point to provide atomicity of the operation.
    162   8275       Eric  *
    163   8275       Eric  * R8. Since it is not guaranteed (see R14) that drivers won't hold locks across
    164   8275       Eric  * mac driver interfaces, the MAC layer must provide a cut out for control
    165   8275       Eric  * interfaces like upcall notifications and start them in a separate thread.
    166   8275       Eric  *
    167   8275       Eric  * R9. Note that locking order also implies a plumbing order. For example
    168   8275       Eric  * VNICs are allowed to be created over aggrs, but not vice-versa. An attempt
    169   8275       Eric  * to plumb in any other order must be failed at mac_open time, otherwise it
    170   8275       Eric  * could lead to deadlocks due to inverse locking order.
    171   8275       Eric  *
    172   8275       Eric  * R10. MAC driver interfaces must not block since the driver could call them
    173   8275       Eric  * in interrupt context.
    174   8275       Eric  *
    175   8275       Eric  * R11. Walkers must preferably not hold any locks while calling walker
    176   8275       Eric  * callbacks. Instead these can operate on reference counts. In simple
    177   8275       Eric  * callbacks it may be ok to hold a lock and call the callbacks, but this is
    178   8275       Eric  * harder to maintain in the general case of arbitrary callbacks.
    179   8275       Eric  *
    180   8275       Eric  * R12. The MAC layer must protect upcall notification callbacks using reference
    181   8275       Eric  * counts rather than holding locks across the callbacks.
    182   8275       Eric  *
    183   8275       Eric  * R13. Given the variety of drivers, it is preferable if the MAC layer can make
    184   8275       Eric  * sure that any pointers (such as mac ring pointers) it passes to the driver
    185   8275       Eric  * remain valid until mac unregister time. Currently the mac layer achieves
    186   8275       Eric  * this by using generation numbers for rings and freeing the mac rings only
    187   8275       Eric  * at unregister time.  The MAC layer must provide a layer of indirection and
    188   8275       Eric  * must not expose underlying driver rings or driver data structures/pointers
    189   8275       Eric  * directly to MAC clients.
    190   8275       Eric  *
    191   8275       Eric  * MAC driver rules
    192   8275       Eric  * ----------------
    193   8275       Eric  *
    194   8275       Eric  * R14. It would be preferable if MAC drivers don't hold any locks across any
    195   8275       Eric  * mac call. However at a minimum they must not hold any locks across data
    196   8275       Eric  * upcalls. They must also make sure that all references to mac data structures
    197   8275       Eric  * are cleaned up and that it is single threaded at mac_unregister time.
    198   8275       Eric  *
    199   8275       Eric  * R15. MAC driver interfaces don't block and so the action may be done
    200   8275       Eric  * asynchronously in a separate thread as for example handling notifications.
    201   8275       Eric  * The driver must not assume that the action is complete when the call
    202   8275       Eric  * returns.
    203   8275       Eric  *
    204   8275       Eric  * R16. Drivers must maintain a generation number per Rx ring, and pass it
    205   8275       Eric  * back to mac_rx_ring(); They are expected to increment the generation
    206   8275       Eric  * number whenever the ring's stop routine is invoked.
    207   8275       Eric  * See comments in mac_rx_ring();
    208   8275       Eric  *
    209   8275       Eric  * R17. Similarly mi_stop is another synchronization point and the driver must
    210   8275       Eric  * ensure that all upcalls are done and there won't be any future upcall
    211   8275       Eric  * before returning from mi_stop.
    212   8275       Eric  *
    213   8275       Eric  * R18. The driver may assume that all set/modify control operations via
    214   8275       Eric  * the mi_* entry points are single threaded on a per mac end point.
    215   8275       Eric  *
    216   8275       Eric  * Lock and Perimeter hierarchy scenarios
    217   8275       Eric  * ---------------------------------------
    218   8275       Eric  *
    219   8275       Eric  * i_mac_impl_lock -> mi_rw_lock -> srs_lock -> s_ring_lock[i_mac_tx_srs_notify]
    220   8275       Eric  *
    221   8275       Eric  * ft_lock -> fe_lock [mac_flow_lookup]
    222   8275       Eric  *
    223   8275       Eric  * mi_rw_lock -> fe_lock [mac_bcast_send]
    224   8275       Eric  *
    225   8275       Eric  * srs_lock -> mac_bw_lock [mac_rx_srs_drain_bw]
    226   8275       Eric  *
    227   8275       Eric  * cpu_lock -> mac_srs_g_lock -> srs_lock -> s_ring_lock [mac_walk_srs_and_bind]
    228   8275       Eric  *
    229   8275       Eric  * i_dls_devnet_lock -> mac layer locks [dls_devnet_rename]
    230   8275       Eric  *
    231   8275       Eric  * Perimeters are ordered P1 -> P2 -> P3 from top to bottom in order of mac
    232   8275       Eric  * client to driver. In the case of clients that explicitly use the mac provided
    233   8275       Eric  * perimeter mechanism for its serialization, the hierarchy is
    234   8275       Eric  * Perimeter -> mac layer locks, since the client never holds any locks across
    235   8275       Eric  * the mac calls. In the case of clients that use its own locks the hierarchy
    236   8275       Eric  * is Client locks -> Mac Perim -> Mac layer locks. The client never explicitly
    237   8275       Eric  * calls mac_perim_enter/exit in this case.
    238   8275       Eric  *
    239   8275       Eric  * Subflow creation rules
    240   8275       Eric  * ---------------------------
    241   8275       Eric  * o In case of a user specified cpulist present on underlying link and flows,
    242   8275       Eric  * the flows cpulist must be a subset of the underlying link.
    243   8275       Eric  * o In case of a user specified fanout mode present on link and flow, the
    244   8275       Eric  * subflow fanout count has to be less than or equal to that of the
    245   8275       Eric  * underlying link. The cpu-bindings for the subflows will be a subset of
    246   8275       Eric  * the underlying link.
    247   8275       Eric  * o If no cpulist is specified on either the underlying link or the flow, the
    248   8275       Eric  * underlying link relies on a MAC tunable to provide out of box fanout.
    249   8275       Eric  * The subflow will have no cpulist (the subflow will be unbound)
    250   8275       Eric  * o If no cpulist is specified on the underlying link, a subflow can
    251   8275       Eric  * carry either a user-specified cpulist or fanout count. The cpu-bindings
    252   8275       Eric  * for the subflow will not adhere to restriction that they need to be subset
    253   8275       Eric  * of the underlying link.
    254   8275       Eric  * o In case where the underlying link is carrying either a user specified
    255   8275       Eric  * cpulist or fanout mode and for a unspecified subflow, the subflow will be
    256   8275       Eric  * created unbound.
    257   8275       Eric  * o While creating unbound subflows, bandwidth mode changes attempt to
    258   8275       Eric  * figure a right fanout count. In such cases the fanout count will override
    259   8275       Eric  * the unbound cpu-binding behavior.
    260   8275       Eric  * o In addition to this, while cycling between flow and link properties, we
    261   8275       Eric  * impose a restriction that if a link property has a subflow with
    262   8275       Eric  * user-specified attributes, we will not allow changing the link property.
    263   8275       Eric  * The administrator needs to reset all the user specified properties for the
    264   8275       Eric  * subflows before attempting a link property change.
    265   8275       Eric  * Some of the above rules can be overridden by specifying additional command
    266   8275       Eric  * line options while creating or modifying link or subflow properties.
    267      0     stevel  */
    268      0     stevel 
    269      0     stevel #include <sys/types.h>
    270      0     stevel #include <sys/conf.h>
    271   5895   yz147064 #include <sys/id_space.h>
    272   6077   yz147064 #include <sys/esunddi.h>
    273      0     stevel #include <sys/stat.h>
    274   5895   yz147064 #include <sys/mkdev.h>
    275      0     stevel #include <sys/stream.h>
    276      0     stevel #include <sys/strsun.h>
    277      0     stevel #include <sys/strsubr.h>
    278      0     stevel #include <sys/dlpi.h>
    279   8275       Eric #include <sys/modhash.h>
    280   8275       Eric #include <sys/mac_provider.h>
    281   8275       Eric #include <sys/mac_client_impl.h>
    282   8275       Eric #include <sys/mac_soft_ring.h>
    283   8275       Eric #include <sys/mac_impl.h>
    284   8275       Eric #include <sys/mac.h>
    285   5895   yz147064 #include <sys/dls.h>
    286    269   ericheng #include <sys/dld.h>
    287   2311        seb #include <sys/modctl.h>
    288   3448   dh155122 #include <sys/fs/dv_node.h>
    289   5009    gd78059 #include <sys/thread.h>
    290   5009    gd78059 #include <sys/proc.h>
    291   5009    gd78059 #include <sys/callb.h>
    292   5009    gd78059 #include <sys/cpuvar.h>
    293   3288        seb #include <sys/atomic.h>
    294   8275       Eric #include <sys/bitmap.h>
    295   4913   ethindra #include <sys/sdt.h>
    296   8275       Eric #include <sys/mac_flow.h>
    297   8275       Eric #include <sys/ddi_intr_impl.h>
    298   8275       Eric #include <sys/disp.h>
    299   8275       Eric #include <sys/sdt.h>
    300   8275       Eric #include <sys/vnic.h>
    301   8275       Eric #include <sys/vnic_impl.h>
    302   8275       Eric #include <sys/vlan.h>
    303   8275       Eric #include <inet/ip.h>
    304   8275       Eric #include <inet/ip6.h>
    305   8275       Eric #include <sys/exacct.h>
    306   8275       Eric #include <sys/exacct_impl.h>
    307   5903    sowmini #include <inet/nd.h>
    308   6512    sowmini #include <sys/ethernet.h>
    309      0     stevel 
    310      0     stevel #define	IMPL_HASHSZ	67	/* prime */
    311      0     stevel 
    312   8275       Eric kmem_cache_t	*i_mac_impl_cachep;
    313   8275       Eric mod_hash_t		*i_mac_impl_hash;
    314    269   ericheng krwlock_t		i_mac_impl_lock;
    315    269   ericheng uint_t			i_mac_impl_count;
    316   8275       Eric static kmem_cache_t	*mac_ring_cache;
    317   5895   yz147064 static id_space_t	*minor_ids;
    318   5895   yz147064 static uint32_t		minor_count;
    319   8275       Eric 
    320   8275       Eric /*
    321   8275       Eric  * Logging stuff. Perhaps mac_logging_interval could be broken into
    322   8275       Eric  * mac_flow_log_interval and mac_link_log_interval if we want to be
    323   8275       Eric  * able to schedule them differently.
    324   8275       Eric  */
    325   8275       Eric uint_t			mac_logging_interval;
    326   8275       Eric boolean_t		mac_flow_log_enable;
    327   8275       Eric boolean_t		mac_link_log_enable;
    328   8275       Eric timeout_id_t		mac_logging_timer;
    329   8275       Eric 
    330   8275       Eric /* for debugging, see MAC_DBG_PRT() in mac_impl.h */
    331   8275       Eric int mac_dbg = 0;
    332   1852   yz147064 
    333   2311        seb #define	MACTYPE_KMODDIR	"mac"
    334   2311        seb #define	MACTYPE_HASHSZ	67
    335   2311        seb static mod_hash_t	*i_mactype_hash;
    336   3288        seb /*
    337   3288        seb  * i_mactype_lock synchronizes threads that obtain references to mactype_t
    338   3288        seb  * structures through i_mactype_getplugin().
    339   3288        seb  */
    340   3288        seb static kmutex_t		i_mactype_lock;
    341   2311        seb 
    342   8275       Eric /*
    343   8275       Eric  * mac_tx_percpu_cnt
    344   8275       Eric  *
    345   8275       Eric  * Number of per cpu locks per mac_client_impl_t. Used by the transmit side
    346   8275       Eric  * in mac_tx to reduce lock contention. This is sized at boot time in mac_init.
    347   8275       Eric  * mac_tx_percpu_cnt_max is settable in /etc/system and must be a power of 2.
    348   8275       Eric  * Per cpu locks may be disabled by setting mac_tx_percpu_cnt_max to 1.
    349   8275       Eric  */
    350   8275       Eric int mac_tx_percpu_cnt;
    351   8275       Eric int mac_tx_percpu_cnt_max = 128;
    352   8275       Eric 
    353  10491      Rishi /*
    354  10491      Rishi  * Call back functions for the bridge module.  These are guaranteed to be valid
    355  10491      Rishi  * when holding a reference on a link or when holding mip->mi_bridge_lock and
    356  10491      Rishi  * mi_bridge_link is non-NULL.
    357  10491      Rishi  */
    358  10491      Rishi mac_bridge_tx_t mac_bridge_tx_cb;
    359  10491      Rishi mac_bridge_rx_t mac_bridge_rx_cb;
    360  10491      Rishi mac_bridge_ref_t mac_bridge_ref_cb;
    361  10491      Rishi mac_bridge_ls_t mac_bridge_ls_cb;
    362  10491      Rishi 
    363   8275       Eric static int i_mac_constructor(void *, void *, int);
    364   8275       Eric static void i_mac_destructor(void *, void *);
    365   8275       Eric static int i_mac_ring_ctor(void *, void *, int);
    366   8275       Eric static void i_mac_ring_dtor(void *, void *);
    367   8275       Eric static mblk_t *mac_rx_classify(mac_impl_t *, mac_resource_handle_t, mblk_t *);
    368   8275       Eric void mac_tx_client_flush(mac_client_impl_t *);
    369   8275       Eric void mac_tx_client_block(mac_client_impl_t *);
    370   8275       Eric static void mac_rx_ring_quiesce(mac_ring_t *, uint_t);
    371   8275       Eric static int mac_start_group_and_rings(mac_group_t *);
    372   8275       Eric static void mac_stop_group_and_rings(mac_group_t *);
    373   8275       Eric 
    374   8275       Eric /*
    375   8275       Eric  * Module initialization functions.
    376   8275       Eric  */
    377   8275       Eric 
    378   8275       Eric void
    379   8275       Eric mac_init(void)
    380   8275       Eric {
    381   8275       Eric 	mac_tx_percpu_cnt = ((boot_max_ncpus == -1) ? max_ncpus :
    382   8275       Eric 	    boot_max_ncpus);
    383   8275       Eric 
    384   8275       Eric 	/* Upper bound is mac_tx_percpu_cnt_max */
    385   8275       Eric 	if (mac_tx_percpu_cnt > mac_tx_percpu_cnt_max)
    386   8275       Eric 		mac_tx_percpu_cnt = mac_tx_percpu_cnt_max;
    387   8275       Eric 
    388   8275       Eric 	if (mac_tx_percpu_cnt < 1) {
    389   8275       Eric 		/* Someone set max_tx_percpu_cnt_max to 0 or less */
    390   8275       Eric 		mac_tx_percpu_cnt = 1;
    391   8275       Eric 	}
    392   8275       Eric 
    393   8275       Eric 	ASSERT(mac_tx_percpu_cnt >= 1);
    394   8275       Eric 	mac_tx_percpu_cnt = (1 << highbit(mac_tx_percpu_cnt - 1));
    395   8275       Eric 	/*
    396   8275       Eric 	 * Make it of the form 2**N - 1 in the range
    397   8275       Eric 	 * [0 .. mac_tx_percpu_cnt_max - 1]
    398   8275       Eric 	 */
    399   8275       Eric 	mac_tx_percpu_cnt--;
    400   8275       Eric 
    401   8275       Eric 	i_mac_impl_cachep = kmem_cache_create("mac_impl_cache",
    402   8275       Eric 	    sizeof (mac_impl_t), 0, i_mac_constructor, i_mac_destructor,
    403   8275       Eric 	    NULL, NULL, NULL, 0);
    404   8275       Eric 	ASSERT(i_mac_impl_cachep != NULL);
    405   8275       Eric 
    406   8275       Eric 	mac_ring_cache = kmem_cache_create("mac_ring_cache",
    407   8275       Eric 	    sizeof (mac_ring_t), 0, i_mac_ring_ctor, i_mac_ring_dtor, NULL,
    408   8275       Eric 	    NULL, NULL, 0);
    409   8275       Eric 	ASSERT(mac_ring_cache != NULL);
    410   8275       Eric 
    411   8275       Eric 	i_mac_impl_hash = mod_hash_create_extended("mac_impl_hash",
    412   8275       Eric 	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
    413   8275       Eric 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
    414   8275       Eric 	rw_init(&i_mac_impl_lock, NULL, RW_DEFAULT, NULL);
    415   8275       Eric 
    416   8275       Eric 	mac_flow_init();
    417   8275       Eric 	mac_soft_ring_init();
    418   8275       Eric 	mac_bcast_init();
    419   8275       Eric 	mac_client_init();
    420   8275       Eric 
    421   8275       Eric 	i_mac_impl_count = 0;
    422   8275       Eric 
    423   8275       Eric 	i_mactype_hash = mod_hash_create_extended("mactype_hash",
    424   8275       Eric 	    MACTYPE_HASHSZ,
    425   8275       Eric 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
    426   8275       Eric 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
    427   8275       Eric 
    428   8275       Eric 	/*
    429   8275       Eric 	 * Allocate an id space to manage minor numbers. The range of the
    430  10283    Garrett 	 * space will be from MAC_MAX_MINOR+1 to MAC_PRIVATE_MINOR-1.  This
    431  10283    Garrett 	 * leaves half of the 32-bit minors available for driver private use.
    432  10283    Garrett 	 */
    433  10283    Garrett 	minor_ids = id_space_create("mac_minor_ids", MAC_MAX_MINOR+1,
    434  10283    Garrett 	    MAC_PRIVATE_MINOR-1);
    435   8275       Eric 	ASSERT(minor_ids != NULL);
    436   8275       Eric 	minor_count = 0;
    437   8275       Eric 
    438   8275       Eric 	/* Let's default to 20 seconds */
    439   8275       Eric 	mac_logging_interval = 20;
    440   8275       Eric 	mac_flow_log_enable = B_FALSE;
    441   8275       Eric 	mac_link_log_enable = B_FALSE;
    442   8275       Eric 	mac_logging_timer = 0;
    443   8275       Eric }
    444   8275       Eric 
    445   8275       Eric int
    446   8275       Eric mac_fini(void)
    447   8275       Eric {
    448   8275       Eric 	if (i_mac_impl_count > 0 || minor_count > 0)
    449   8275       Eric 		return (EBUSY);
    450   8275       Eric 
    451   8275       Eric 	id_space_destroy(minor_ids);
    452   8275       Eric 	mac_flow_fini();
    453   8275       Eric 
    454   8275       Eric 	mod_hash_destroy_hash(i_mac_impl_hash);
    455   8275       Eric 	rw_destroy(&i_mac_impl_lock);
    456   8275       Eric 
    457   8275       Eric 	mac_client_fini();
    458   8275       Eric 	kmem_cache_destroy(mac_ring_cache);
    459   8275       Eric 
    460   8275       Eric 	mod_hash_destroy_hash(i_mactype_hash);
    461   8275       Eric 	mac_soft_ring_finish();
    462   8275       Eric 	return (0);
    463   8275       Eric }
    464   8275       Eric 
    465  10986  Sebastien /*
    466  10986  Sebastien  * Initialize a GLDv3 driver's device ops.  A driver that manages its own ops
    467  10986  Sebastien  * (e.g. softmac) may pass in a NULL ops argument.
    468  10986  Sebastien  */
    469   8275       Eric void
    470   8275       Eric mac_init_ops(struct dev_ops *ops, const char *name)
    471   8275       Eric {
    472  10986  Sebastien 	major_t major = ddi_name_to_major((char *)name);
    473  10986  Sebastien 
    474  10986  Sebastien 	/*
    475  10986  Sebastien 	 * By returning on error below, we are not letting the driver continue
    476  10986  Sebastien 	 * in an undefined context.  The mac_register() function will faill if
    477  10986  Sebastien 	 * DN_GLDV3_DRIVER isn't set.
    478  10986  Sebastien 	 */
    479  10986  Sebastien 	if (major == DDI_MAJOR_T_NONE)
    480  10986  Sebastien 		return;
    481  10986  Sebastien 	LOCK_DEV_OPS(&devnamesp[major].dn_lock);
    482  10986  Sebastien 	devnamesp[major].dn_flags |= (DN_GLDV3_DRIVER | DN_NETWORK_DRIVER);
    483  10986  Sebastien 	UNLOCK_DEV_OPS(&devnamesp[major].dn_lock);
    484  10986  Sebastien 	if (ops != NULL)
    485  10986  Sebastien 		dld_init_ops(ops, name);
    486   8275       Eric }
    487   8275       Eric 
    488   8275       Eric void
    489   8275       Eric mac_fini_ops(struct dev_ops *ops)
    490   8275       Eric {
    491   8275       Eric 	dld_fini_ops(ops);
    492   8275       Eric }
    493      0     stevel 
    494      0     stevel /*ARGSUSED*/
    495      0     stevel static int
    496      0     stevel i_mac_constructor(void *buf, void *arg, int kmflag)
    497      0     stevel {
    498      0     stevel 	mac_impl_t	*mip = buf;
    499      0     stevel 
    500      0     stevel 	bzero(buf, sizeof (mac_impl_t));
    501      0     stevel 
    502   2311        seb 	mip->mi_linkstate = LINK_STATE_UNKNOWN;
    503   8275       Eric 
    504   4913   ethindra 	mutex_init(&mip->mi_lock, NULL, MUTEX_DRIVER, NULL);
    505   8275       Eric 	rw_init(&mip->mi_rw_lock, NULL, RW_DRIVER, NULL);
    506   8275       Eric 	mutex_init(&mip->mi_notify_lock, NULL, MUTEX_DRIVER, NULL);
    507   8275       Eric 	mutex_init(&mip->mi_promisc_lock, NULL, MUTEX_DRIVER, NULL);
    508   8275       Eric 	mutex_init(&mip->mi_ring_lock, NULL, MUTEX_DEFAULT, NULL);
    509   8275       Eric 
    510   8275       Eric 	mip->mi_notify_cb_info.mcbi_lockp = &mip->mi_notify_lock;
    511   8275       Eric 	cv_init(&mip->mi_notify_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL);
    512   8275       Eric 	mip->mi_promisc_cb_info.mcbi_lockp = &mip->mi_promisc_lock;
    513   8275       Eric 	cv_init(&mip->mi_promisc_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL);
    514  10491      Rishi 
    515  10491      Rishi 	mutex_init(&mip->mi_bridge_lock, NULL, MUTEX_DEFAULT, NULL);
    516  10491      Rishi 
    517      0     stevel 	return (0);
    518      0     stevel }
    519      0     stevel 
    520      0     stevel /*ARGSUSED*/
    521      0     stevel static void
    522      0     stevel i_mac_destructor(void *buf, void *arg)
    523      0     stevel {
    524      0     stevel 	mac_impl_t	*mip = buf;
    525   8275       Eric 	mac_cb_info_t	*mcbi;
    526      0     stevel 
    527      0     stevel 	ASSERT(mip->mi_ref == 0);
    528      0     stevel 	ASSERT(mip->mi_active == 0);
    529   2311        seb 	ASSERT(mip->mi_linkstate == LINK_STATE_UNKNOWN);
    530      0     stevel 	ASSERT(mip->mi_devpromisc == 0);
    531      0     stevel 	ASSERT(mip->mi_ksp == NULL);
    532   2311        seb 	ASSERT(mip->mi_kstat_count == 0);
    533   8275       Eric 	ASSERT(mip->mi_nclients == 0);
    534   8275       Eric 	ASSERT(mip->mi_nactiveclients == 0);
    535   8833       Venu 	ASSERT(mip->mi_single_active_client == NULL);
    536   8275       Eric 	ASSERT(mip->mi_state_flags == 0);
    537   8275       Eric 	ASSERT(mip->mi_factory_addr == NULL);
    538   8275       Eric 	ASSERT(mip->mi_factory_addr_num == 0);
    539   8275       Eric 	ASSERT(mip->mi_default_tx_ring == NULL);
    540   8275       Eric 
    541   8275       Eric 	mcbi = &mip->mi_notify_cb_info;
    542   8275       Eric 	ASSERT(mcbi->mcbi_del_cnt == 0 && mcbi->mcbi_walker_cnt == 0);
    543   5009    gd78059 	ASSERT(mip->mi_notify_bits == 0);
    544   5009    gd78059 	ASSERT(mip->mi_notify_thread == NULL);
    545   8275       Eric 	ASSERT(mcbi->mcbi_lockp == &mip->mi_notify_lock);
    546   8275       Eric 	mcbi->mcbi_lockp = NULL;
    547   8275       Eric 
    548   8275       Eric 	mcbi = &mip->mi_promisc_cb_info;
    549   8275       Eric 	ASSERT(mcbi->mcbi_del_cnt == 0 && mip->mi_promisc_list == NULL);
    550   8275       Eric 	ASSERT(mip->mi_promisc_list == NULL);
    551   8275       Eric 	ASSERT(mcbi->mcbi_lockp == &mip->mi_promisc_lock);
    552   8275       Eric 	mcbi->mcbi_lockp = NULL;
    553   8275       Eric 
    554   8275       Eric 	ASSERT(mip->mi_bcast_ngrps == 0 && mip->mi_bcast_grp == NULL);
    555   8275       Eric 	ASSERT(mip->mi_perim_owner == NULL && mip->mi_perim_ocnt == 0);
    556   8275       Eric 
    557   4913   ethindra 	mutex_destroy(&mip->mi_lock);
    558   8275       Eric 	rw_destroy(&mip->mi_rw_lock);
    559   8275       Eric 
    560   8275       Eric 	mutex_destroy(&mip->mi_promisc_lock);
    561   8275       Eric 	cv_destroy(&mip->mi_promisc_cb_info.mcbi_cv);
    562   8275       Eric 	mutex_destroy(&mip->mi_notify_lock);
    563   8275       Eric 	cv_destroy(&mip->mi_notify_cb_info.mcbi_cv);
    564   8275       Eric 	mutex_destroy(&mip->mi_ring_lock);
    565  10491      Rishi 
    566  10491      Rishi 	ASSERT(mip->mi_bridge_link == NULL);
    567   8275       Eric }
    568   8275       Eric 
    569   8275       Eric /* ARGSUSED */
    570   8275       Eric static int
    571   8275       Eric i_mac_ring_ctor(void *buf, void *arg, int kmflag)
    572   8275       Eric {
    573   8275       Eric 	mac_ring_t *ring = (mac_ring_t *)buf;
    574   8275       Eric 
    575   8275       Eric 	bzero(ring, sizeof (mac_ring_t));
    576   8275       Eric 	cv_init(&ring->mr_cv, NULL, CV_DEFAULT, NULL);
    577   8275       Eric 	mutex_init(&ring->mr_lock, NULL, MUTEX_DEFAULT, NULL);
    578   8275       Eric 	ring->mr_state = MR_FREE;
    579   8275       Eric 	return (0);
    580   8275       Eric }
    581   8275       Eric 
    582   8275       Eric /* ARGSUSED */
    583   8275       Eric static void
    584   8275       Eric i_mac_ring_dtor(void *buf, void *arg)
    585   8275       Eric {
    586   8275       Eric 	mac_ring_t *ring = (mac_ring_t *)buf;
    587   8275       Eric 
    588   8275       Eric 	cv_destroy(&ring->mr_cv);
    589   8275       Eric 	mutex_destroy(&ring->mr_lock);
    590   8275       Eric }
    591   8275       Eric 
    592   8275       Eric /*
    593   8275       Eric  * Common functions to do mac callback addition and deletion. Currently this is
    594   8275       Eric  * used by promisc callbacks and notify callbacks. List addition and deletion
    595   8275       Eric  * need to take care of list walkers. List walkers in general, can't hold list
    596   8275       Eric  * locks and make upcall callbacks due to potential lock order and recursive
    597   8275       Eric  * reentry issues. Instead list walkers increment the list walker count to mark
    598   8275       Eric  * the presence of a walker thread. Addition can be carefully done to ensure
    599   8275       Eric  * that the list walker always sees either the old list or the new list.
    600   8275       Eric  * However the deletion can't be done while the walker is active, instead the
    601   8275       Eric  * deleting thread simply marks the entry as logically deleted. The last walker
    602   8275       Eric  * physically deletes and frees up the logically deleted entries when the walk
    603   8275       Eric  * is complete.
    604   8275       Eric  */
    605   8275       Eric void
    606   8275       Eric mac_callback_add(mac_cb_info_t *mcbi, mac_cb_t **mcb_head,
    607   8275       Eric     mac_cb_t *mcb_elem)
    608   8275       Eric {
    609   8275       Eric 	mac_cb_t	*p;
    610   8275       Eric 	mac_cb_t	**pp;
    611   8275       Eric 
    612   8275       Eric 	/* Verify it is not already in the list */
    613   8275       Eric 	for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) {
    614   8275       Eric 		if (p == mcb_elem)
    615   8275       Eric 			break;
    616   8275       Eric 	}
    617   8275       Eric 	VERIFY(p == NULL);
    618   8275       Eric 
    619   8275       Eric 	/*
    620   8275       Eric 	 * Add it to the head of the callback list. The membar ensures that
    621   8275       Eric 	 * the following list pointer manipulations reach global visibility
    622   8275       Eric 	 * in exactly the program order below.
    623   8275       Eric 	 */
    624   8275       Eric 	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
    625   8275       Eric 
    626   8275       Eric 	mcb_elem->mcb_nextp = *mcb_head;
    627   8275       Eric 	membar_producer();
    628   8275       Eric 	*mcb_head = mcb_elem;
    629   8275       Eric }
    630   8275       Eric 
    631   8275       Eric /*
    632   8275       Eric  * Mark the entry as logically deleted. If there aren't any walkers unlink
    633   8275       Eric  * from the list. In either case return the corresponding status.
    634   8275       Eric  */
    635   8275       Eric boolean_t
    636   8275       Eric mac_callback_remove(mac_cb_info_t *mcbi, mac_cb_t **mcb_head,
    637   8275       Eric     mac_cb_t *mcb_elem)
    638   8275       Eric {
    639   8275       Eric 	mac_cb_t	*p;
    640   8275       Eric 	mac_cb_t	**pp;
    641   8275       Eric 
    642   8275       Eric 	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
    643   8275       Eric 	/*
    644   8275       Eric 	 * Search the callback list for the entry to be removed
    645   8275       Eric 	 */
    646   8275       Eric 	for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) {
    647   8275       Eric 		if (p == mcb_elem)
    648   8275       Eric 			break;
    649   8275       Eric 	}
    650   8275       Eric 	VERIFY(p != NULL);
    651   8275       Eric 
    652   8275       Eric 	/*
    653   8275       Eric 	 * If there are walkers just mark it as deleted and the last walker
    654   8275       Eric 	 * will remove from the list and free it.
    655   8275       Eric 	 */
    656   8275       Eric 	if (mcbi->mcbi_walker_cnt != 0) {
    657   8275       Eric 		p->mcb_flags |= MCB_CONDEMNED;
    658   8275       Eric 		mcbi->mcbi_del_cnt++;
    659   8275       Eric 		return (B_FALSE);
    660   8275       Eric 	}
    661   8275       Eric 
    662   8275       Eric 	ASSERT(mcbi->mcbi_del_cnt == 0);
    663   8275       Eric 	*pp = p->mcb_nextp;
    664   8275       Eric 	p->mcb_nextp = NULL;
    665   8275       Eric 	return (B_TRUE);
    666   8275       Eric }
    667   8275       Eric 
    668   8275       Eric /*
    669   8275       Eric  * Wait for all pending callback removals to be completed
    670   8275       Eric  */
    671   8275       Eric void
    672   8275       Eric mac_callback_remove_wait(mac_cb_info_t *mcbi)
    673   8275       Eric {
    674   8275       Eric 	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
    675   8275       Eric 	while (mcbi->mcbi_del_cnt != 0) {
    676   8275       Eric 		DTRACE_PROBE1(need_wait, mac_cb_info_t *, mcbi);
    677   8275       Eric 		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
    678   8275       Eric 	}
    679   8275       Eric }
    680   8275       Eric 
    681   8275       Eric /*
    682   8275       Eric  * The last mac callback walker does the cleanup. Walk the list and unlik
    683   8275       Eric  * all the logically deleted entries and construct a temporary list of
    684   8275       Eric  * removed entries. Return the list of removed entries to the caller.
    685   8275       Eric  */
    686   8275       Eric mac_cb_t *
    687   8275       Eric mac_callback_walker_cleanup(mac_cb_info_t *mcbi, mac_cb_t **mcb_head)
    688   8275       Eric {
    689   8275       Eric 	mac_cb_t	*p;
    690   8275       Eric 	mac_cb_t	**pp;
    691   8275       Eric 	mac_cb_t	*rmlist = NULL;		/* List of removed elements */
    692   8275       Eric 	int	cnt = 0;
    693   8275       Eric 
    694   8275       Eric 	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
    695   8275       Eric 	ASSERT(mcbi->mcbi_del_cnt != 0 && mcbi->mcbi_walker_cnt == 0);
    696   8275       Eric 
    697   8275       Eric 	pp = mcb_head;
    698   8275       Eric 	while (*pp != NULL) {
    699   8275       Eric 		if ((*pp)->mcb_flags & MCB_CONDEMNED) {
    700   8275       Eric 			p = *pp;
    701   8275       Eric 			*pp = p->mcb_nextp;
    702   8275       Eric 			p->mcb_nextp = rmlist;
    703   8275       Eric 			rmlist = p;
    704   8275       Eric 			cnt++;
    705   8275       Eric 			continue;
    706   8275       Eric 		}
    707   8275       Eric 		pp = &(*pp)->mcb_nextp;
    708   8275       Eric 	}
    709   8275       Eric 
    710   8275       Eric 	ASSERT(mcbi->mcbi_del_cnt == cnt);
    711   8275       Eric 	mcbi->mcbi_del_cnt = 0;
    712   8275       Eric 	return (rmlist);
    713   8275       Eric }
    714   8275       Eric 
    715   8275       Eric boolean_t
    716   8275       Eric mac_callback_lookup(mac_cb_t **mcb_headp, mac_cb_t *mcb_elem)
    717   8275       Eric {
    718   8275       Eric 	mac_cb_t	*mcb;
    719   8275       Eric 
    720   8275       Eric 	/* Verify it is not already in the list */
    721   8275       Eric 	for (mcb = *mcb_headp; mcb != NULL; mcb = mcb->mcb_nextp) {
    722   8275       Eric 		if (mcb == mcb_elem)
    723   8275       Eric 			return (B_TRUE);
    724   8275       Eric 	}
    725   8275       Eric 
    726   8275       Eric 	return (B_FALSE);
    727   8275       Eric }
    728   8275       Eric 
    729   8275       Eric boolean_t
    730   8275       Eric mac_callback_find(mac_cb_info_t *mcbi, mac_cb_t **mcb_headp, mac_cb_t *mcb_elem)
    731   8275       Eric {
    732   8275       Eric 	boolean_t	found;
    733   8275       Eric 
    734   8275       Eric 	mutex_enter(mcbi->mcbi_lockp);
    735   8275       Eric 	found = mac_callback_lookup(mcb_headp, mcb_elem);
    736   8275       Eric 	mutex_exit(mcbi->mcbi_lockp);
    737   8275       Eric 
    738   8275       Eric 	return (found);
    739   8275       Eric }
    740   8275       Eric 
    741   8275       Eric /* Free the list of removed callbacks */
    742   8275       Eric void
    743   8275       Eric mac_callback_free(mac_cb_t *rmlist)
    744   8275       Eric {
    745   8275       Eric 	mac_cb_t	*mcb;
    746   8275       Eric 	mac_cb_t	*mcb_next;
    747   8275       Eric 
    748   8275       Eric 	for (mcb = rmlist; mcb != NULL; mcb = mcb_next) {
    749   8275       Eric 		mcb_next = mcb->mcb_nextp;
    750   8275       Eric 		kmem_free(mcb->mcb_objp, mcb->mcb_objsize);
    751   8275       Eric 	}
    752   8275       Eric }
    753   8275       Eric 
    754   8275       Eric /*
    755   8275       Eric  * The promisc callbacks are in 2 lists, one off the 'mip' and another off the
    756   8275       Eric  * 'mcip' threaded by mpi_mi_link and mpi_mci_link respectively. However there
    757   8275       Eric  * is only a single shared total walker count, and an entry can't be physically
    758   8275       Eric  * unlinked if a walker is active on either list. The last walker does this
    759   8275       Eric  * cleanup of logically deleted entries.
    760   8275       Eric  */
    761   8275       Eric void
    762   8275       Eric i_mac_promisc_walker_cleanup(mac_impl_t *mip)
    763   8275       Eric {
    764   8275       Eric 	mac_cb_t	*rmlist;
    765   8275       Eric 	mac_cb_t	*mcb;
    766   8275       Eric 	mac_cb_t	*mcb_next;
    767   8275       Eric 	mac_promisc_impl_t	*mpip;
    768   8275       Eric 
    769   8275       Eric 	/*
    770   8275       Eric 	 * Construct a temporary list of deleted callbacks by walking the
    771   8275       Eric 	 * the mi_promisc_list. Then for each entry in the temporary list,
    772   8275       Eric 	 * remove it from the mci_promisc_list and free the entry.
    773   8275       Eric 	 */
    774   8275       Eric 	rmlist = mac_callback_walker_cleanup(&mip->mi_promisc_cb_info,
    775   8275       Eric 	    &mip->mi_promisc_list);
    776   8275       Eric 
    777   8275       Eric 	for (mcb = rmlist; mcb != NULL; mcb = mcb_next) {
    778   8275       Eric 		mcb_next = mcb->mcb_nextp;
    779   8275       Eric 		mpip = (mac_promisc_impl_t *)mcb->mcb_objp;
    780   8275       Eric 		VERIFY(mac_callback_remove(&mip->mi_promisc_cb_info,
    781   8275       Eric 		    &mpip->mpi_mcip->mci_promisc_list, &mpip->mpi_mci_link));
    782   8275       Eric 		mcb->mcb_flags = 0;
    783   8275       Eric 		mcb->mcb_nextp = NULL;
    784   8275       Eric 		kmem_cache_free(mac_promisc_impl_cache, mpip);
    785   8275       Eric 	}
    786   8275       Eric }
    787   8275       Eric 
    788   8275       Eric void
    789      0     stevel i_mac_notify(mac_impl_t *mip, mac_notify_type_t type)
    790      0     stevel {
    791   8275       Eric 	mac_cb_info_t	*mcbi;
    792   8275       Eric 
    793   8275       Eric 	/*
    794   8275       Eric 	 * Signal the notify thread even after mi_ref has become zero and
    795   8275       Eric 	 * mi_disabled is set. The synchronization with the notify thread
    796   8275       Eric 	 * happens in mac_unregister and that implies the driver must make
    797   8275       Eric 	 * sure it is single-threaded (with respect to mac calls) and that
    798   8275       Eric 	 * all pending mac calls have returned before it calls mac_unregister
    799   8275       Eric 	 */
    800   1852   yz147064 	rw_enter(&i_mac_impl_lock, RW_READER);
    801   8275       Eric 	if (mip->mi_state_flags & MIS_DISABLED)
    802   1852   yz147064 		goto exit;
    803   1852   yz147064 
    804   5009    gd78059 	/*
    805   5009    gd78059 	 * Guard against incorrect notifications.  (Running a newer
    806   5009    gd78059 	 * mac client against an older implementation?)
    807   5009    gd78059 	 */
    808   5009    gd78059 	if (type >= MAC_NNOTE)
    809   1852   yz147064 		goto exit;
    810   1852   yz147064 
    811   8275       Eric 	mcbi = &mip->mi_notify_cb_info;
    812   8275       Eric 	mutex_enter(mcbi->mcbi_lockp);
    813   5009    gd78059 	mip->mi_notify_bits |= (1 << type);
    814   8275       Eric 	cv_broadcast(&mcbi->mcbi_cv);
    815   8275       Eric 	mutex_exit(mcbi->mcbi_lockp);
    816   1852   yz147064 
    817   1852   yz147064 exit:
    818   1852   yz147064 	rw_exit(&i_mac_impl_lock);
    819   1852   yz147064 }
    820   1852   yz147064 
    821   8275       Eric /*
    822   8275       Eric  * Mac serialization primitives. Please see the block comment at the
    823   8275       Eric  * top of the file.
    824   8275       Eric  */
    825   8275       Eric void
    826   8275       Eric i_mac_perim_enter(mac_impl_t *mip)
    827   8275       Eric {
    828   8275       Eric 	mac_client_impl_t	*mcip;
    829   8275       Eric 
    830   8275       Eric 	if (mip->mi_state_flags & MIS_IS_VNIC) {
    831   8275       Eric 		/*
    832   8275       Eric 		 * This is a VNIC. Return the lower mac since that is what
    833   8275       Eric 		 * we want to serialize on.
    834   8275       Eric 		 */
    835   8275       Eric 		mcip = mac_vnic_lower(mip);
    836   8275       Eric 		mip = mcip->mci_mip;
    837   8275       Eric 	}
    838   8275       Eric 
    839   8275       Eric 	mutex_enter(&mip->mi_perim_lock);
    840   8275       Eric 	if (mip->mi_perim_owner == curthread) {
    841   8275       Eric 		mip->mi_perim_ocnt++;
    842   8275       Eric 		mutex_exit(&mip->mi_perim_lock);
    843   8275       Eric 		return;
    844   8275       Eric 	}
    845   8275       Eric 
    846   8275       Eric 	while (mip->mi_perim_owner != NULL)
    847   8275       Eric 		cv_wait(&mip->mi_perim_cv, &mip->mi_perim_lock);
    848   8275       Eric 
    849   8275       Eric 	mip->mi_perim_owner = curthread;
    850   8275       Eric 	ASSERT(mip->mi_perim_ocnt == 0);
    851   8275       Eric 	mip->mi_perim_ocnt++;
    852   8275       Eric #ifdef DEBUG
    853   8275       Eric 	mip->mi_perim_stack_depth = getpcstack(mip->mi_perim_stack,
    854   8275       Eric 	    MAC_PERIM_STACK_DEPTH);
    855   8275       Eric #endif
    856   8275       Eric 	mutex_exit(&mip->mi_perim_lock);
    857   8275       Eric }
    858   8275       Eric 
    859   8275       Eric int
    860   8275       Eric i_mac_perim_enter_nowait(mac_impl_t *mip)
    861   8275       Eric {
    862   8275       Eric 	/*
    863   8275       Eric 	 * The vnic is a special case, since the serialization is done based
    864   8275       Eric 	 * on the lower mac. If the lower mac is busy, it does not imply the
    865   8275       Eric 	 * vnic can't be unregistered. But in the case of other drivers,
    866   8275       Eric 	 * a busy perimeter or open mac handles implies that the mac is busy
    867   8275       Eric 	 * and can't be unregistered.
    868   8275       Eric 	 */
    869   8275       Eric 	if (mip->mi_state_flags & MIS_IS_VNIC) {
    870   8275       Eric 		i_mac_perim_enter(mip);
    871   8275       Eric 		return (0);
    872   8275       Eric 	}
    873   8275       Eric 
    874   8275       Eric 	mutex_enter(&mip->mi_perim_lock);
    875   8275       Eric 	if (mip->mi_perim_owner != NULL) {
    876   8275       Eric 		mutex_exit(&mip->mi_perim_lock);
    877   8275       Eric 		return (EBUSY);
    878   8275       Eric 	}
    879   8275       Eric 	ASSERT(mip->mi_perim_ocnt == 0);
    880   8275       Eric 	mip->mi_perim_owner = curthread;
    881   8275       Eric 	mip->mi_perim_ocnt++;
    882   8275       Eric 	mutex_exit(&mip->mi_perim_lock);
    883   8275       Eric 
    884   8275       Eric 	return (0);
    885   8275       Eric }
    886   8275       Eric 
    887   8275       Eric void
    888   8275       Eric i_mac_perim_exit(mac_impl_t *mip)
    889   8275       Eric {
    890   8275       Eric 	mac_client_impl_t *mcip;
    891   8275       Eric 
    892   8275       Eric 	if (mip->mi_state_flags & MIS_IS_VNIC) {
    893   8275       Eric 		/*
    894   8275       Eric 		 * This is a VNIC. Return the lower mac since that is what
    895   8275       Eric 		 * we want to serialize on.
    896   8275       Eric 		 */
    897   8275       Eric 		mcip = mac_vnic_lower(mip);
    898   8275       Eric 		mip = mcip->mci_mip;
    899   8275       Eric 	}
    900   8275       Eric 
    901   8275       Eric 	ASSERT(mip->mi_perim_owner == curthread && mip->mi_perim_ocnt != 0);
    902   8275       Eric 
    903   8275       Eric 	mutex_enter(&mip->mi_perim_lock);
    904   8275       Eric 	if (--mip->mi_perim_ocnt == 0) {
    905   8275       Eric 		mip->mi_perim_owner = NULL;
    906   8275       Eric 		cv_signal(&mip->mi_perim_cv);
    907   8275       Eric 	}
    908   8275       Eric 	mutex_exit(&mip->mi_perim_lock);
    909   8275       Eric }
    910   8275       Eric 
    911   8275       Eric /*
    912   8275       Eric  * Returns whether the current thread holds the mac perimeter. Used in making
    913   8275       Eric  * assertions.
    914   8275       Eric  */
    915   8275       Eric boolean_t
    916   8275       Eric mac_perim_held(mac_handle_t mh)
    917   8275       Eric {
    918   8275       Eric 	mac_impl_t	*mip = (mac_impl_t *)mh;
    919   8275       Eric 	mac_client_impl_t *mcip;
    920   8275       Eric 
    921   8275       Eric 	if (mip->mi_state_flags & MIS_IS_VNIC) {
    922   8275       Eric 		/*
    923   8275       Eric 		 * This is a VNIC. Return the lower mac since that is what
    924   8275       Eric 		 * we want to serialize on.
    925   8275       Eric 		 */
    926   8275       Eric 		mcip = mac_vnic_lower(mip);
    927   8275       Eric 		mip = mcip->mci_mip;
    928   8275       Eric 	}
    929   8275       Eric 	return (mip->mi_perim_owner == curthread);
    930   8275       Eric }
    931   8275       Eric 
    932   8275       Eric /*
    933   8275       Eric  * mac client interfaces to enter the mac perimeter of a mac end point, given
    934   8275       Eric  * its mac handle, or macname or linkid.
    935   8275       Eric  */
    936   8275       Eric void
    937   8275       Eric mac_perim_enter_by_mh(mac_handle_t mh, mac_perim_handle_t *mphp)
    938   8275       Eric {
    939   8275       Eric 	mac_impl_t	*mip = (mac_impl_t *)mh;
    940   8275       Eric 
    941   8275       Eric 	i_mac_perim_enter(mip);
    942   8275       Eric 	/*
    943   8275       Eric 	 * The mac_perim_handle_t returned encodes the 'mip' and whether a
    944   8275       Eric 	 * mac_open has been done internally while entering the perimeter.
    945   8275       Eric 	 * This information is used in mac_perim_exit
    946   8275       Eric 	 */
    947   8275       Eric 	MAC_ENCODE_MPH(*mphp, mip, 0);
    948   8275       Eric }
    949   8275       Eric 
    950   8275       Eric int
    951   8275       Eric mac_perim_enter_by_macname(const char *name, mac_perim_handle_t *mphp)
    952   8275       Eric {
    953   8275       Eric 	int	err;
    954   8275       Eric 	mac_handle_t	mh;
    955   8275       Eric 
    956   8275       Eric 	if ((err = mac_open(name, &mh)) != 0)
    957   8275       Eric 		return (err);
    958   8275       Eric 
    959   8275       Eric 	mac_perim_enter_by_mh(mh, mphp);
    960   8275       Eric 	MAC_ENCODE_MPH(*mphp, mh, 1);
    961   8275       Eric 	return (0);
    962   8275       Eric }
    963   8275       Eric 
    964   8275       Eric int
    965   8275       Eric mac_perim_enter_by_linkid(datalink_id_t linkid, mac_perim_handle_t *mphp)
    966   8275       Eric {
    967   8275       Eric 	int	err;
    968   8275       Eric 	mac_handle_t	mh;
    969   8275       Eric 
    970   8275       Eric 	if ((err = mac_open_by_linkid(linkid, &mh)) != 0)
    971   8275       Eric 		return (err);
    972   8275       Eric 
    973   8275       Eric 	mac_perim_enter_by_mh(mh, mphp);
    974   8275       Eric 	MAC_ENCODE_MPH(*mphp, mh, 1);
    975   8275       Eric 	return (0);
    976   8275       Eric }
    977   8275       Eric 
    978   8275       Eric void
    979   8275       Eric mac_perim_exit(mac_perim_handle_t mph)
    980   8275       Eric {
    981   8275       Eric 	mac_impl_t	*mip;
    982   8275       Eric 	boolean_t	need_close;
    983   8275       Eric 
    984   8275       Eric 	MAC_DECODE_MPH(mph, mip, need_close);
    985   8275       Eric 	i_mac_perim_exit(mip);
    986   8275       Eric 	if (need_close)
    987   8275       Eric 		mac_close((mac_handle_t)mip);
    988   8275       Eric }
    989   8275       Eric 
    990   8275       Eric int
    991   8275       Eric mac_hold(const char *macname, mac_impl_t **pmip)
    992   8275       Eric {
    993   8275       Eric 	mac_impl_t	*mip;
    994   8275       Eric 	int		err;
    995   8275       Eric 
    996   8275       Eric 	/*
    997   8275       Eric 	 * Check the device name length to make sure it won't overflow our
    998   8275       Eric 	 * buffer.
    999   8275       Eric 	 */
   1000   8275       Eric 	if (strlen(macname) >= MAXNAMELEN)
   1001   8275       Eric 		return (EINVAL);
   1002   8275       Eric 
   1003   8275       Eric 	/*
   1004   8275       Eric 	 * Look up its entry in the global hash table.
   1005   8275       Eric 	 */
   1006   8275       Eric 	rw_enter(&i_mac_impl_lock, RW_WRITER);
   1007   8275       Eric 	err = mod_hash_find(i_mac_impl_hash, (mod_hash_key_t)macname,
   1008   8275       Eric 	    (mod_hash_val_t *)&mip);
   1009   8275       Eric 
   1010   8275       Eric 	if (err != 0) {
   1011   8275       Eric 		rw_exit(&i_mac_impl_lock);
   1012   8275       Eric 		return (ENOENT);
   1013   8275       Eric 	}
   1014   8275       Eric 
   1015   8275       Eric 	if (mip->mi_state_flags & MIS_DISABLED) {
   1016   8275       Eric 		rw_exit(&i_mac_impl_lock);
   1017   8275       Eric 		return (ENOENT);
   1018   8275       Eric 	}
   1019   8275       Eric 
   1020   8275       Eric 	if (mip->mi_state_flags & MIS_EXCLUSIVE_HELD) {
   1021   8275       Eric 		rw_exit(&i_mac_impl_lock);
   1022   8275       Eric 		return (EBUSY);
   1023   8275       Eric 	}
   1024   8275       Eric 
   1025   8275       Eric 	mip->mi_ref++;
   1026   8275       Eric 	rw_exit(&i_mac_impl_lock);
   1027   8275       Eric 
   1028   8275       Eric 	*pmip = mip;
   1029   8275       Eric 	return (0);
   1030   8275       Eric }
   1031   8275       Eric 
   1032   8275       Eric void
   1033   8275       Eric mac_rele(mac_impl_t *mip)
   1034   8275       Eric {
   1035   8275       Eric 	rw_enter(&i_mac_impl_lock, RW_WRITER);
   1036   8275       Eric 	ASSERT(mip->mi_ref != 0);
   1037   8275       Eric 	if (--mip->mi_ref == 0) {
   1038   8275       Eric 		ASSERT(mip->mi_nactiveclients == 0 &&
   1039   8275       Eric 		    !(mip->mi_state_flags & MIS_EXCLUSIVE));
   1040   8275       Eric 	}
   1041   8275       Eric 	rw_exit(&i_mac_impl_lock);
   1042   8275       Eric }
   1043   8275       Eric 
   1044   8275       Eric /*
   1045   8893    Michael  * Private GLDv3 function to start a MAC instance.
   1046   8893    Michael  */
   1047   8893    Michael int
   1048   8893    Michael mac_start(mac_handle_t mh)
   1049   8893    Michael {
   1050   8893    Michael 	mac_impl_t	*mip = (mac_impl_t *)mh;
   1051   8275       Eric 	int		err = 0;
   1052   8275       Eric 
   1053   8275       Eric 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   1054   8275       Eric 	ASSERT(mip->mi_start != NULL);
   1055   8275       Eric 
   1056   8275       Eric 	/*
   1057   8275       Eric 	 * Check whether the device is already started.
   1058   8275       Eric 	 */
   1059   8275       Eric 	if (mip->mi_active++ == 0) {
   1060   8275       Eric 		mac_ring_t *ring = NULL;
   1061   8275       Eric 
   1062   8275       Eric 		/*
   1063   8275       Eric 		 * Start the device.
   1064   8275       Eric 		 */
   1065   8275       Eric 		err = mip->mi_start(mip->mi_driver);
   1066   8275       Eric 		if (err != 0) {
   1067   8275       Eric 			mip->mi_active--;
   1068   8275       Eric 			return (err);
   1069   8275       Eric 		}
   1070   8275       Eric 
   1071   8275       Eric 		/*
   1072   8275       Eric 		 * Start the default tx ring.
   1073   8275       Eric 		 */
   1074   8275       Eric 		if (mip->mi_default_tx_ring != NULL) {
   1075   8275       Eric 
   1076   8275       Eric 			ring = (mac_ring_t *)mip->mi_default_tx_ring;
   1077   8275       Eric 			err = mac_start_ring(ring);
   1078   8275       Eric 			if (err != 0) {
   1079   8275       Eric 				mip->mi_active--;
   1080   8275       Eric 				return (err);
   1081   8275       Eric 			}
   1082   8275       Eric 			ring->mr_state = MR_INUSE;
   1083   8275       Eric 		}
   1084   8275       Eric 
   1085   8275       Eric 		if (mip->mi_rx_groups != NULL) {
   1086   8275       Eric 			/*
   1087   8275       Eric 			 * Start the default ring, since it will be needed
   1088   8275       Eric 			 * to receive broadcast and multicast traffic for
   1089   8275       Eric 			 * both primary and non-primary MAC clients.
   1090   8275       Eric 			 */
   1091   8275       Eric 			mac_group_t *grp = &mip->mi_rx_groups[0];
   1092   8275       Eric 
   1093   8275       Eric 			ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED);
   1094   8275       Eric 			err = mac_start_group_and_rings(grp);
   1095   8275       Eric 			if (err != 0) {
   1096   8275       Eric 				mip->mi_active--;
   1097   8275       Eric 				if (ring != NULL) {
   1098   8275       Eric 					mac_stop_ring(ring);
   1099   8275       Eric 					ring->mr_state = MR_FREE;
   1100   8275       Eric 				}
   1101   8275       Eric 				return (err);
   1102   8275       Eric 			}
   1103   8275       Eric 			mac_set_rx_group_state(grp, MAC_GROUP_STATE_SHARED);
   1104   8275       Eric 		}
   1105   8275       Eric 	}
   1106   8275       Eric 
   1107   8275       Eric 	return (err);
   1108   8275       Eric }
   1109   8275       Eric 
   1110   8275       Eric /*
   1111   8893    Michael  * Private GLDv3 function to stop a MAC instance.
   1112   8893    Michael  */
   1113   8893    Michael void
   1114   8893    Michael mac_stop(mac_handle_t mh)
   1115   8893    Michael {
   1116   8893    Michael 	mac_impl_t	*mip = (mac_impl_t *)mh;
   1117   8893    Michael 
   1118   8275       Eric 	ASSERT(mip->mi_stop != NULL);
   1119   8275       Eric 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   1120   8275       Eric 
   1121   8275       Eric 	/*
   1122   8275       Eric 	 * Check whether the device is still needed.
   1123   8275       Eric 	 */
   1124   8275       Eric 	ASSERT(mip->mi_active != 0);
   1125   8275       Eric 	if (--mip->mi_active == 0) {
   1126   8275       Eric 		if (mip->mi_rx_groups != NULL) {
   1127   8275       Eric 			/*
   1128   8275       Eric 			 * There should be no more active clients since the
   1129   8275       Eric 			 * MAC is being stopped. Stop the default RX group
   1130   8275       Eric 			 * and transition it back to registered state.
   1131   8275       Eric 			 */
   1132   8275       Eric 			mac_group_t *grp = &mip->mi_rx_groups[0];
   1133   8275       Eric 
   1134   8275       Eric 			/*
   1135   8275       Eric 			 * When clients are torn down, the groups
   1136   8275       Eric 			 * are release via mac_release_rx_group which
   1137   8275       Eric 			 * knows the the default group is always in
   1138   8275       Eric 			 * started mode since broadcast uses it. So
   1139   8275       Eric 			 * we can assert that their are no clients
   1140   8275       Eric 			 * (since mac_bcast_add doesn't register itself
   1141   8275       Eric 			 * as a client) and group is in SHARED state.
   1142   8275       Eric 			 */
   1143   8275       Eric 			ASSERT(grp->mrg_state == MAC_GROUP_STATE_SHARED);
   1144   8275       Eric 			ASSERT(MAC_RX_GROUP_NO_CLIENT(grp) &&
   1145   8275       Eric 			    mip->mi_nactiveclients == 0);
   1146   8275       Eric 			mac_stop_group_and_rings(grp);
   1147   8275       Eric 			mac_set_rx_group_state(grp, MAC_GROUP_STATE_REGISTERED);
   1148   8275       Eric 		}
   1149   8275       Eric 
   1150   8275       Eric 		if (mip->mi_default_tx_ring != NULL) {
   1151   8275       Eric 			mac_ring_t *ring;
   1152   8275       Eric 
   1153   8275       Eric 			ring = (mac_ring_t *)mip->mi_default_tx_ring;
   1154   8275       Eric 			mac_stop_ring(ring);
   1155   8275       Eric 			ring->mr_state = MR_FREE;
   1156   8275       Eric 		}
   1157   8275       Eric 
   1158   8275       Eric 		/*
   1159   8275       Eric 		 * Stop the device.
   1160   8275       Eric 		 */
   1161   8275       Eric 		mip->mi_stop(mip->mi_driver);
   1162   8275       Eric 	}
   1163   8275       Eric }
   1164   8275       Eric 
   1165   8275       Eric int
   1166   9641     Girish i_mac_promisc_set(mac_impl_t *mip, boolean_t on)
   1167   8275       Eric {
   1168   8275       Eric 	int		err = 0;
   1169   8275       Eric 
   1170   8275       Eric 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   1171   8275       Eric 	ASSERT(mip->mi_setpromisc != NULL);
   1172   9641     Girish 
   1173   8275       Eric 	if (on) {
   1174   8275       Eric 		/*
   1175   8275       Eric 		 * Enable promiscuous mode on the device if not yet enabled.
   1176   8275       Eric 		 */
   1177   8275       Eric 		if (mip->mi_devpromisc++ == 0) {
   1178   8275       Eric 			err = mip->mi_setpromisc(mip->mi_driver, B_TRUE);
   1179   8275       Eric 			if (err != 0) {
   1180   8275       Eric 				mip->mi_devpromisc--;
   1181   8275       Eric 				return (err);
   1182   8275       Eric 			}
   1183   8275       Eric 			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
   1184   8275       Eric 		}
   1185   8275       Eric 	} else {
   1186   8275       Eric 		if (mip->mi_devpromisc == 0)
   1187   8275       Eric 			return (EPROTO);
   1188   8275       Eric 
   1189   8275       Eric 		/*
   1190   8275       Eric 		 * Disable promiscuous mode on the device if this is the last
   1191   8275       Eric 		 * enabling.
   1192   8275       Eric 		 */
   1193   8275       Eric 		if (--mip->mi_devpromisc == 0) {
   1194   8275       Eric 			err = mip->mi_setpromisc(mip->mi_driver, B_FALSE);
   1195   8275       Eric 			if (err != 0) {
   1196   8275       Eric 				mip->mi_devpromisc++;
   1197   8275       Eric 				return (err);
   1198   8275       Eric 			}
   1199   8275       Eric 			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
   1200   8275       Eric 		}
   1201   9641     Girish 	}
   1202   9641     Girish 
   1203   9641     Girish 	return (0);
   1204   8275       Eric }
   1205   8275       Eric 
   1206   8275       Eric /*
   1207   8275       Eric  * The promiscuity state can change any time. If the caller needs to take
   1208   8275       Eric  * actions that are atomic with the promiscuity state, then the caller needs
   1209   8275       Eric  * to bracket the entire sequence with mac_perim_enter/exit
   1210   8275       Eric  */
   1211   8275       Eric boolean_t
   1212   9641     Girish mac_promisc_get(mac_handle_t mh)
   1213   8275       Eric {
   1214   8275       Eric 	mac_impl_t		*mip = (mac_impl_t *)mh;
   1215   8275       Eric 
   1216   8275       Eric 	/*
   1217   8275       Eric 	 * Return the current promiscuity.
   1218   8275       Eric 	 */
   1219   9641     Girish 	return (mip->mi_devpromisc != 0);
   1220   8275       Eric }
   1221   8275       Eric 
   1222   8275       Eric /*
   1223   8275       Eric  * Invoked at MAC instance attach time to initialize the list
   1224   8275       Eric  * of factory MAC addresses supported by a MAC instance. This function
   1225   8275       Eric  * builds a local cache in the mac_impl_t for the MAC addresses
   1226   8275       Eric  * supported by the underlying hardware. The MAC clients themselves
   1227   8275       Eric  * use the mac_addr_factory*() functions to query and reserve
   1228   8275       Eric  * factory MAC addresses.
   1229   8275       Eric  */
   1230   8275       Eric void
   1231   8275       Eric mac_addr_factory_init(mac_impl_t *mip)
   1232   8275       Eric {
   1233   8275       Eric 	mac_capab_multifactaddr_t capab;
   1234   8275       Eric 	uint8_t *addr;
   1235   8275       Eric 	int i;
   1236   8275       Eric 
   1237   8275       Eric 	/*
   1238   8275       Eric 	 * First round to see how many factory MAC addresses are available.
   1239   8275       Eric 	 */
   1240   8275       Eric 	bzero(&capab, sizeof (capab));
   1241   8275       Eric 	if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_MULTIFACTADDR,
   1242   8275       Eric 	    &capab) || (capab.mcm_naddr == 0)) {
   1243   8275       Eric 		/*
   1244   8275       Eric 		 * The MAC instance doesn't support multiple factory
   1245   8275       Eric 		 * MAC addresses, we're done here.
   1246   8275       Eric 		 */
   1247   8275       Eric 		return;
   1248   8275       Eric 	}
   1249   8275       Eric 
   1250   8275       Eric 	/*
   1251   8275       Eric 	 * Allocate the space and get all the factory addresses.
   1252   8275       Eric 	 */
   1253   8275       Eric 	addr = kmem_alloc(capab.mcm_naddr * MAXMACADDRLEN, KM_SLEEP);
   1254   8275       Eric 	capab.mcm_getaddr(mip->mi_driver, capab.mcm_naddr, addr);
   1255   8275       Eric 
   1256   8275       Eric 	mip->mi_factory_addr_num = capab.mcm_naddr;
   1257   8275       Eric 	mip->mi_factory_addr = kmem_zalloc(mip->mi_factory_addr_num *
   1258   8275       Eric 	    sizeof (mac_factory_addr_t), KM_SLEEP);
   1259   8275       Eric 
   1260   8275       Eric 	for (i = 0; i < capab.mcm_naddr; i++) {
   1261   8275       Eric 		bcopy(addr + i * MAXMACADDRLEN,
   1262   8275       Eric 		    mip->mi_factory_addr[i].mfa_addr,
   1263   8275       Eric 		    mip->mi_type->mt_addr_length);
   1264   8275       Eric 		mip->mi_factory_addr[i].mfa_in_use = B_FALSE;
   1265   8275       Eric 	}
   1266   8275       Eric 
   1267   8275       Eric 	kmem_free(addr, capab.mcm_naddr * MAXMACADDRLEN);
   1268   8275       Eric }
   1269   8275       Eric 
   1270   8275       Eric void
   1271   8275       Eric mac_addr_factory_fini(mac_impl_t *mip)
   1272   8275       Eric {
   1273   8275       Eric 	if (mip->mi_factory_addr == NULL) {
   1274   8275       Eric 		ASSERT(mip->mi_factory_addr_num == 0);
   1275   8275       Eric 		return;
   1276   8275       Eric 	}
   1277   8275       Eric 
   1278   8275       Eric 	kmem_free(mip->mi_factory_addr, mip->mi_factory_addr_num *
   1279   8275       Eric 	    sizeof (mac_factory_addr_t));
   1280   8275       Eric 
   1281   8275       Eric 	mip->mi_factory_addr = NULL;
   1282   8275       Eric 	mip->mi_factory_addr_num = 0;
   1283   8275       Eric }
   1284   8275       Eric 
   1285   8275       Eric /*
   1286   8275       Eric  * Reserve a factory MAC address. If *slot is set to -1, the function
   1287   8275       Eric  * attempts to reserve any of the available factory MAC addresses and
   1288   8275       Eric  * returns the reserved slot id. If no slots are available, the function
   1289   8275       Eric  * returns ENOSPC. If *slot is not set to -1, the function reserves
   1290   8275       Eric  * the specified slot if it is available, or returns EBUSY is the slot
   1291   8275       Eric  * is already used. Returns ENOTSUP if the underlying MAC does not
   1292   8275       Eric  * support multiple factory addresses. If the slot number is not -1 but
   1293   8275       Eric  * is invalid, returns EINVAL.
   1294   8275       Eric  */
   1295   8275       Eric int
   1296   8275       Eric mac_addr_factory_reserve(mac_client_handle_t mch, int *slot)
   1297   8275       Eric {
   1298   8275       Eric 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
   1299   8275       Eric 	mac_impl_t *mip = mcip->mci_mip;
   1300   8275       Eric 	int i, ret = 0;
   1301   8275       Eric 
   1302   8275       Eric 	i_mac_perim_enter(mip);
   1303   8275       Eric 	/*
   1304   8275       Eric 	 * Protect against concurrent readers that may need a self-consistent
   1305   8275       Eric 	 * view of the factory addresses
   1306   8275       Eric 	 */
   1307   8275       Eric 	rw_enter(&mip->mi_rw_lock, RW_WRITER);
   1308   8275       Eric 
   1309   8275       Eric 	if (mip->mi_factory_addr_num == 0) {
   1310   8275       Eric 		ret = ENOTSUP;
   1311   8275       Eric 		goto bail;
   1312   8275       Eric 	}
   1313   8275       Eric 
   1314   8275       Eric 	if (*slot != -1) {
   1315   8275       Eric 		/* check the specified slot */
   1316   8275       Eric 		if (*slot < 1 || *slot > mip->mi_factory_addr_num) {
   1317   8275       Eric 			ret = EINVAL;
   1318   8275       Eric 			goto bail;
   1319   8275       Eric 		}
   1320   8275       Eric 		if (mip->mi_factory_addr[*slot-1].mfa_in_use) {
   1321   8275       Eric 			ret = EBUSY;
   1322   8275       Eric 			goto bail;
   1323   8275       Eric 		}
   1324   8275       Eric 	} else {
   1325   8275       Eric 		/* pick the next available slot */
   1326   8275       Eric 		for (i = 0; i < mip->mi_factory_addr_num; i++) {
   1327   8275       Eric 			if (!mip->mi_factory_addr[i].mfa_in_use)
   1328   8275       Eric 				break;
   1329   8275       Eric 		}
   1330   8275       Eric 
   1331   8275       Eric 		if (i == mip->mi_factory_addr_num) {
   1332   8275       Eric 			ret = ENOSPC;
   1333   8275       Eric 			goto bail;
   1334   8275       Eric 		}
   1335   8275       Eric 		*slot = i+1;
   1336   8275       Eric 	}
   1337   8275       Eric 
   1338   8275       Eric 	mip->mi_factory_addr[*slot-1].mfa_in_use = B_TRUE;
   1339   8275       Eric 	mip->mi_factory_addr[*slot-1].mfa_client = mcip;
   1340   8275       Eric 
   1341   8275       Eric bail:
   1342   8275       Eric 	rw_exit(&mip->mi_rw_lock);
   1343   8275       Eric 	i_mac_perim_exit(mip);
   1344   8275       Eric 	return (ret);
   1345   8275       Eric }
   1346   8275       Eric 
   1347   8275       Eric /*
   1348   8275       Eric  * Release the specified factory MAC address slot.
   1349   8275       Eric  */
   1350   8275       Eric void
   1351   8275       Eric mac_addr_factory_release(mac_client_handle_t mch, uint_t slot)
   1352   8275       Eric {
   1353   8275       Eric 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
   1354   8275       Eric 	mac_impl_t *mip = mcip->mci_mip;
   1355   8275       Eric 
   1356   8275       Eric 	i_mac_perim_enter(mip);
   1357   8275       Eric 	/*
   1358   8275       Eric 	 * Protect against concurrent readers that may need a self-consistent
   1359   8275       Eric 	 * view of the factory addresses
   1360   8275       Eric 	 */
   1361   8275       Eric 	rw_enter(&mip->mi_rw_lock, RW_WRITER);
   1362   8275       Eric 
   1363   8275       Eric 	ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num);
   1364   8275       Eric 	ASSERT(mip->mi_factory_addr[slot-1].mfa_in_use);
   1365   8275       Eric 
   1366   8275       Eric 	mip->mi_factory_addr[slot-1].mfa_in_use = B_FALSE;
   1367   8275       Eric 
   1368   8275       Eric 	rw_exit(&mip->mi_rw_lock);
   1369   8275       Eric 	i_mac_perim_exit(mip);
   1370   8275       Eric }
   1371   8275       Eric 
   1372   8275       Eric /*
   1373   8275       Eric  * Stores in mac_addr the value of the specified MAC address. Returns
   1374   8275       Eric  * 0 on success, or EINVAL if the slot number is not valid for the MAC.
   1375   8275       Eric  * The caller must provide a string of at least MAXNAMELEN bytes.
   1376   8275       Eric  */
   1377   8275       Eric void
   1378   8275       Eric mac_addr_factory_value(mac_handle_t mh, int slot, uchar_t *mac_addr,
   1379   8275       Eric     uint_t *addr_len, char *client_name, boolean_t *in_use_arg)
   1380   8275       Eric {
   1381   8275       Eric 	mac_impl_t *mip = (mac_impl_t *)mh;
   1382   8275       Eric 	boolean_t in_use;
   1383   8275       Eric 
   1384   8275       Eric 	ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num);
   1385   8275       Eric 
   1386   8275       Eric 	/*
   1387   8275       Eric 	 * Readers need to hold mi_rw_lock. Writers need to hold mac perimeter
   1388   8275       Eric 	 * and mi_rw_lock
   1389   8275       Eric 	 */
   1390   8275       Eric 	rw_enter(&mip->mi_rw_lock, RW_READER);
   1391   8275       Eric 	bcopy(mip->mi_factory_addr[slot-1].mfa_addr, mac_addr, MAXMACADDRLEN);
   1392   8275       Eric 	*addr_len = mip->mi_type->mt_addr_length;
   1393   8275       Eric 	in_use = mip->mi_factory_addr[slot-1].mfa_in_use;
   1394   8275       Eric 	if (in_use && client_name != NULL) {
   1395   8275       Eric 		bcopy(mip->mi_factory_addr[slot-1].mfa_client->mci_name,
   1396   8275       Eric 		    client_name, MAXNAMELEN);
   1397   8275       Eric 	}
   1398   8275       Eric 	if (in_use_arg != NULL)
   1399   8275       Eric 		*in_use_arg = in_use;
   1400   8275       Eric 	rw_exit(&mip->mi_rw_lock);
   1401   8275       Eric }
   1402   8275       Eric 
   1403   8275       Eric /*
   1404   8275       Eric  * Returns the number of factory MAC addresses (in addition to the
   1405   8275       Eric  * primary MAC address), 0 if the underlying MAC doesn't support
   1406   8275       Eric  * that feature.
   1407   8275       Eric  */
   1408   8275       Eric uint_t
   1409   8275       Eric mac_addr_factory_num(mac_handle_t mh)
   1410   8275       Eric {
   1411   8275       Eric 	mac_impl_t *mip = (mac_impl_t *)mh;
   1412   8275       Eric 
   1413   8275       Eric 	return (mip->mi_factory_addr_num);
   1414   8275       Eric }
   1415   8275       Eric 
   1416   8275       Eric 
   1417   8275       Eric void
   1418   8275       Eric mac_rx_group_unmark(mac_group_t *grp, uint_t flag)
   1419   8275       Eric {
   1420   8275       Eric 	mac_ring_t	*ring;
   1421   8275       Eric 
   1422   8275       Eric 	for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next)
   1423   8275       Eric 		ring->mr_flag &= ~flag;
   1424   8275       Eric }
   1425   8275       Eric 
   1426   8275       Eric /*
   1427   8275       Eric  * The following mac_hwrings_xxx() functions are private mac client functions
   1428   8275       Eric  * used by the aggr driver to access and control the underlying HW Rx group
   1429   8275       Eric  * and rings. In this case, the aggr driver has exclusive control of the
   1430   8275       Eric  * underlying HW Rx group/rings, it calls the following functions to
   1431   8275       Eric  * start/stop the HW Rx rings, disable/enable polling, add/remove mac'
   1432   8275       Eric  * addresses, or set up the Rx callback.
   1433   8275       Eric  */
   1434   8275       Eric /* ARGSUSED */
   1435   8275       Eric static void
   1436   8275       Eric mac_hwrings_rx_process(void *arg, mac_resource_handle_t srs,
   1437   8275       Eric     mblk_t *mp_chain, boolean_t loopback)
   1438   8275       Eric {
   1439   8275       Eric 	mac_soft_ring_set_t	*mac_srs = (mac_soft_ring_set_t *)srs;
   1440   8275       Eric 	mac_srs_rx_t		*srs_rx = &mac_srs->srs_rx;
   1441   8275       Eric 	mac_direct_rx_t		proc;
   1442   8275       Eric 	void			*arg1;
   1443   8275       Eric 	mac_resource_handle_t	arg2;
   1444   8275       Eric 
   1445   8275       Eric 	proc = srs_rx->sr_func;
   1446   8275       Eric 	arg1 = srs_rx->sr_arg1;
   1447   8275       Eric 	arg2 = mac_srs->srs_mrh;
   1448   8275       Eric 
   1449   8275       Eric 	proc(arg1, arg2, mp_chain, NULL);
   1450   8275       Eric }
   1451   8275       Eric 
   1452   8275       Eric /*
   1453   8275       Eric  * This function is called to get the list of HW rings that are reserved by
   1454   8275       Eric  * an exclusive mac client.
   1455   8275       Eric  *
   1456   8275       Eric  * Return value: the number of HW rings.
   1457   8275       Eric  */
   1458   8275       Eric int
   1459   8275       Eric mac_hwrings_get(mac_client_handle_t mch, mac_group_handle_t *hwgh,
   1460  10309  Sriharsha     mac_ring_handle_t *hwrh, mac_ring_type_t rtype)
   1461   8275       Eric {
   1462   8275       Eric 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   1463   8275       Eric 	int			cnt = 0;
   1464   8275       Eric 
   1465  10309  Sriharsha 	switch (rtype) {
   1466  10309  Sriharsha 	case MAC_RING_TYPE_RX: {
   1467  10309  Sriharsha 		flow_entry_t	*flent = mcip->mci_flent;
   1468  10309  Sriharsha 		mac_group_t	*grp;
   1469  10309  Sriharsha 		mac_ring_t	*ring;
   1470  10309  Sriharsha 
   1471  10309  Sriharsha 		grp = flent->fe_rx_ring_group;
   1472  10309  Sriharsha 		/*
   1473  10309  Sriharsha 		 * The mac client did not reserve any RX group, return directly.
   1474  10309  Sriharsha 		 * This is probably because the underlying MAC does not support
   1475  10309  Sriharsha 		 * any groups.
   1476  10309  Sriharsha 		 */
   1477  10309  Sriharsha 		*hwgh = NULL;
   1478  10309  Sriharsha 		if (grp == NULL)
   1479  10309  Sriharsha 			return (0);
   1480  10309  Sriharsha 		/*
   1481  10309  Sriharsha 		 * This group must be reserved by this mac client.
   1482  10309  Sriharsha 		 */
   1483  10309  Sriharsha 		ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) &&
   1484  10309  Sriharsha 		    (mch == (mac_client_handle_t)
   1485  10309  Sriharsha 		    (MAC_RX_GROUP_ONLY_CLIENT(grp))));
   1486  10309  Sriharsha 		for (ring = grp->mrg_rings;
   1487  10309  Sriharsha 		    ring != NULL; ring = ring->mr_next, cnt++) {
   1488  10309  Sriharsha 			ASSERT(cnt < MAX_RINGS_PER_GROUP);
   1489  10309  Sriharsha 			hwrh[cnt] = (mac_ring_handle_t)ring;
   1490  10309  Sriharsha 		}
   1491  10309  Sriharsha 		*hwgh = (mac_group_handle_t)grp;
   1492  10309  Sriharsha 		return (cnt);
   1493  10309  Sriharsha 	}
   1494  10309  Sriharsha 	case MAC_RING_TYPE_TX: {
   1495  10309  Sriharsha 		mac_soft_ring_set_t	*tx_srs;
   1496  10309  Sriharsha 		mac_srs_tx_t		*tx;
   1497  10309  Sriharsha 
   1498  10309  Sriharsha 		tx_srs = MCIP_TX_SRS(mcip);
   1499  10309  Sriharsha 		tx = &tx_srs->srs_tx;
   1500  10309  Sriharsha 		for (; cnt < tx->st_ring_count; cnt++)
   1501  10309  Sriharsha 			hwrh[cnt] = tx->st_rings[cnt];
   1502  10309  Sriharsha 		return (cnt);
   1503  10309  Sriharsha 	}
   1504  10309  Sriharsha 	default:
   1505  10309  Sriharsha 		ASSERT(B_FALSE);
   1506  10309  Sriharsha 		return (-1);
   1507  10309  Sriharsha 	}
   1508   8275       Eric }
   1509   8275       Eric 
   1510   8275       Eric /*
   1511   8275       Eric  * Setup the RX callback of the mac client which exclusively controls HW ring.
   1512   8275       Eric  */
   1513   8275       Eric void
   1514   8275       Eric mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh)
   1515   8275       Eric {
   1516   8275       Eric 	mac_ring_t		*hw_ring = (mac_ring_t *)hwrh;
   1517   8275       Eric 	mac_soft_ring_set_t	*mac_srs = hw_ring->mr_srs;
   1518   8275       Eric 
   1519   8275       Eric 	mac_srs->srs_mrh = prh;
   1520   8275       Eric 	mac_srs->srs_rx.sr_lower_proc = mac_hwrings_rx_process;
   1521   8275       Eric }
   1522   8275       Eric 
   1523   8275       Eric void
   1524   8275       Eric mac_hwring_teardown(mac_ring_handle_t hwrh)
   1525   8275       Eric {
   1526   8275       Eric 	mac_ring_t		*hw_ring = (mac_ring_t *)hwrh;
   1527   8275       Eric 	mac_soft_ring_set_t	*mac_srs = hw_ring->mr_srs;
   1528   8275       Eric 
   1529   8275       Eric 	mac_srs->srs_rx.sr_lower_proc = mac_rx_srs_process;
   1530   8275       Eric 	mac_srs->srs_mrh = NULL;
   1531   8275       Eric }
   1532   8275       Eric 
   1533   8275       Eric int
   1534   8275       Eric mac_hwring_disable_intr(mac_ring_handle_t rh)
   1535   8275       Eric {
   1536   8275       Eric 	mac_ring_t *rr_ring = (mac_ring_t *)rh;
   1537   8275       Eric 	mac_intr_t *intr = &rr_ring->mr_info.mri_intr;
   1538   8275       Eric 
   1539   8275       Eric 	return (intr->mi_disable(intr->mi_handle));
   1540   8275       Eric }
   1541   8275       Eric 
   1542   8275       Eric int
   1543   8275       Eric mac_hwring_enable_intr(mac_ring_handle_t rh)
   1544   8275       Eric {
   1545   8275       Eric 	mac_ring_t *rr_ring = (mac_ring_t *)rh;
   1546   8275       Eric 	mac_intr_t *intr = &rr_ring->mr_info.mri_intr;
   1547   8275       Eric 
   1548   8275       Eric 	return (intr->mi_enable(intr->mi_handle));
   1549   8275       Eric }
   1550   8275       Eric 
   1551   8275       Eric int
   1552   8275       Eric mac_hwring_start(mac_ring_handle_t rh)
   1553   8275       Eric {
   1554   8275       Eric 	mac_ring_t *rr_ring = (mac_ring_t *)rh;
   1555   8275       Eric 
   1556   8275       Eric 	MAC_RING_UNMARK(rr_ring, MR_QUIESCE);
   1557   8275       Eric 	return (0);
   1558   8275       Eric }
   1559   8275       Eric 
   1560   8275       Eric void
   1561   8275       Eric mac_hwring_stop(mac_ring_handle_t rh)
   1562   8275       Eric {
   1563   8275       Eric 	mac_ring_t *rr_ring = (mac_ring_t *)rh;
   1564   8275       Eric 
   1565   8275       Eric 	mac_rx_ring_quiesce(rr_ring, MR_QUIESCE);
   1566   8275       Eric }
   1567   8275       Eric 
   1568   8275       Eric mblk_t *
   1569   8275       Eric mac_hwring_poll(mac_ring_handle_t rh, int bytes_to_pickup)
   1570   8275       Eric {
   1571   8275       Eric 	mac_ring_t *rr_ring = (mac_ring_t *)rh;
   1572   8275       Eric 	mac_ring_info_t *info = &rr_ring->mr_info;
   1573   8275       Eric 
   1574   8275       Eric 	return (info->mri_poll(info->mri_driver, bytes_to_pickup));
   1575  10309  Sriharsha }
   1576  10309  Sriharsha 
   1577  10309  Sriharsha /*
   1578  10309  Sriharsha  * Send packets through the selected tx ring.
   1579  10309  Sriharsha  */
   1580  10309  Sriharsha mblk_t *
   1581  10309  Sriharsha mac_hwring_tx(mac_ring_handle_t rh, mblk_t *mp)
   1582  10309  Sriharsha {
   1583  10309  Sriharsha 	mac_ring_t *ring = (mac_ring_t *)rh;
   1584  10309  Sriharsha 	mac_ring_info_t *info = &ring->mr_info;
   1585  10309  Sriharsha 
   1586  10491      Rishi 	ASSERT(ring->mr_type == MAC_RING_TYPE_TX &&
   1587  10491      Rishi 	    ring->mr_state >= MR_INUSE);
   1588  10309  Sriharsha 	return (info->mri_tx(info->mri_driver, mp));
   1589   8275       Eric }
   1590   8275       Eric 
   1591   8275       Eric int
   1592   8275       Eric mac_hwgroup_addmac(mac_group_handle_t gh, const uint8_t *addr)
   1593   8275       Eric {
   1594   8275       Eric 	mac_group_t *group = (mac_group_t *)gh;
   1595   8275       Eric 
   1596   8275       Eric 	return (mac_group_addmac(group, addr));
   1597   8275       Eric }
   1598   8275       Eric 
   1599   8275       Eric int
   1600   8275       Eric mac_hwgroup_remmac(mac_group_handle_t gh, const uint8_t *addr)
   1601   8275       Eric {
   1602   8275       Eric 	mac_group_t *group = (mac_group_t *)gh;
   1603   8275       Eric 
   1604   8275       Eric 	return (mac_group_remmac(group, addr));
   1605   8275       Eric }
   1606   8275       Eric 
   1607   8275       Eric /*
   1608   8275       Eric  * Set the RX group to be shared/reserved. Note that the group must be
   1609   8275       Eric  * started/stopped outside of this function.
   1610   8275       Eric  */
   1611   8275       Eric void
   1612   8275       Eric mac_set_rx_group_state(mac_group_t *grp, mac_group_state_t state)
   1613   8275       Eric {
   1614   8275       Eric 	/*
   1615   8275       Eric 	 * If there is no change in the group state, just return.
   1616   8275       Eric 	 */
   1617   8275       Eric 	if (grp->mrg_state == state)
   1618   8275       Eric 		return;
   1619   8275       Eric 
   1620   8275       Eric 	switch (state) {
   1621   8275       Eric 	case MAC_GROUP_STATE_RESERVED:
   1622   8275       Eric 		/*
   1623   8275       Eric 		 * Successfully reserved the group.
   1624   8275       Eric 		 *
   1625   8275       Eric 		 * Given that there is an exclusive client controlling this
   1626   8275       Eric 		 * group, we enable the group level polling when available,
   1627   8275       Eric 		 * so that SRSs get to turn on/off individual rings they's
   1628   8275       Eric 		 * assigned to.
   1629   8275       Eric 		 */
   1630   8275       Eric 		ASSERT(MAC_PERIM_HELD(grp->mrg_mh));
   1631   8275       Eric 
   1632   8275       Eric 		if (GROUP_INTR_DISABLE_FUNC(grp) != NULL)
   1633   8275       Eric 			GROUP_INTR_DISABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp));
   1634   8275       Eric 
   1635   8275       Eric 		break;
   1636   8275       Eric 
   1637   8275       Eric 	case MAC_GROUP_STATE_SHARED:
   1638   8275       Eric 		/*
   1639   8275       Eric 		 * Set all rings of this group to software classified.
   1640   8275       Eric 		 * If the group has an overriding interrupt, then re-enable it.
   1641   8275       Eric 		 */
   1642   8275       Eric 		ASSERT(MAC_PERIM_HELD(grp->mrg_mh));
   1643   8275       Eric 
   1644   8275       Eric 		if (GROUP_INTR_ENABLE_FUNC(grp) != NULL)
   1645   8275       Eric 			GROUP_INTR_ENABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp));
   1646   8275       Eric 
   1647   8275       Eric 		/* The ring is not available for reservations any more */
   1648   8275       Eric 		break;
   1649   8275       Eric 
   1650   8275       Eric 	case MAC_GROUP_STATE_REGISTERED:
   1651   8275       Eric 		/* Also callable from mac_register, perim is not held */
   1652   8275       Eric 		break;
   1653   8275       Eric 
   1654   8275       Eric 	default:
   1655   8275       Eric 		ASSERT(B_FALSE);
   1656   8275       Eric 		break;
   1657   8275       Eric 	}
   1658   8275       Eric 
   1659   8275       Eric 	grp->mrg_state = state;
   1660   8275       Eric }
   1661   8275       Eric 
   1662   8275       Eric /*
   1663   8275       Eric  * Quiesce future hardware classified packets for the specified Rx ring
   1664   8275       Eric  */
   1665   8275       Eric static void
   1666   8275       Eric mac_rx_ring_quiesce(mac_ring_t *rx_ring, uint_t ring_flag)
   1667   8275       Eric {
   1668   8275       Eric 	ASSERT(rx_ring->mr_classify_type == MAC_HW_CLASSIFIER);
   1669   8275       Eric 	ASSERT(ring_flag == MR_CONDEMNED || ring_flag  == MR_QUIESCE);
   1670   8275       Eric 
   1671   8275       Eric 	mutex_enter(&rx_ring->mr_lock);
   1672   8275       Eric 	rx_ring->mr_flag |= ring_flag;
   1673   8275       Eric 	while (rx_ring->mr_refcnt != 0)
   1674   8275       Eric 		cv_wait(&rx_ring->mr_cv, &rx_ring->mr_lock);
   1675   8275       Eric 	mutex_exit(&rx_ring->mr_lock);
   1676   8275       Eric }
   1677   8275       Eric 
   1678   8275       Eric /*
   1679   8275       Eric  * Please see mac_tx for details about the per cpu locking scheme
   1680   8275       Eric  */
   1681   8275       Eric static void
   1682   8275       Eric mac_tx_lock_all(mac_client_impl_t *mcip)
   1683   8275       Eric {
   1684   8275       Eric 	int	i;
   1685   8275       Eric 
   1686   8275       Eric 	for (i = 0; i <= mac_tx_percpu_cnt; i++)
   1687   8275       Eric 		mutex_enter(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
   1688   8275       Eric }
   1689   8275       Eric 
   1690   8275       Eric static void
   1691   8275       Eric mac_tx_unlock_all(mac_client_impl_t *mcip)
   1692   8275       Eric {
   1693   8275       Eric 	int	i;
   1694   8275       Eric 
   1695   8275       Eric 	for (i = mac_tx_percpu_cnt; i >= 0; i--)
   1696   8275       Eric 		mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
   1697   8275       Eric }
   1698   8275       Eric 
   1699   8275       Eric static void
   1700   8275       Eric mac_tx_unlock_allbutzero(mac_client_impl_t *mcip)
   1701   8275       Eric {
   1702   8275       Eric 	int	i;
   1703   8275       Eric 
   1704   8275       Eric 	for (i = mac_tx_percpu_cnt; i > 0; i--)
   1705   8275       Eric 		mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
   1706   8275       Eric }
   1707   8275       Eric 
   1708   8275       Eric static int
   1709   8275       Eric mac_tx_sum_refcnt(mac_client_impl_t *mcip)
   1710   8275       Eric {
   1711   8275       Eric 	int	i;
   1712   8275       Eric 	int	refcnt = 0;
   1713   8275       Eric 
   1714   8275       Eric 	for (i = 0; i <= mac_tx_percpu_cnt; i++)
   1715   8275       Eric 		refcnt += mcip->mci_tx_pcpu[i].pcpu_tx_refcnt;
   1716   8275       Eric 
   1717   8275       Eric 	return (refcnt);
   1718   8275       Eric }
   1719   8275       Eric 
   1720   8275       Eric /*
   1721   8275       Eric  * Stop future Tx packets coming down from the client in preparation for
   1722   8275       Eric  * quiescing the Tx side. This is needed for dynamic reclaim and reassignment
   1723   8275       Eric  * of rings between clients
   1724   8275       Eric  */
   1725   8275       Eric void
   1726   8275       Eric mac_tx_client_block(mac_client_impl_t *mcip)
   1727   8275       Eric {
   1728   8275       Eric 	mac_tx_lock_all(mcip);
   1729   8275       Eric 	mcip->mci_tx_flag |= MCI_TX_QUIESCE;
   1730   8275       Eric 	while (mac_tx_sum_refcnt(mcip) != 0) {
   1731   8275       Eric 		mac_tx_unlock_allbutzero(mcip);
   1732   8275       Eric 		cv_wait(&mcip->mci_tx_cv, &mcip->mci_tx_pcpu[0].pcpu_tx_lock);
   1733   8275       Eric 		mutex_exit(&mcip->mci_tx_pcpu[0].pcpu_tx_lock);
   1734   8275       Eric 		mac_tx_lock_all(mcip);
   1735   8275       Eric 	}
   1736   8275       Eric 	mac_tx_unlock_all(mcip);
   1737   8275       Eric }
   1738   8275       Eric 
   1739   8275       Eric void
   1740   8275       Eric mac_tx_client_unblock(mac_client_impl_t *mcip)
   1741   8275       Eric {
   1742   8275       Eric 	mac_tx_lock_all(mcip);
   1743   8275       Eric 	mcip->mci_tx_flag &= ~MCI_TX_QUIESCE;
   1744   8275       Eric 	mac_tx_unlock_all(mcip);
   1745   8833       Venu 	/*
   1746   8833       Venu 	 * We may fail to disable flow control for the last MAC_NOTE_TX
   1747   8833       Venu 	 * notification because the MAC client is quiesced. Send the
   1748   8833       Venu 	 * notification again.
   1749   8833       Venu 	 */
   1750   8833       Venu 	i_mac_notify(mcip->mci_mip, MAC_NOTE_TX);
   1751   8275       Eric }
   1752   8275       Eric 
   1753   8275       Eric /*
   1754   8275       Eric  * Wait for an SRS to quiesce. The SRS worker will signal us when the
   1755   8275       Eric  * quiesce is done.
   1756   8275       Eric  */
   1757   8275       Eric static void
   1758   8275       Eric mac_srs_quiesce_wait(mac_soft_ring_set_t *srs, uint_t srs_flag)
   1759   8275       Eric {
   1760   8275       Eric 	mutex_enter(&srs->srs_lock);
   1761   8275       Eric 	while (!(srs->srs_state & srs_flag))
   1762   8275       Eric 		cv_wait(&srs->srs_quiesce_done_cv, &srs->srs_lock);
   1763   8275       Eric 	mutex_exit(&srs->srs_lock);
   1764   8275       Eric }
   1765   8275       Eric 
   1766   8275       Eric /*
   1767   8275       Eric  * Quiescing an Rx SRS is achieved by the following sequence. The protocol
   1768   8275       Eric  * works bottom up by cutting off packet flow from the bottommost point in the
   1769   8275       Eric  * mac, then the SRS, and then the soft rings. There are 2 use cases of this
   1770   8275       Eric  * mechanism. One is a temporary quiesce of the SRS, such as say while changing
   1771   8275       Eric  * the Rx callbacks. Another use case is Rx SRS teardown. In the former case
   1772   8275       Eric  * the QUIESCE prefix/suffix is used and in the latter the CONDEMNED is used
   1773   8275       Eric  * for the SRS and MR flags. In the former case the threads pause waiting for
   1774   8275       Eric  * a restart, while in the latter case the threads exit. The Tx SRS teardown
   1775   8275       Eric  * is also mostly similar to the above.
   1776   8275       Eric  *
   1777   8275       Eric  * 1. Stop future hardware classified packets at the lowest level in the mac.
   1778   8275       Eric  *    Remove any hardware classification rule (CONDEMNED case) and mark the
   1779   8275       Eric  *    rings as CONDEMNED or QUIESCE as appropriate. This prevents the mr_refcnt
   1780   8275       Eric  *    from increasing. Upcalls from the driver that come through hardware
   1781   8275       Eric  *    classification will be dropped in mac_rx from now on. Then we wait for
   1782   8275       Eric  *    the mr_refcnt to drop to zero. When the mr_refcnt reaches zero we are
   1783   8275       Eric  *    sure there aren't any upcall threads from the driver through hardware
   1784   8275       Eric  *    classification. In the case of SRS teardown we also remove the
   1785   8275       Eric  *    classification rule in the driver.
   1786   8275       Eric  *
   1787   8275       Eric  * 2. Stop future software classified packets by marking the flow entry with
   1788   8275       Eric  *    FE_QUIESCE or FE_CONDEMNED as appropriate which prevents the refcnt from
   1789   8275       Eric  *    increasing. We also remove the flow entry from the table in the latter
   1790   8275       Eric  *    case. Then wait for the fe_refcnt to reach an appropriate quiescent value
   1791   8275       Eric  *    that indicates there aren't any active threads using that flow entry.
   1792   8275       Eric  *
   1793   8275       Eric  * 3. Quiesce the SRS and softrings by signaling the SRS. The SRS poll thread,
   1794   8275       Eric  *    SRS worker thread, and the soft ring threads are quiesced in sequence
   1795   8275       Eric  *    with the SRS worker thread serving as a master controller. This
   1796   8275       Eric  *    mechanism is explained in mac_srs_worker_quiesce().
   1797   8275       Eric  *
   1798   8275       Eric  * The restart mechanism to reactivate the SRS and softrings is explained
   1799   8275       Eric  * in mac_srs_worker_restart(). Here we just signal the SRS worker to start the
   1800   8275       Eric  * restart sequence.
   1801   8275       Eric  */
   1802   8275       Eric void
   1803   8275       Eric mac_rx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag)
   1804   8275       Eric {
   1805   8275       Eric 	flow_entry_t	*flent = srs->srs_flent;
   1806   8275       Eric 	uint_t	mr_flag, srs_done_flag;
   1807   8275       Eric 
   1808   8275       Eric 	ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent)));
   1809   8275       Eric 	ASSERT(!(srs->srs_type & SRST_TX));
   1810   8275       Eric 
   1811   8275       Eric 	if (srs_quiesce_flag == SRS_CONDEMNED) {
   1812   8275       Eric 		mr_flag = MR_CONDEMNED;
   1813   8275       Eric 		srs_done_flag = SRS_CONDEMNED_DONE;
   1814   8275       Eric 		if (srs->srs_type & SRST_CLIENT_POLL_ENABLED)
   1815   8275       Eric 			mac_srs_client_poll_disable(srs->srs_mcip, srs);
   1816   8275       Eric 	} else {
   1817   8275       Eric 		ASSERT(srs_quiesce_flag == SRS_QUIESCE);
   1818   8275       Eric 		mr_flag = MR_QUIESCE;
   1819   8275       Eric 		srs_done_flag = SRS_QUIESCE_DONE;
   1820   8275       Eric 		if (srs->srs_type & SRST_CLIENT_POLL_ENABLED)
   1821   8275       Eric 			mac_srs_client_poll_quiesce(srs->srs_mcip, srs);
   1822   8275       Eric 	}
   1823   8275       Eric 
   1824   8275       Eric 	if (srs->srs_ring != NULL) {
   1825   8275       Eric 		mac_rx_ring_quiesce(srs->srs_ring, mr_flag);
   1826   8275       Eric 	} else {
   1827   8275       Eric 		/*
   1828   8275       Eric 		 * SRS is driven by software classification. In case
   1829   8275       Eric 		 * of CONDEMNED, the top level teardown functions will
   1830   8275       Eric 		 * deal with flow removal.
   1831   8275       Eric 		 */
   1832   8275       Eric 		if (srs_quiesce_flag != SRS_CONDEMNED) {
   1833   8275       Eric 			FLOW_MARK(flent, FE_QUIESCE);
   1834   8275       Eric 			mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
   1835   8275       Eric 		}
   1836   8275       Eric 	}
   1837   8275       Eric 
   1838   8275       Eric 	/*
   1839   8275       Eric 	 * Signal the SRS to quiesce itself, and then cv_wait for the
   1840   8275       Eric 	 * SRS quiesce to complete. The SRS worker thread will wake us
   1841   8275       Eric 	 * up when the quiesce is complete
   1842   8275       Eric 	 */
   1843   8275       Eric 	mac_srs_signal(srs, srs_quiesce_flag);
   1844   8275       Eric 	mac_srs_quiesce_wait(srs, srs_done_flag);
   1845   8275       Eric }
   1846   8275       Eric 
   1847   8275       Eric /*
   1848   8275       Eric  * Remove an SRS.
   1849   8275       Eric  */
   1850   8275       Eric void
   1851   8275       Eric mac_rx_srs_remove(mac_soft_ring_set_t *srs)
   1852   8275       Eric {
   1853   8275       Eric 	flow_entry_t *flent = srs->srs_flent;
   1854   8275       Eric 	int i;
   1855   8275       Eric 
   1856   8275       Eric 	mac_rx_srs_quiesce(srs, SRS_CONDEMNED);
   1857   8275       Eric 	/*
   1858   8275       Eric 	 * Locate and remove our entry in the fe_rx_srs[] array, and
   1859   8275       Eric 	 * adjust the fe_rx_srs array entries and array count by
   1860   8275       Eric 	 * moving the last entry into the vacated spot.
   1861   8275       Eric 	 */
   1862   8275       Eric 	mutex_enter(&flent->fe_lock);
   1863   8275       Eric 	for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
   1864   8275       Eric 		if (flent->fe_rx_srs[i] == srs)
   1865   8275       Eric 			break;
   1866   8275       Eric 	}
   1867   8275       Eric 
   1868   8275       Eric 	ASSERT(i != 0 && i < flent->fe_rx_srs_cnt);
   1869   8275       Eric 	if (i != flent->fe_rx_srs_cnt - 1) {
   1870   8275       Eric 		flent->fe_rx_srs[i] =
   1871   8275       Eric 		    flent->fe_rx_srs[flent->fe_rx_srs_cnt - 1];
   1872   8275       Eric 		i = flent->fe_rx_srs_cnt - 1;
   1873   8275       Eric 	}
   1874   8275       Eric 
   1875   8275       Eric 	flent->fe_rx_srs[i] = NULL;
   1876   8275       Eric 	flent->fe_rx_srs_cnt--;
   1877   8275       Eric 	mutex_exit(&flent->fe_lock);
   1878   8275       Eric 
   1879   8275       Eric 	mac_srs_free(srs);
   1880   8275       Eric }
   1881   8275       Eric 
   1882   8275       Eric static void
   1883   8275       Eric mac_srs_clear_flag(mac_soft_ring_set_t *srs, uint_t flag)
   1884   8275       Eric {
   1885   8275       Eric 	mutex_enter(&srs->srs_lock);
   1886   8275       Eric 	srs->srs_state &= ~flag;
   1887   8275       Eric 	mutex_exit(&srs->srs_lock);
   1888   8275       Eric }
   1889   8275       Eric 
   1890   8275       Eric void
   1891   8275       Eric mac_rx_srs_restart(mac_soft_ring_set_t *srs)
   1892   8275       Eric {
   1893   8275       Eric 	flow_entry_t	*flent = srs->srs_flent;
   1894   8275       Eric 	mac_ring_t	*mr;
   1895   8275       Eric 
   1896   8275       Eric 	ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent)));
   1897   8275       Eric 	ASSERT((srs->srs_type & SRST_TX) == 0);
   1898   8275       Eric 
   1899   8275       Eric 	/*
   1900   8275       Eric 	 * This handles a change in the number of SRSs between the quiesce and
   1901   8275       Eric 	 * and restart operation of a flow.
   1902   8275       Eric 	 */
   1903   8275       Eric 	if (!SRS_QUIESCED(srs))
   1904   8275       Eric 		return;
   1905   8275       Eric 
   1906   8275       Eric 	/*
   1907   8275       Eric 	 * Signal the SRS to restart itself. Wait for the restart to complete
   1908   8275       Eric 	 * Note that we only restart the SRS if it is not marked as
   1909   8275       Eric 	 * permanently quiesced.
   1910   8275       Eric 	 */
   1911   8275       Eric 	if (!SRS_QUIESCED_PERMANENT(srs)) {
   1912   8275       Eric 		mac_srs_signal(srs, SRS_RESTART);
   1913   8275       Eric 		mac_srs_quiesce_wait(srs, SRS_RESTART_DONE);
   1914   8275       Eric 		mac_srs_clear_flag(srs, SRS_RESTART_DONE);
   1915   8275       Eric 
   1916   8275       Eric 		mac_srs_client_poll_restart(srs->srs_mcip, srs);
   1917   8275       Eric 	}
   1918   8275       Eric 
   1919   8275       Eric 	/* Finally clear the flags to let the packets in */
   1920   8275       Eric 	mr = srs->srs_ring;
   1921   8275       Eric 	if (mr != NULL) {
   1922   8275       Eric 		MAC_RING_UNMARK(mr, MR_QUIESCE);
   1923   8275       Eric 		/* In case the ring was stopped, safely restart it */
   1924   8275       Eric 		(void) mac_start_ring(mr);
   1925   8275       Eric 	} else {
   1926   8275       Eric 		FLOW_UNMARK(flent, FE_QUIESCE);
   1927   8275       Eric 	}
   1928   8275       Eric }
   1929   8275       Eric 
   1930   8275       Eric /*
   1931   8275       Eric  * Temporary quiesce of a flow and associated Rx SRS.
   1932   8275       Eric  * Please see block comment above mac_rx_classify_flow_rem.
   1933   8275       Eric  */
   1934   8275       Eric /* ARGSUSED */
   1935   8275       Eric int
   1936   8275       Eric mac_rx_classify_flow_quiesce(flow_entry_t *flent, void *arg)
   1937   8275       Eric {
   1938   8275       Eric 	int		i;
   1939   8275       Eric 
   1940   8275       Eric 	for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
   1941   8275       Eric 		mac_rx_srs_quiesce((mac_soft_ring_set_t *)flent->fe_rx_srs[i],
   1942   8275       Eric 		    SRS_QUIESCE);
   1943   8275       Eric 	}
   1944   8275       Eric 	return (0);
   1945   8275       Eric }
   1946   8275       Eric 
   1947   8275       Eric /*
   1948   8275       Eric  * Restart a flow and associated Rx SRS that has been quiesced temporarily
   1949   8275       Eric  * Please see block comment above mac_rx_classify_flow_rem
   1950   8275       Eric  */
   1951   8275       Eric /* ARGSUSED */
   1952   8275       Eric int
   1953   8275       Eric mac_rx_classify_flow_restart(flow_entry_t *flent, void *arg)
   1954   8275       Eric {
   1955   8275       Eric 	int		i;
   1956   8275       Eric 
   1957   8275       Eric 	for (i = 0; i < flent->fe_rx_srs_cnt; i++)
   1958   8275       Eric 		mac_rx_srs_restart((mac_soft_ring_set_t *)flent->fe_rx_srs[i]);
   1959   8275       Eric 
   1960   8275       Eric 	return (0);
   1961   8275       Eric }
   1962   8275       Eric 
   1963   8275       Eric void
   1964   8275       Eric mac_srs_perm_quiesce(mac_client_handle_t mch, boolean_t on)
   1965   8275       Eric {
   1966   8275       Eric 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   1967   8275       Eric 	flow_entry_t		*flent = mcip->mci_flent;
   1968   8275       Eric 	mac_impl_t		*mip = mcip->mci_mip;
   1969   8275       Eric 	mac_soft_ring_set_t	*mac_srs;
   1970   8275       Eric 	int			i;
   1971   8275       Eric 
   1972   8275       Eric 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   1973   8275       Eric 
   1974   8275       Eric 	if (flent == NULL)
   1975   8275       Eric 		return;
   1976   8275       Eric 
   1977   8275       Eric 	for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
   1978   8275       Eric 		mac_srs = flent->fe_rx_srs[i];
   1979   8275       Eric 		mutex_enter(&mac_srs->srs_lock);
   1980   8275       Eric 		if (on)
   1981   8275       Eric 			mac_srs->srs_state |= SRS_QUIESCE_PERM;
   1982   8275       Eric 		else
   1983   8275       Eric 			mac_srs->srs_state &= ~SRS_QUIESCE_PERM;
   1984   8275       Eric 		mutex_exit(&mac_srs->srs_lock);
   1985   8275       Eric 	}
   1986   8275       Eric }
   1987   8275       Eric 
   1988   8275       Eric void
   1989   8275       Eric mac_rx_client_quiesce(mac_client_handle_t mch)
   1990   8275       Eric {
   1991   8275       Eric 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   1992   8275       Eric 	mac_impl_t		*mip = mcip->mci_mip;
   1993   8275       Eric 
   1994   8275       Eric 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   1995   8275       Eric 
   1996   8275       Eric 	if (MCIP_DATAPATH_SETUP(mcip)) {
   1997   8275       Eric 		(void) mac_rx_classify_flow_quiesce(mcip->mci_flent,
   1998   8275       Eric 		    NULL);
   1999   8275       Eric 		(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
   2000   8275       Eric 		    mac_rx_classify_flow_quiesce, NULL);
   2001   8275       Eric 	}
   2002   8275       Eric }
   2003   8275       Eric 
   2004   8275       Eric void
   2005   8275       Eric mac_rx_client_restart(mac_client_handle_t mch)
   2006   8275       Eric {
   2007   8275       Eric 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   2008   8275       Eric 	mac_impl_t		*mip = mcip->mci_mip;
   2009   8275       Eric 
   2010   8275       Eric 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   2011   8275       Eric 
   2012   8275       Eric 	if (MCIP_DATAPATH_SETUP(mcip)) {
   2013   8275       Eric 		(void) mac_rx_classify_flow_restart(mcip->mci_flent, NULL);
   2014   8275       Eric 		(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
   2015   8275       Eric 		    mac_rx_classify_flow_restart, NULL);
   2016   8275       Eric 	}
   2017   8275       Eric }
   2018   8275       Eric 
   2019   8275       Eric /*
   2020   8275       Eric  * This function only quiesces the Tx SRS and softring worker threads. Callers
   2021   8275       Eric  * need to make sure that there aren't any mac client threads doing current or
   2022   8275       Eric  * future transmits in the mac before calling this function.
   2023   8275       Eric  */
   2024   8275       Eric void
   2025   8275       Eric mac_tx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag)
   2026   8275       Eric {
   2027   8275       Eric 	mac_client_impl_t	*mcip = srs->srs_mcip;
   2028   8275       Eric 
   2029   8275       Eric 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
   2030   8275       Eric 
   2031   8275       Eric 	ASSERT(srs->srs_type & SRST_TX);
   2032   8275       Eric 	ASSERT(srs_quiesce_flag == SRS_CONDEMNED ||
   2033   8275       Eric 	    srs_quiesce_flag == SRS_QUIESCE);
   2034   8275       Eric 
   2035   8275       Eric 	/*
   2036   8275       Eric 	 * Signal the SRS to quiesce itself, and then cv_wait for the
   2037   8275       Eric 	 * SRS quiesce to complete. The SRS worker thread will wake us
   2038   8275       Eric 	 * up when the quiesce is complete
   2039   8275       Eric 	 */
   2040   8275       Eric 	mac_srs_signal(srs, srs_quiesce_flag);
   2041   8275       Eric 	mac_srs_quiesce_wait(srs, srs_quiesce_flag == SRS_QUIESCE ?
   2042   8275       Eric 	    SRS_QUIESCE_DONE : SRS_CONDEMNED_DONE);
   2043   8275       Eric }
   2044   8275       Eric 
   2045   8275       Eric void
   2046   8275       Eric mac_tx_srs_restart(mac_soft_ring_set_t *srs)
   2047   8275       Eric {
   2048   8275       Eric 	/*
   2049   8275       Eric 	 * Resizing the fanout could result in creation of new SRSs.
   2050   8275       Eric 	 * They may not necessarily be in the quiesced state in which
   2051   8275       Eric 	 * case it need be restarted
   2052   8275       Eric 	 */
   2053   8275       Eric 	if (!SRS_QUIESCED(srs))
   2054   8275       Eric 		return;
   2055   8275       Eric 
   2056   8275       Eric 	mac_srs_signal(srs, SRS_RESTART);
   2057   8275       Eric 	mac_srs_quiesce_wait(srs, SRS_RESTART_DONE);
   2058   8275       Eric 	mac_srs_clear_flag(srs, SRS_RESTART_DONE);
   2059   8275       Eric }
   2060   8275       Eric 
   2061   8275       Eric /*
   2062   8275       Eric  * Temporary quiesce of a flow and associated Rx SRS.
   2063   8275       Eric  * Please see block comment above mac_rx_srs_quiesce
   2064   8275       Eric  */
   2065   8275       Eric /* ARGSUSED */
   2066   8275       Eric int
   2067   8275       Eric mac_tx_flow_quiesce(flow_entry_t *flent, void *arg)
   2068   8275       Eric {
   2069   8275       Eric 	/*
   2070   8275       Eric 	 * The fe_tx_srs is null for a subflow on an interface that is
   2071   8275       Eric 	 * not plumbed
   2072   8275       Eric 	 */
   2073   8275       Eric 	if (flent->fe_tx_srs != NULL)
   2074   8275       Eric 		mac_tx_srs_quiesce(flent->fe_tx_srs, SRS_QUIESCE);
   2075   8275       Eric 	return (0);
   2076   8275       Eric }
   2077   8275       Eric 
   2078   8275       Eric /* ARGSUSED */
   2079   8275       Eric int
   2080   8275       Eric mac_tx_flow_restart(flow_entry_t *flent, void *arg)
   2081   8275       Eric {
   2082   8275       Eric 	/*
   2083   8275       Eric 	 * The fe_tx_srs is null for a subflow on an interface that is
   2084   8275       Eric 	 * not plumbed
   2085   8275       Eric 	 */
   2086   8275       Eric 	if (flent->fe_tx_srs != NULL)
   2087   8275       Eric 		mac_tx_srs_restart(flent->fe_tx_srs);
   2088   8275       Eric 	return (0);
   2089   8275       Eric }
   2090   8275       Eric 
   2091   8275       Eric void
   2092   8275       Eric mac_tx_client_quiesce(mac_client_impl_t *mcip, uint_t srs_quiesce_flag)
   2093   8275       Eric {
   2094   8275       Eric 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
   2095   8275       Eric 
   2096   8275       Eric 	mac_tx_client_block(mcip);
   2097   8275       Eric 	if (MCIP_TX_SRS(mcip) != NULL) {
   2098   8275       Eric 		mac_tx_srs_quiesce(MCIP_TX_SRS(mcip), srs_quiesce_flag);
   2099   8275       Eric 		(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
   2100   8275       Eric 		    mac_tx_flow_quiesce, NULL);
   2101   8275       Eric 	}
   2102   8275       Eric }
   2103   8275       Eric 
   2104   8275       Eric void
   2105   8275       Eric mac_tx_client_restart(mac_client_impl_t *mcip)
   2106   8275       Eric {
   2107   8275       Eric 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
   2108   8275       Eric 
   2109   8275       Eric 	mac_tx_client_unblock(mcip);
   2110   8275       Eric 	if (MCIP_TX_SRS(mcip) != NULL) {
   2111   8275       Eric 		mac_tx_srs_restart(MCIP_TX_SRS(mcip));
   2112   8275       Eric 		(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
   2113   8275       Eric 		    mac_tx_flow_restart, NULL);
   2114   8275       Eric 	}
   2115   8275       Eric }
   2116   8275       Eric 
   2117   8275       Eric void
   2118   8275       Eric mac_tx_client_flush(mac_client_impl_t *mcip)
   2119   8275       Eric {
   2120   8275       Eric 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
   2121   8275       Eric 
   2122   8275       Eric 	mac_tx_client_quiesce(mcip, SRS_QUIESCE);
   2123   8275       Eric 	mac_tx_client_restart(mcip);
   2124   8275       Eric }
   2125   8275       Eric 
   2126   8275       Eric void
   2127   8275       Eric mac_client_quiesce(mac_client_impl_t *mcip)
   2128   8275       Eric {
   2129   8275       Eric 	mac_rx_client_quiesce((mac_client_handle_t)mcip);
   2130   8275       Eric 	mac_tx_client_quiesce(mcip, SRS_QUIESCE);
   2131   8275       Eric }
   2132   8275       Eric 
   2133   8275       Eric void
   2134   8275       Eric mac_client_restart(mac_client_impl_t *mcip)
   2135   8275       Eric {
   2136   8275       Eric 	mac_rx_client_restart((mac_client_handle_t)mcip);
   2137   8275       Eric 	mac_tx_client_restart(mcip);
   2138   8275       Eric }
   2139   8275       Eric 
   2140   8275       Eric /*
   2141   8275       Eric  * Allocate a minor number.
   2142   8275       Eric  */
   2143   8275       Eric minor_t
   2144   8275       Eric mac_minor_hold(boolean_t sleep)
   2145   8275       Eric {
   2146   8275       Eric 	minor_t	minor;
   2147   8275       Eric 
   2148   8275       Eric 	/*
   2149   8275       Eric 	 * Grab a value from the arena.
   2150   8275       Eric 	 */
   2151   8275       Eric 	atomic_add_32(&minor_count, 1);
   2152   8275       Eric 
   2153   8275       Eric 	if (sleep)
   2154   8275       Eric 		minor = (uint_t)id_alloc(minor_ids);
   2155   8275       Eric 	else
   2156   8275       Eric 		minor = (uint_t)id_alloc_nosleep(minor_ids);
   2157   8275       Eric 
   2158   8275       Eric 	if (minor == 0) {
   2159   8275       Eric 		atomic_add_32(&minor_count, -1);
   2160   8275       Eric 		return (0);
   2161   8275       Eric 	}
   2162   8275       Eric 
   2163   8275       Eric 	return (minor);
   2164   8275       Eric }
   2165   8275       Eric 
   2166   8275       Eric /*
   2167   8275       Eric  * Release a previously allocated minor number.
   2168   8275       Eric  */
   2169   8275       Eric void
   2170   8275       Eric mac_minor_rele(minor_t minor)
   2171   8275       Eric {
   2172   8275       Eric 	/*
   2173   8275       Eric 	 * Return the value to the arena.
   2174   8275       Eric 	 */
   2175   8275       Eric 	id_free(minor_ids, minor);
   2176   8275       Eric 	atomic_add_32(&minor_count, -1);
   2177   8275       Eric }
   2178   8275       Eric 
   2179   8275       Eric uint32_t
   2180   8275       Eric mac_no_notification(mac_handle_t mh)
   2181   8275       Eric {
   2182   8275       Eric 	mac_impl_t *mip = (mac_impl_t *)mh;
   2183   9073      Cathy 
   2184   9073      Cathy 	return (((mip->mi_state_flags & MIS_LEGACY) != 0) ?
   2185   9073      Cathy 	    mip->mi_capab_legacy.ml_unsup_note : 0);
   2186   8275       Eric }
   2187   8275       Eric 
   2188   8275       Eric /*
   2189   8275       Eric  * Prevent any new opens of this mac in preparation for unregister
   2190   8275       Eric  */
   2191   8275       Eric int
   2192   8275       Eric i_mac_disable(mac_impl_t *mip)
   2193   8275       Eric {
   2194   8275       Eric 	mac_client_impl_t	*mcip;
   2195   8275       Eric 
   2196   8275       Eric 	rw_enter(&i_mac_impl_lock, RW_WRITER);
   2197   8275       Eric 	if (mip->mi_state_flags & MIS_DISABLED) {
   2198   8275       Eric 		/* Already disabled, return success */
   2199   8275       Eric 		rw_exit(&i_mac_impl_lock);
   2200   8275       Eric 		return (0);
   2201   8275       Eric 	}
   2202   8275       Eric 	/*
   2203   8275       Eric 	 * See if there are any other references to this mac_t (e.g., VLAN's).
   2204   8275       Eric 	 * If so return failure. If all the other checks below pass, then
   2205   8275       Eric 	 * set mi_disabled atomically under the i_mac_impl_lock to prevent
   2206   8275       Eric 	 * any new VLAN's from being created or new mac client opens of this
   2207   8275       Eric 	 * mac end point.
   2208   8275       Eric 	 */
   2209   8275       Eric 	if (mip->mi_ref > 0) {
   2210   8275       Eric 		rw_exit(&i_mac_impl_lock);
   2211   8275       Eric 		return (EBUSY);
   2212   8275       Eric 	}
   2213   8275       Eric 
   2214   8275       Eric 	/*
   2215   8275       Eric 	 * mac clients must delete all multicast groups they join before
   2216   8275       Eric 	 * closing. bcast groups are reference counted, the last client
   2217   8275       Eric 	 * to delete the group will wait till the group is physically
   2218   8275       Eric 	 * deleted. Since all clients have closed this mac end point
   2219   8275       Eric 	 * mi_bcast_ngrps must be zero at this point
   2220   8275       Eric 	 */
   2221   8275       Eric 	ASSERT(mip->mi_bcast_ngrps == 0);
   2222   8275       Eric 
   2223   8275       Eric 	/*
   2224   8275       Eric 	 * Don't let go of this if it has some flows.
   2225   8275       Eric 	 * All other code guarantees no flows are added to a disabled
   2226   8275       Eric 	 * mac, therefore it is sufficient to check for the flow table
   2227   8275       Eric 	 * only here.
   2228   8275       Eric 	 */
   2229   8275       Eric 	mcip = mac_primary_client_handle(mip);
   2230   8275       Eric 	if ((mcip != NULL) && mac_link_has_flows((mac_client_handle_t)mcip)) {
   2231   8275       Eric 		rw_exit(&i_mac_impl_lock);
   2232   8275       Eric 		return (ENOTEMPTY);
   2233   8275       Eric 	}
   2234   8275       Eric 
   2235   8275       Eric 	mip->mi_state_flags |= MIS_DISABLED;
   2236   8275       Eric 	rw_exit(&i_mac_impl_lock);
   2237   8275       Eric 	return (0);
   2238   8275       Eric }
   2239   8275       Eric 
   2240   8275       Eric int
   2241   8275       Eric mac_disable_nowait(mac_handle_t mh)
   2242   8275       Eric {
   2243   8275       Eric 	mac_impl_t	*mip = (mac_impl_t *)mh;
   2244   8275       Eric 	int err;
   2245   8275       Eric 
   2246   8275       Eric 	if ((err = i_mac_perim_enter_nowait(mip)) != 0)
   2247   8275       Eric 		return (err);
   2248   8275       Eric 	err = i_mac_disable(mip);
   2249   8275       Eric 	i_mac_perim_exit(mip);
   2250   8275       Eric 	return (err);
   2251   8275       Eric }
   2252   8275       Eric 
   2253   8275       Eric int
   2254   8275       Eric mac_disable(mac_handle_t mh)
   2255   8275       Eric {
   2256   8275       Eric 	mac_impl_t	*mip = (mac_impl_t *)mh;
   2257   8275       Eric 	int err;
   2258   8275       Eric 
   2259   8275       Eric 	i_mac_perim_enter(mip);
   2260   8275       Eric 	err = i_mac_disable(mip);
   2261   8275       Eric 	i_mac_perim_exit(mip);
   2262   8275       Eric 
   2263   8275       Eric 	/*
   2264   8275       Eric 	 * Clean up notification thread and wait for it to exit.
   2265   8275       Eric 	 */
   2266   8275       Eric 	if (err == 0)
   2267   8275       Eric 		i_mac_notify_exit(mip);
   2268   8275       Eric 
   2269   8275       Eric 	return (err);
   2270   8275       Eric }
   2271   8275       Eric 
   2272   8275       Eric /*
   2273   8275       Eric  * Called when the MAC instance has a non empty flow table, to de-multiplex
   2274   8275       Eric  * incoming packets to the right flow.
   2275   8275       Eric  * The MAC's rw lock is assumed held as a READER.
   2276   8275       Eric  */
   2277   8275       Eric /* ARGSUSED */
   2278   8275       Eric static mblk_t *
   2279   8275       Eric mac_rx_classify(mac_impl_t *mip, mac_resource_handle_t mrh, mblk_t *mp)
   2280   8275       Eric {
   2281   8275       Eric 	flow_entry_t	*flent = NULL;
   2282   8275       Eric 	uint_t		flags = FLOW_INBOUND;
   2283   8275       Eric 	int		err;
   2284   8275       Eric 
   2285   8275       Eric 	/*
   2286   8275       Eric 	 * If the mac is a port of an aggregation, pass FLOW_IGNORE_VLAN
   2287   8275       Eric 	 * to mac_flow_lookup() so that the VLAN packets can be successfully
   2288   8275       Eric 	 * passed to the non-VLAN aggregation flows.
   2289   8275       Eric 	 *
   2290   8275       Eric 	 * Note that there is possibly a race between this and
   2291   8275       Eric 	 * mac_unicast_remove/add() and VLAN packets could be incorrectly
   2292   8275       Eric 	 * classified to non-VLAN flows of non-aggregation mac clients. These
   2293   8275       Eric 	 * VLAN packets will be then filtered out by the mac module.
   2294   8275       Eric 	 */
   2295   8275       Eric 	if ((mip->mi_state_flags & MIS_EXCLUSIVE) != 0)
   2296   8275       Eric 		flags |= FLOW_IGNORE_VLAN;
   2297   8275       Eric 
   2298   8275       Eric 	err = mac_flow_lookup(mip->mi_flow_tab, mp, flags, &flent);
   2299   8275       Eric 	if (err != 0) {
   2300   8275       Eric 		/* no registered receive function */
   2301   8275       Eric 		return (mp);
   2302   8275       Eric 	} else {
   2303   8275       Eric 		mac_client_impl_t	*mcip;
   2304   8275       Eric 
   2305   8275       Eric 		/*
   2306   8275       Eric 		 * This flent might just be an additional one on the MAC client,
   2307   8275       Eric 		 * i.e. for classification purposes (different fdesc), however
   2308   8275       Eric 		 * the resources, SRS et. al., are in the mci_flent, so if
   2309   8275       Eric 		 * this isn't the mci_flent, we need to get it.
   2310   8275       Eric 		 */
   2311   8275       Eric 		if ((mcip = flent->fe_mcip) != NULL &&
   2312   8275       Eric 		    mcip->mci_flent != flent) {
   2313   8275       Eric 			FLOW_REFRELE(flent);
   2314   8275       Eric 			flent = mcip->mci_flent;
   2315   8275       Eric 			FLOW_TRY_REFHOLD(flent, err);
   2316   8275       Eric 			if (err != 0)
   2317   8275       Eric 				return (mp);
   2318   8275       Eric 		}
   2319   8275       Eric 		(flent->fe_cb_fn)(flent->fe_cb_arg1, flent->fe_cb_arg2, mp,
   2320   8275       Eric 		    B_FALSE);
   2321   8275       Eric 		FLOW_REFRELE(flent);
   2322   8275       Eric 	}
   2323   8275       Eric 	return (NULL);
   2324   8275       Eric }
   2325   8275       Eric 
   2326   8275       Eric mblk_t *
   2327   8275       Eric mac_rx_flow(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
   2328   8275       Eric {
   2329   8275       Eric 	mac_impl_t	*mip = (mac_impl_t *)mh;
   2330   8275       Eric 	mblk_t		*bp, *bp1, **bpp, *list = NULL;
   2331   8275       Eric 
   2332   8275       Eric 	/*
   2333   8275       Eric 	 * We walk the chain and attempt to classify each packet.
   2334   8275       Eric 	 * The packets that couldn't be classified will be returned
   2335   8275       Eric 	 * back to the caller.
   2336   8275       Eric 	 */
   2337   8275       Eric 	bp = mp_chain;
   2338   8275       Eric 	bpp = &list;
   2339   8275       Eric 	while (bp != NULL) {
   2340   8275       Eric 		bp1 = bp;
   2341   8275       Eric 		bp = bp->b_next;
   2342   8275       Eric 		bp1->b_next = NULL;
   2343   8275       Eric 
   2344   8275       Eric 		if (mac_rx_classify(mip, mrh, bp1) != NULL) {
   2345   8275       Eric 			*bpp = bp1;
   2346   8275       Eric 			bpp = &bp1->b_next;
   2347   8275       Eric 		}
   2348   8275       Eric 	}
   2349   8275       Eric 	return (list);
   2350   8275       Eric }
   2351   8275       Eric 
   2352   8275       Eric static int
   2353   8275       Eric mac_tx_flow_srs_wakeup(flow_entry_t *flent, void *arg)
   2354   8275       Eric {
   2355   8275       Eric 	mac_ring_handle_t ring = arg;
   2356   8275       Eric 
   2357   8275       Eric 	if (flent->fe_tx_srs)
   2358   8275       Eric 		mac_tx_srs_wakeup(flent->fe_tx_srs, ring);
   2359   8275       Eric 	return (0);
   2360   8275       Eric }
   2361   8275       Eric 
   2362   8275       Eric void
   2363   8275       Eric i_mac_tx_srs_notify(mac_impl_t *mip, mac_ring_handle_t ring)
   2364   8275       Eric {
   2365   8275       Eric 	mac_client_impl_t	*cclient;
   2366   8275       Eric 	mac_soft_ring_set_t	*mac_srs;
   2367   8275       Eric 
   2368   8275       Eric 	/*
   2369   8275       Eric 	 * After grabbing the mi_rw_lock, the list of clients can't change.
   2370   8275       Eric 	 * If there are any clients mi_disabled must be B_FALSE and can't
   2371   8275       Eric 	 * get set since there are clients. If there aren't any clients we
   2372   8275       Eric 	 * don't do anything. In any case the mip has to be valid. The driver
   2373   8275       Eric 	 * must make sure that it goes single threaded (with respect to mac
   2374   8275       Eric 	 * calls) and wait for all pending mac calls to finish before calling
   2375   8275       Eric 	 * mac_unregister.
   2376   8275       Eric 	 */
   2377   8275       Eric 	rw_enter(&i_mac_impl_lock, RW_READER);
   2378   8275       Eric 	if (mip->mi_state_flags & MIS_DISABLED) {
   2379   8275       Eric 		rw_exit(&i_mac_impl_lock);
   2380   8275       Eric 		return;
   2381   8275       Eric 	}
   2382   8275       Eric 
   2383   8275       Eric 	/*
   2384   8275       Eric 	 * Get MAC tx srs from walking mac_client_handle list.
   2385   8275       Eric 	 */
   2386   8275       Eric 	rw_enter(&mip->mi_rw_lock, RW_READER);
   2387   8275       Eric 	for (cclient = mip->mi_clients_list; cclient != NULL;
   2388   8275       Eric 	    cclient = cclient->mci_client_next) {
   2389   8275       Eric 		if ((mac_srs = MCIP_TX_SRS(cclient)) != NULL)
   2390   8275       Eric 			mac_tx_srs_wakeup(mac_srs, ring);
   2391   8833       Venu 		(void) mac_flow_walk(cclient->mci_subflow_tab,
   2392   8833       Venu 		    mac_tx_flow_srs_wakeup, ring);
   2393   8275       Eric 	}
   2394   8275       Eric 	rw_exit(&mip->mi_rw_lock);
   2395   8275       Eric 	rw_exit(&i_mac_impl_lock);
   2396   8275       Eric }
   2397   8275       Eric 
   2398   8275       Eric /* ARGSUSED */
   2399   8275       Eric void
   2400   8275       Eric mac_multicast_refresh(mac_handle_t mh, mac_multicst_t refresh, void *arg,
   2401   8275       Eric     boolean_t add)
   2402   8275       Eric {
   2403   8275       Eric 	mac_impl_t *mip = (mac_impl_t *)mh;
   2404   8275       Eric 
   2405   8275       Eric 	i_mac_perim_enter((mac_impl_t *)mh);
   2406   8275       Eric 	/*
   2407   8275       Eric 	 * If no specific refresh function was given then default to the
   2408   8275       Eric 	 * driver's m_multicst entry point.
   2409   8275       Eric 	 */
   2410   8275       Eric 	if (refresh == NULL) {
   2411   8275       Eric 		refresh = mip->mi_multicst;
   2412   8275       Eric 		arg = mip->mi_driver;
   2413   8275       Eric 	}
   2414   8275       Eric 
   2415   8275       Eric 	mac_bcast_refresh(mip, refresh, arg, add);
   2416   8275       Eric 	i_mac_perim_exit((mac_impl_t *)mh);
   2417   8275       Eric }
   2418   8275       Eric 
   2419   8275       Eric void
   2420   8275       Eric mac_promisc_refresh(mac_handle_t mh, mac_setpromisc_t refresh, void *arg)
   2421   8275       Eric {
   2422   8275       Eric 	mac_impl_t	*mip = (mac_impl_t *)mh;
   2423   8275       Eric 
   2424   8275       Eric 	/*
   2425   8275       Eric 	 * If no specific refresh function was given then default to the
   2426   8275       Eric 	 * driver's m_promisc entry point.
   2427   8275       Eric 	 */
   2428   8275       Eric 	if (refresh == NULL) {
   2429   8275       Eric 		refresh = mip->mi_setpromisc;
   2430   8275       Eric 		arg = mip->mi_driver;
   2431   8275       Eric 	}
   2432   8275       Eric 	ASSERT(refresh != NULL);
   2433   8275       Eric 
   2434   8275       Eric 	/*
   2435   8275       Eric 	 * Call the refresh function with the current promiscuity.
   2436   8275       Eric 	 */
   2437   8275       Eric 	refresh(arg, (mip->mi_devpromisc != 0));
   2438   8275       Eric }
   2439   8275       Eric 
   2440   8275       Eric /*
   2441   8275       Eric  * The mac client requests that the mac not to change its margin size to
   2442   8275       Eric  * be less than the specified value.  If "current" is B_TRUE, then the client
   2443   8275       Eric  * requests the mac not to change its margin size to be smaller than the
   2444   8275       Eric  * current size. Further, return the current margin size value in this case.
   2445   8275       Eric  *
   2446   8275       Eric  * We keep every requested size in an ordered list from largest to smallest.
   2447   8275       Eric  */
   2448   8275       Eric int
   2449   8275       Eric mac_margin_add(mac_handle_t mh, uint32_t *marginp, boolean_t current)
   2450   8275       Eric {
   2451   8275       Eric 	mac_impl_t		*mip = (mac_impl_t *)mh;
   2452   8275       Eric 	mac_margin_req_t	**pp, *p;
   2453   8275       Eric 	int			err = 0;
   2454   8275       Eric 
   2455   8275       Eric 	rw_enter(&(mip->mi_rw_lock), RW_WRITER);
   2456   8275       Eric 	if (current)
   2457   8275       Eric 		*marginp = mip->mi_margin;
   2458   8275       Eric 
   2459   8275       Eric 	/*
   2460   8275       Eric 	 * If the current margin value cannot satisfy the margin requested,
   2461   8275       Eric 	 * return ENOTSUP directly.
   2462   8275       Eric 	 */
   2463   8275       Eric 	if (*marginp > mip->mi_margin) {
   2464   8275       Eric 		err = ENOTSUP;
   2465   8275       Eric 		goto done;
   2466   8275       Eric 	}
   2467   8275       Eric 
   2468   8275       Eric 	/*
   2469   8275       Eric 	 * Check whether the given margin is already in the list. If so,
   2470   8275       Eric 	 * bump the reference count.
   2471   8275       Eric 	 */
   2472   8275       Eric 	for (pp = &mip->mi_mmrp; (p = *pp) != NULL; pp = &p->mmr_nextp) {
   2473   8275       Eric 		if (p->mmr_margin == *marginp) {
   2474   8275       Eric 			/*
   2475   8275       Eric 			 * The margin requested is already in the list,
   2476   8275       Eric 			 * so just bump the reference count.
   2477   8275       Eric 			 */
   2478   8275       Eric 			p->mmr_ref++;
   2479   8275       Eric 			goto done;
   2480   8275       Eric 		}
   2481   8275       Eric 		if (p->mmr_margin < *marginp)
   2482   8275       Eric 			break;
   2483   8275       Eric 	}
   2484   8275       Eric 
   2485   8275       Eric 
   2486   8275       Eric 	p = kmem_zalloc(sizeof (mac_margin_req_t), KM_SLEEP);
   2487   8275       Eric 	p->mmr_margin = *marginp;
   2488   8275       Eric 	p->mmr_ref++;
   2489   8275       Eric 	p->mmr_nextp = *pp;
   2490   8275       Eric 	*pp = p;
   2491   8275       Eric 
   2492   8275       Eric done:
   2493   8275       Eric 	rw_exit(&(mip->mi_rw_lock));
   2494   8275       Eric 	return (err);
   2495   8275       Eric }
   2496   8275       Eric 
   2497   8275       Eric /*
   2498   8275       Eric  * The mac client requests to cancel its previous mac_margin_add() request.
   2499   8275       Eric  * We remove the requested margin size from the list.
   2500   8275       Eric  */
   2501   8275       Eric int
   2502   8275       Eric mac_margin_remove(mac_handle_t mh, uint32_t margin)
   2503   8275       Eric {
   2504   8275       Eric 	mac_impl_t		*mip = (mac_impl_t *)mh;
   2505   8275       Eric 	mac_margin_req_t	**pp, *p;
   2506   8275       Eric 	int			err = 0;
   2507   8275       Eric 
   2508   8275       Eric 	rw_enter(&(mip->mi_rw_lock), RW_WRITER);
   2509   8275       Eric 	/*
   2510   8275       Eric 	 * Find the entry in the list for the given margin.
   2511   8275       Eric 	 */
   2512   8275       Eric 	for (pp = &(mip->mi_mmrp); (p = *pp) != NULL; pp = &(p->mmr_nextp)) {
   2513   8275       Eric 		if (p->mmr_margin == margin) {
   2514   8275       Eric 			if (--p->mmr_ref == 0)
   2515   8275       Eric 				break;
   2516   8275       Eric 
   2517   8275       Eric 			/*
   2518   8275       Eric 			 * There is still a reference to this address so
   2519   8275       Eric 			 * there's nothing more to do.
   2520   8275       Eric 			 */
   2521   8275       Eric 			goto done;
   2522   8275       Eric 		}
   2523   8275       Eric 	}
   2524   8275       Eric 
   2525   8275       Eric 	/*
   2526   8275       Eric 	 * We did not find an entry for the given margin.
   2527   8275       Eric 	 */
   2528   8275       Eric 	if (p == NULL) {
   2529   8275       Eric 		err = ENOENT;
   2530   8275       Eric 		goto done;
   2531   8275       Eric 	}
   2532   8275       Eric 
   2533   8275       Eric 	ASSERT(p->mmr_ref == 0);
   2534   8275       Eric 
   2535   8275       Eric 	/*
   2536   8275       Eric 	 * Remove it from the list.
   2537   8275       Eric 	 */
   2538   8275       Eric 	*pp = p->mmr_nextp;
   2539   8275       Eric 	kmem_free(p, sizeof (mac_margin_req_t));
   2540   8275       Eric done:
   2541   8275       Eric 	rw_exit(&(mip->mi_rw_lock));
   2542   8275       Eric 	return (err);
   2543   8275       Eric }
   2544   8275       Eric 
   2545   8275       Eric boolean_t
   2546   8275       Eric mac_margin_update(mac_handle_t mh, uint32_t margin)
   2547   8275       Eric {
   2548   8275       Eric 	mac_impl_t	*mip = (mac_impl_t *)mh;
   2549   8275       Eric 	uint32_t	margin_needed = 0;
   2550   8275       Eric 
   2551   8275       Eric 	rw_enter(&(mip->mi_rw_lock), RW_WRITER);
   2552   8275       Eric 
   2553   8275       Eric 	if (mip->mi_mmrp != NULL)
   2554   8275       Eric 		margin_needed = mip->mi_mmrp->mmr_margin;
   2555   8275       Eric 
   2556   8275       Eric 	if (margin_needed <= margin)
   2557   8275       Eric 		mip->mi_margin = margin;
   2558   8275       Eric 
   2559   8275       Eric 	rw_exit(&(mip->mi_rw_lock));
   2560   8275       Eric 
   2561   8275       Eric 	if (margin_needed <= margin)
   2562   8275       Eric 		i_mac_notify(mip, MAC_NOTE_MARGIN);
   2563   8275       Eric 
   2564   8275       Eric 	return (margin_needed <= margin);
   2565   8275       Eric }
   2566   8275       Eric 
   2567   8275       Eric /*
   2568   8275       Eric  * MAC Type Plugin functions.
   2569   8275       Eric  */
   2570   8275       Eric 
   2571   8275       Eric mactype_t *
   2572   8275       Eric mactype_getplugin(const char *pname)
   2573   2311        seb {
   2574   2311        seb 	mactype_t	*mtype = NULL;
   2575   2311        seb 	boolean_t	tried_modload = B_FALSE;
   2576   2311        seb 
   2577   3288        seb 	mutex_enter(&i_mactype_lock);
   2578   3288        seb 
   2579   2311        seb find_registered_mactype:
   2580   3288        seb 	if (mod_hash_find(i_mactype_hash, (mod_hash_key_t)pname,
   2581   3288        seb 	    (mod_hash_val_t *)&mtype) != 0) {
   2582   3288        seb 		if (!tried_modload) {
   2583   3288        seb 			/*
   2584   3288        seb 			 * If the plugin has not yet been loaded, then
   2585   3288        seb 			 * attempt to load it now.  If modload() succeeds,
   2586   3288        seb 			 * the plugin should have registered using
   2587   3288        seb 			 * mactype_register(), in which case we can go back
   2588   3288        seb 			 * and attempt to find it again.
   2589   3288        seb 			 */
   2590   3288        seb 			if (modload(MACTYPE_KMODDIR, (char *)pname) != -1) {
   2591   3288        seb 				tried_modload = B_TRUE;
   2592   3288        seb 				goto find_registered_mactype;
   2593   3288        seb 			}
   2594   3288        seb 		}
   2595   3288        seb 	} else {
   2596   2311        seb 		/*
   2597   3288        seb 		 * Note that there's no danger that the plugin we've loaded
   2598   3288        seb 		 * could be unloaded between the modload() step and the
   2599   3288        seb 		 * reference count bump here, as we're holding
   2600   3288        seb 		 * i_mactype_lock, which mactype_unregister() also holds.
   2601   2311        seb 		 */
   2602   3288        seb 		atomic_inc_32(&mtype->mt_ref);
   2603   2311        seb 	}
   2604   2311        seb 
   2605   3288        seb 	mutex_exit(&i_mactype_lock);
   2606   3288        seb 	return (mtype);
   2607      0     stevel }
   2608   2311        seb 
   2609   2311        seb mactype_register_t *
   2610   2311        seb mactype_alloc(uint_t mactype_version)
   2611   2311        seb {
   2612   2311        seb 	mactype_register_t *mtrp;
   2613   2311        seb 
   2614   2311        seb 	/*
   2615   2311        seb 	 * Make sure there isn't a version mismatch between the plugin and
   2616   2311        seb 	 * the framework.  In the future, if multiple versions are
   2617   2311        seb 	 * supported, this check could become more sophisticated.
   2618   2311        seb 	 */
   2619   2311        seb 	if (mactype_version != MACTYPE_VERSION)
   2620   2311        seb 		return (NULL);
   2621   2311        seb 
   2622   2311        seb 	mtrp = kmem_zalloc(sizeof (mactype_register_t), KM_SLEEP);
   2623   2311        seb 	mtrp->mtr_version = mactype_version;
   2624   2311        seb 	return (mtrp);
   2625   2311        seb }
   2626   2311        seb 
   2627   2311        seb void
   2628   2311        seb mactype_free(mactype_register_t *mtrp)
   2629   2311        seb {
   2630   2311        seb 	kmem_free(mtrp, sizeof (mactype_register_t));
   2631   2311        seb }
   2632   2311        seb 
   2633   2311        seb int
   2634   2311        seb mactype_register(mactype_register_t *mtrp)
   2635   2311        seb {
   2636   2311        seb 	mactype_t	*mtp;
   2637   2311        seb 	mactype_ops_t	*ops = mtrp->mtr_ops;
   2638   2311        seb 
   2639   2311        seb 	/* Do some sanity checking before we register this MAC type. */
   2640   6353   dr146992 	if (mtrp->mtr_ident == NULL || ops == NULL)
   2641   2311        seb 		return (EINVAL);
   2642   2311        seb 
   2643   2311        seb 	/*
   2644   2311        seb 	 * Verify that all mandatory callbacks are set in the ops
   2645   2311        seb 	 * vector.
   2646   2311        seb 	 */
   2647   2311        seb 	if (ops->mtops_unicst_verify == NULL ||
   2648   2311        seb 	    ops->mtops_multicst_verify == NULL ||
   2649   2311        seb 	    ops->mtops_sap_verify == NULL ||
   2650   2311        seb 	    ops->mtops_header == NULL ||
   2651   2311        seb 	    ops->mtops_header_info == NULL) {
   2652   2311        seb 		return (EINVAL);
   2653   2311        seb 	}
   2654   2311        seb 
   2655   2311        seb 	mtp = kmem_zalloc(sizeof (*mtp), KM_SLEEP);
   2656   2311        seb 	mtp->mt_ident = mtrp->mtr_ident;
   2657   2311        seb 	mtp->mt_ops = *ops;
   2658   2311        seb 	mtp->mt_type = mtrp->mtr_mactype;
   2659   3147   xc151355 	mtp->mt_nativetype = mtrp->mtr_nativetype;
   2660   2311        seb 	mtp->mt_addr_length = mtrp->mtr_addrlen;
   2661   2311        seb 	if (mtrp->mtr_brdcst_addr != NULL) {
   2662   2311        seb 		mtp->mt_brdcst_addr = kmem_alloc(mtrp->mtr_addrlen, KM_SLEEP);
   2663   2311        seb 		bcopy(mtrp->mtr_brdcst_addr, mtp->mt_brdcst_addr,
   2664   2311        seb 		    mtrp->mtr_addrlen);
   2665   2311        seb 	}
   2666   2311        seb 
   2667   2311        seb 	mtp->mt_stats = mtrp->mtr_stats;
   2668   2311        seb 	mtp->mt_statcount = mtrp->mtr_statcount;
   2669   2311        seb 
   2670   6512    sowmini 	mtp->mt_mapping = mtrp->mtr_mapping;
   2671   6512    sowmini 	mtp->mt_mappingcount = mtrp->mtr_mappingcount;
   2672   6512    sowmini 
   2673   2311        seb 	if (mod_hash_insert(i_mactype_hash,
   2674   2311        seb 	    (mod_hash_key_t)mtp->mt_ident, (mod_hash_val_t)mtp) != 0) {
   2675   2311        seb 		kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length);
   2676   2311        seb 		kmem_free(mtp, sizeof (*mtp));
   2677   2311        seb 		return (EEXIST);
   2678   2311        seb 	}
   2679   2311        seb 	return (0);
   2680   2311        seb }
   2681   2311        seb 
   2682   2311        seb int
   2683   2311        seb mactype_unregister(const char *ident)
   2684   2311        seb {
   2685   2311        seb 	mactype_t	*mtp;
   2686   2311        seb 	mod_hash_val_t	val;
   2687   2311        seb 	int 		err;
   2688   2311        seb 
   2689   2311        seb 	/*
   2690   2311        seb 	 * Let's not allow MAC drivers to use this plugin while we're
   2691   3288        seb 	 * trying to unregister it.  Holding i_mactype_lock also prevents a
   2692   3288        seb 	 * plugin from unregistering while a MAC driver is attempting to
   2693   3288        seb 	 * hold a reference to it in i_mactype_getplugin().
   2694   2311        seb 	 */
   2695   3288        seb 	mutex_enter(&i_mactype_lock);
   2696   2311        seb 
   2697   2311        seb 	if ((err = mod_hash_find(i_mactype_hash, (mod_hash_key_t)ident,
   2698   2311        seb 	    (mod_hash_val_t *)&mtp)) != 0) {
   2699   2311        seb 		/* A plugin is trying to unregister, but it never registered. */
   2700   3288        seb 		err = ENXIO;
   2701   3288        seb 		goto done;
   2702   2311        seb 	}
   2703   2311        seb 
   2704   3288        seb 	if (mtp->mt_ref != 0) {
   2705   3288        seb 		err = EBUSY;
   2706   3288        seb 		goto done;
   2707   2311        seb 	}
   2708   2311        seb 
   2709   2311        seb 	err = mod_hash_remove(i_mactype_hash, (mod_hash_key_t)ident, &val);
   2710   2311        seb 	ASSERT(err == 0);
   2711   2311        seb 	if (err != 0) {
   2712   2311        seb 		/* This should never happen, thus the ASSERT() above. */
   2713   3288        seb 		err = EINVAL;
   2714   3288        seb 		goto done;
   2715   2311        seb 	}
   2716   2311        seb 	ASSERT(mtp == (mactype_t *)val);
   2717   2311        seb 
   2718  10616  Sebastien 	if (mtp->mt_brdcst_addr != NULL)
   2719  10616  Sebastien 		kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length);
   2720   2311        seb 	kmem_free(mtp, sizeof (mactype_t));
   2721   3288        seb done:
   2722   3288        seb 	mutex_exit(&i_mactype_lock);
   2723   3288        seb 	return (err);
   2724   2311        seb }
   2725   5903    sowmini 
   2726   8275       Eric /*
   2727  10491      Rishi  * mac_set_prop() sets mac or hardware driver properties:
   2728  10491      Rishi  * 	MAC resource properties include maxbw, priority, and cpu binding list.
   2729  10491      Rishi  *	Driver properties are private properties to the hardware, such as mtu
   2730  10491      Rishi  *	and speed.  There's one other MAC property -- the PVID.
   2731  10491      Rishi  * If the property is a driver property, mac_set_prop() calls driver's callback
   2732  10491      Rishi  * function to set it.
   2733  10491      Rishi  * If the property is a mac resource property, mac_set_prop() invokes
   2734  10491      Rishi  * mac_set_resources() which will cache the property value in mac_impl_t and
   2735  10491      Rishi  * may call mac_client_set_resource() to update property value of the primary
   2736  10491      Rishi  * mac client, if it exists.
   2737  10491      Rishi  */
   2738  10491      Rishi int
   2739  10491      Rishi mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize)
   2740  10491      Rishi {
   2741  10491      Rishi 	int err = ENOTSUP;
   2742  10491      Rishi 	mac_impl_t *mip = (mac_impl_t *)mh;
   2743  10491      Rishi 
   2744  10491      Rishi 	ASSERT(MAC_PERIM_HELD(mh));
   2745  10491      Rishi 
   2746   8275       Eric 	switch (macprop->mp_id) {
   2747   8275       Eric 	case MAC_PROP_MAXBW:
   2748   8275       Eric 	case MAC_PROP_PRIO:
   2749  10734       Eric 	case MAC_PROP_PROTECT:
   2750  10491      Rishi 	case MAC_PROP_BIND_CPU: {
   2751   8275       Eric 		mac_resource_props_t mrp;
   2752   8275       Eric 
   2753  10491      Rishi 		/* If it is mac property, call mac_set_resources() */
   2754   8275       Eric 		if (valsize < sizeof (mac_resource_props_t))
   2755   8275       Eric 			return (EINVAL);
   2756   8275       Eric 		bcopy(val, &mrp, sizeof (mrp));
   2757  10491      Rishi 		err = mac_set_resources(mh, &mrp);
   2758  10491      Rishi 		break;
   2759  10491      Rishi 	}
   2760  10491      Rishi 
   2761  10491      Rishi 	case MAC_PROP_PVID:
   2762  10491      Rishi 		if (valsize < sizeof (uint16_t) ||
   2763  10491      Rishi 		    (mip->mi_state_flags & MIS_IS_VNIC))
   2764  10491      Rishi 			return (EINVAL);
   2765  10491      Rishi 		err = mac_set_pvid(mh, *(uint16_t *)val);
   2766  10491      Rishi 		break;
   2767  10491      Rishi 
   2768   8603     Girish 	case MAC_PROP_MTU: {
   2769   8603     Girish 		uint32_t mtu;
   2770   8603     Girish 
   2771   8603     Girish 		if (valsize < sizeof (mtu))
   2772   8603     Girish 			return (EINVAL);
   2773   8603     Girish 		bcopy(val, &mtu, sizeof (mtu));
   2774   8603     Girish 		err = mac_set_mtu(mh, mtu, NULL);
   2775   8603     Girish 		break;
   2776   8603     Girish 	}
   2777  10491      Rishi 
   2778  10491      Rishi 	case MAC_PROP_LLIMIT:
   2779  10491      Rishi 	case MAC_PROP_LDECAY: {
   2780  10491      Rishi 		uint32_t learnval;
   2781  10491      Rishi 
   2782  10491      Rishi 		if (valsize < sizeof (learnval) ||
   2783  10491      Rishi 		    (mip->mi_state_flags & MIS_IS_VNIC))
   2784  10491      Rishi 			return (EINVAL);
   2785  10491      Rishi 		bcopy(val, &learnval, sizeof (learnval));
   2786  10491      Rishi 		if (learnval == 0 && macprop->mp_id == MAC_PROP_LDECAY)
   2787  10491      Rishi 			return (EINVAL);
   2788  10491      Rishi 		if (macprop->mp_id == MAC_PROP_LLIMIT)
   2789  10491      Rishi 			mip->mi_llimit = learnval;
   2790  10491      Rishi 		else
   2791  10491      Rishi 			mip->mi_ldecay = learnval;
   2792  10491      Rishi 		err = 0;
   2793  10491      Rishi 		break;
   2794  10491      Rishi 	}
   2795  10491      Rishi 
   2796   8603     Girish 	default:
   2797   8603     Girish 		/* For other driver properties, call driver's callback */
   2798   8603     Girish 		if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) {
   2799   8603     Girish 			err = mip->mi_callbacks->mc_setprop(mip->mi_driver,
   2800   8603     Girish 			    macprop->mp_name, macprop->mp_id, valsize, val);
   2801   8603     Girish 		}
   2802   8603     Girish 	}
   2803   8275       Eric 	return (err);
   2804   8275       Eric }
   2805   8275       Eric 
   2806   8275       Eric /*
   2807   8275       Eric  * mac_get_prop() gets mac or hardware driver properties.
   2808   8275       Eric  *
   2809   8275       Eric  * If the property is a driver property, mac_get_prop() calls driver's callback
   2810   8275       Eric  * function to get it.
   2811   8275       Eric  * If the property is a mac property, mac_get_prop() invokes mac_get_resources()
   2812   8275       Eric  * which returns the cached value in mac_impl_t.
   2813   8275       Eric  */
   2814   5903    sowmini int
   2815   8118  Vasumathi mac_get_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize,
   2816   8118  Vasumathi     uint_t *perm)
   2817   5903    sowmini {
   2818   5903    sowmini 	int err = ENOTSUP;
   2819   5903    sowmini 	mac_impl_t *mip = (mac_impl_t *)mh;
   2820   6512    sowmini 	link_state_t link_state;
   2821   9514     Girish 	boolean_t is_getprop, is_setprop;
   2822   9514     Girish 
   2823   9514     Girish 	is_getprop = (mip->mi_callbacks->mc_callbacks & MC_GETPROP);
   2824   9514     Girish 	is_setprop = (mip->mi_callbacks->mc_callbacks & MC_SETPROP);
   2825   8275       Eric 
   2826  10491      Rishi 	switch (macprop->mp_id) {
   2827  10491      Rishi 	case MAC_PROP_MAXBW:
   2828  10491      Rishi 	case MAC_PROP_PRIO:
   2829  10734       Eric 	case MAC_PROP_PROTECT:
   2830  10491      Rishi 	case MAC_PROP_BIND_CPU: {
   2831   8275       Eric 		mac_resource_props_t mrp;
   2832   8275       Eric 
   2833  10491      Rishi 		/* If mac property, read from cache */
   2834   8275       Eric 		if (valsize < sizeof (mac_resource_props_t))
   2835   8275       Eric 			return (EINVAL);
   2836   8275       Eric 		mac_get_resources(mh, &mrp);
   2837   8275       Eric 		bcopy(&mrp, val, sizeof (mac_resource_props_t));
   2838   8275       Eric 		return (0);
   2839   8275       Eric 	}
   2840   5903    sowmini 
   2841  10491      Rishi 	case MAC_PROP_PVID:
   2842  10491      Rishi 		if (valsize < sizeof (uint16_t) ||
   2843  10491      Rishi 		    (mip->mi_state_flags & MIS_IS_VNIC))
   2844  10491      Rishi 			return (EINVAL);
   2845  10491      Rishi 		*(uint16_t *)val = mac_get_pvid(mh);
   2846  10491      Rishi 		return (0);
   2847  10491      Rishi 
   2848  10491      Rishi 	case MAC_PROP_LLIMIT:
   2849  10491      Rishi 	case MAC_PROP_LDECAY:
   2850  10491      Rishi 		if (valsize < sizeof (uint32_t) ||
   2851  10491      Rishi 		    (mip->mi_state_flags & MIS_IS_VNIC))
   2852  10491      Rishi 			return (EINVAL);
   2853  10491      Rishi 		if (macprop->mp_id == MAC_PROP_LLIMIT)
   2854  10491      Rishi 			bcopy(&mip->mi_llimit, val, sizeof (mip->mi_llimit));
   2855  10491      Rishi 		else
   2856  10491      Rishi 			bcopy(&mip->mi_ldecay, val, sizeof (mip->mi_ldecay));
   2857  10491      Rishi 		return (0);
   2858  10491      Rishi 
   2859   9514     Girish 	case MAC_PROP_MTU: {
   2860   9514     Girish 		uint32_t sdu;
   2861   9514     Girish 		mac_propval_range_t range;
   2862   9514     Girish 
   2863   9514     Girish 		if ((macprop->mp_flags & MAC_PROP_POSSIBLE) != 0) {
   2864   9514     Girish 			if (valsize < sizeof (mac_propval_range_t))
   2865   9514     Girish 				return (EINVAL);
   2866   9514     Girish 			if (is_getprop) {
   2867   9514     Girish 				err = mip->mi_callbacks->mc_getprop(mip->
   2868   9514     Girish 				    mi_driver, macprop->mp_name, macprop->mp_id,
   2869   9514     Girish 				    macprop->mp_flags, valsize, val, perm);
   2870   9514     Girish 			}
   2871   9514     Girish 			/*
   2872   9514     Girish 			 * If the driver doesn't have *_m_getprop defined or
   2873   9514     Girish 			 * if the driver doesn't support setting MTU then
   2874   9514     Girish 			 * return the CURRENT value as POSSIBLE value.
   2875   9514     Girish 			 */
   2876   9514     Girish 			if (!is_getprop || err == ENOTSUP) {
   2877   9514     Girish 				mac_sdu_get(mh, NULL, &sdu);
   2878   9514     Girish 				range.mpr_count = 1;
   2879   9514     Girish 				range.mpr_type = MAC_PROPVAL_UINT32;
   2880   9514     Girish 				range.range_uint32[0].mpur_min =
   2881   9514     Girish 				    range.range_uint32[0].mpur_max = sdu;
   2882   9514     Girish 				bcopy(&range, val, sizeof (range));
   2883   9514     Girish 				err = 0;
   2884   9514     Girish 			}
   2885   9514     Girish 			return (err);
   2886   9514     Girish 		}
   2887   6512    sowmini 		if (valsize < sizeof (sdu))
   2888   6512    sowmini 			return (EINVAL);
   2889   6789   am223141 		if ((macprop->mp_flags & MAC_PROP_DEFAULT) == 0) {
   2890   6512    sowmini 			mac_sdu_get(mh, NULL, &sdu);
   2891   6512    sowmini 			bcopy(&sdu, val, sizeof (sdu));
   2892   9514     Girish 			if (is_setprop && (mip->mi_callbacks->mc_setprop(mip->
   2893   9514     Girish 			    mi_driver, macprop->mp_name, macprop->mp_id,
   2894   9514     Girish 			    valsize, val) == 0)) {
   2895   8603     Girish 				*perm = MAC_PROP_PERM_RW;
   2896   8603     Girish 			} else {
   2897   8118  Vasumathi 				*perm = MAC_PROP_PERM_READ;
   2898   8603     Girish 			}
   2899   6512    sowmini 			return (0);
   2900   6512    sowmini 		} else {
   2901   6512    sowmini 			if (mip->mi_info.mi_media == DL_ETHER) {
   2902   6512    sowmini 				sdu = ETHERMTU;
   2903   6512    sowmini 				bcopy(&sdu, val, sizeof (sdu));
   2904   8603     Girish 
   2905   6512    sowmini 				return (0);
   2906   6512    sowmini 			}
   2907   6512    sowmini 			/*
   2908   6512    sowmini 			 * ask driver for its default.
   2909   6512    sowmini 			 */
   2910   6512    sowmini 			break;
   2911   6512    sowmini 		}
   2912   9514     Girish 	}
   2913   6789   am223141 	case MAC_PROP_STATUS:
   2914   6512    sowmini 		if (valsize < sizeof (link_state))
   2915   6512    sowmini 			return (EINVAL);
   2916   8118  Vasumathi 		*perm = MAC_PROP_PERM_READ;
   2917   6512    sowmini 		link_state = mac_link_get(mh);
   2918   6512    sowmini 		bcopy(&link_state, val, sizeof (link_state));
   2919   6512    sowmini 		return (0);
   2920   6512    sowmini 	default:
   2921   6512    sowmini 		break;
   2922   8275       Eric 
   2923   8275       Eric 	}
   2924   8275       Eric 	/* If driver property, request from driver */
   2925   9514     Girish 	if (is_getprop) {
   2926   5903    sowmini 		err = mip->mi_callbacks->mc_getprop(mip->mi_driver,
   2927   6512    sowmini 		    macprop->mp_name, macprop->mp_id, macprop->mp_flags,
   2928   8118  Vasumathi 		    valsize, val, perm);
   2929   5903    sowmini 	}
   2930   5903    sowmini 	return (err);
   2931   5903    sowmini }
   2932   5903    sowmini 
   2933   9073      Cathy int
   2934   9073      Cathy mac_fastpath_disable(mac_handle_t mh)
   2935   9073      Cathy {
   2936   9073      Cathy 	mac_impl_t	*mip = (mac_impl_t *)mh;
   2937   9073      Cathy 
   2938   9073      Cathy 	if ((mip->mi_state_flags & MIS_LEGACY) == 0)
   2939   9073      Cathy 		return (0);
   2940   9073      Cathy 
   2941   9073      Cathy 	return (mip->mi_capab_legacy.ml_fastpath_disable(mip->mi_driver));
   2942   9073      Cathy }
   2943   9073      Cathy 
   2944   9073      Cathy void
   2945   9073      Cathy mac_fastpath_enable(mac_handle_t mh)
   2946   9073      Cathy {
   2947   9073      Cathy 	mac_impl_t	*mip = (mac_impl_t *)mh;
   2948   9073      Cathy 
   2949   9073      Cathy 	if ((mip->mi_state_flags & MIS_LEGACY) == 0)
   2950   9073      Cathy 		return;
   2951   9073      Cathy 
   2952   9073      Cathy 	mip->mi_capab_legacy.ml_fastpath_enable(mip->mi_driver);
   2953   9073      Cathy }
   2954   9073      Cathy 
   2955   8275       Eric void
   2956   6512    sowmini mac_register_priv_prop(mac_impl_t *mip, mac_priv_prop_t *mpp, uint_t nprop)
   2957   6512    sowmini {
   2958   6512    sowmini 	mac_priv_prop_t *mpriv;
   2959   6512    sowmini 
   2960   6512    sowmini 	if (mpp == NULL)
   2961   6512    sowmini 		return;
   2962   6512    sowmini 
   2963   6512    sowmini 	mpriv = kmem_zalloc(nprop * sizeof (*mpriv), KM_SLEEP);
   2964   6512    sowmini 	(void) memcpy(mpriv, mpp, nprop * sizeof (*mpriv));
   2965   6512    sowmini 	mip->mi_priv_prop = mpriv;
   2966   6512    sowmini 	mip->mi_priv_prop_count = nprop;
   2967   6512    sowmini }
   2968   7406    Sowmini 
   2969   8275       Eric void
   2970   7406    Sowmini mac_unregister_priv_prop(mac_impl_t *mip)
   2971   7406    Sowmini {
   2972   7406    Sowmini 	mac_priv_prop_t	*mpriv;
   2973   7406    Sowmini 
   2974   7406    Sowmini 	mpriv = mip->mi_priv_prop;
   2975   7406    Sowmini 	if (mpriv != NULL) {
   2976   7406    Sowmini 		kmem_free(mpriv, mip->mi_priv_prop_count * sizeof (*mpriv));
   2977   7406    Sowmini 		mip->mi_priv_prop = NULL;
   2978   7406    Sowmini 	}
   2979   7406    Sowmini 	mip->mi_priv_prop_count = 0;
   2980   7406    Sowmini }
   2981   8275       Eric 
   2982   8275       Eric /*
   2983   8275       Eric  * mac_ring_t 'mr' macros. Some rogue drivers may access ring structure
   2984   8275       Eric  * (by invoking mac_rx()) even after processing mac_stop_ring(). In such
   2985   8275       Eric  * cases if MAC free's the ring structure after mac_stop_ring(), any
   2986   8275       Eric  * illegal access to the ring structure coming from the driver will panic
   2987   8275       Eric  * the system. In order to protect the system from such inadvertent access,
   2988   8275       Eric  * we maintain a cache of rings in the mac_impl_t after they get free'd up.
   2989   8275       Eric  * When packets are received on free'd up rings, MAC (through the generation
   2990   8275       Eric  * count mechanism) will drop such packets.
   2991   8275       Eric  */
   2992   8275       Eric static mac_ring_t *
   2993   8275       Eric mac_ring_alloc(mac_impl_t *mip, mac_capab_rings_t *cap_rings)
   2994   8275       Eric {
   2995   8275       Eric 	mac_ring_t *ring;
   2996   8275       Eric 
   2997   8275       Eric 	if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
   2998   8275       Eric 		mutex_enter(&mip->mi_ring_lock);
   2999   8275       Eric 		if (mip->mi_ring_freelist != NULL) {
   3000   8275       Eric 			ring = mip->mi_ring_freelist;
   3001   8275       Eric 			mip->mi_ring_freelist = ring->mr_next;
   3002   8275       Eric 			bzero(ring, sizeof (mac_ring_t));
   3003   8275       Eric 		} else {
   3004   8275       Eric 			ring = kmem_cache_alloc(mac_ring_cache, KM_SLEEP);
   3005   8275       Eric 		}
   3006   8275       Eric 		mutex_exit(&mip->mi_ring_lock);
   3007   8275       Eric 	} else {
   3008   8275       Eric 		ring = kmem_zalloc(sizeof (mac_ring_t), KM_SLEEP);
   3009   8275       Eric 	}
   3010   8275       Eric 	ASSERT((ring != NULL) && (ring->mr_state == MR_FREE));
   3011   8275       Eric 	return (ring);
   3012   8275       Eric }
   3013   8275       Eric 
   3014   8275       Eric static void
   3015   8275       Eric mac_ring_free(mac_impl_t *mip, mac_ring_t *ring)
   3016   8275       Eric {
   3017   8275       Eric 	if (ring->mr_type == MAC_RING_TYPE_RX) {
   3018   8275       Eric 		mutex_enter(&mip->mi_ring_lock);
   3019   8275       Eric 		ring->mr_state = MR_FREE;
   3020   8275       Eric 		ring->mr_flag = 0;
   3021   8275       Eric 		ring->mr_next = mip->mi_ring_freelist;
   3022   8275       Eric 		mip->mi_ring_freelist = ring;
   3023   8275       Eric 		mutex_exit(&mip->mi_ring_lock);
   3024   8275       Eric 	} else {
   3025   8275       Eric 		kmem_free(ring, sizeof (mac_ring_t));
   3026   8275       Eric 	}
   3027   8275       Eric }
   3028   8275       Eric 
   3029   8275       Eric static void
   3030   8275       Eric mac_ring_freeall(mac_impl_t *mip)
   3031   8275       Eric {
   3032   8275       Eric 	mac_ring_t *ring_next;
   3033   8275       Eric 	mutex_enter(&mip->mi_ring_lock);
   3034   8275       Eric 	mac_ring_t *ring = mip->mi_ring_freelist;
   3035   8275       Eric 	while (ring != NULL) {
   3036   8275       Eric 		ring_next = ring->mr_next;
   3037   8275       Eric 		kmem_cache_free(mac_ring_cache, ring);
   3038   8275       Eric 		ring = ring_next;
   3039   8275       Eric 	}
   3040   8275       Eric 	mip->mi_ring_freelist = NULL;
   3041   8275       Eric 	mutex_exit(&mip->mi_ring_lock);
   3042   8275       Eric }
   3043   8275       Eric 
   3044   8275       Eric int
   3045   8275       Eric mac_start_ring(mac_ring_t *ring)
   3046   8275       Eric {
   3047   8275       Eric 	int rv = 0;
   3048   8275       Eric 
   3049   8275       Eric 	if (ring->mr_start != NULL)
   3050   8275       Eric 		rv = ring->mr_start(ring->mr_driver, ring->mr_gen_num);
   3051   8275       Eric 
   3052   8275       Eric 	return (rv);
   3053   8275       Eric }
   3054   8275       Eric 
   3055   8275       Eric void
   3056   8275       Eric mac_stop_ring(mac_ring_t *ring)
   3057   8275       Eric {
   3058   8275       Eric 	if (ring->mr_stop != NULL)
   3059   8275       Eric 		ring->mr_stop(ring->mr_driver);
   3060   8275       Eric 
   3061   8275       Eric 	/*
   3062   8275       Eric 	 * Increment the ring generation number for this ring.
   3063   8275       Eric 	 */
   3064   8275       Eric 	ring->mr_gen_num++;
   3065   8275       Eric }
   3066   8275       Eric 
   3067   8275       Eric int
   3068   8275       Eric mac_start_group(mac_group_t *group)
   3069   8275       Eric {
   3070   8275       Eric 	int rv = 0;
   3071   8275       Eric 
   3072   8275       Eric 	if (group->mrg_start != NULL)
   3073   8275       Eric 		rv = group->mrg_start(group->mrg_driver);
   3074   8275       Eric 
   3075   8275       Eric 	return (rv);
   3076   8275       Eric }
   3077   8275       Eric 
   3078   8275       Eric void
   3079   8275       Eric mac_stop_group(mac_group_t *group)
   3080   8275       Eric {
   3081   8275       Eric 	if (group->mrg_stop != NULL)
   3082   8275       Eric 		group->mrg_stop(group->mrg_driver);
   3083   8275       Eric }
   3084   8275       Eric 
   3085   8275       Eric /*
   3086   8275       Eric  * Called from mac_start() on the default Rx group. Broadcast and multicast
   3087   8275       Eric  * packets are received only on the default group. Hence the default group
   3088   8275       Eric  * needs to be up even if the primary client is not up, for the other groups
   3089   8275       Eric  * to be functional. We do this by calling this function at mac_start time
   3090   8275       Eric  * itself. However the broadcast packets that are received can't make their
   3091   8275       Eric  * way beyond mac_rx until a mac client creates a broadcast flow.
   3092   8275       Eric  */
   3093   8275       Eric static int
   3094   8275       Eric mac_start_group_and_rings(mac_group_t *group)
   3095   8275       Eric {
   3096   8275       Eric 	mac_ring_t	*ring;
   3097   8275       Eric 	int		rv = 0;
   3098   8275       Eric 
   3099   8275       Eric 	ASSERT(group->mrg_state == MAC_GROUP_STATE_REGISTERED);
   3100   8275       Eric 	if ((rv = mac_start_group(group)) != 0)
   3101   8275       Eric 		return (rv);
   3102   8275       Eric 
   3103   8275       Eric 	for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) {
   3104   8275       Eric 		ASSERT(ring->mr_state == MR_FREE);
   3105   8275       Eric 		if ((rv = mac_start_ring(ring)) != 0)
   3106   8275       Eric 			goto error;
   3107   8275       Eric 		ring->mr_state = MR_INUSE;
   3108   8275       Eric 		ring->mr_classify_type = MAC_SW_CLASSIFIER;
   3109   8275       Eric 	}
   3110   8275       Eric 	return (0);
   3111   8275       Eric 
   3112   8275       Eric error:
   3113   8275       Eric 	mac_stop_group_and_rings(group);
   3114   8275       Eric 	return (rv);
   3115   8275       Eric }
   3116   8275       Eric 
   3117   8275       Eric /* Called from mac_stop on the default Rx group */
   3118   8275       Eric static void
   3119   8275       Eric mac_stop_group_and_rings(mac_group_t *group)
   3120   8275       Eric {
   3121   8275       Eric 	mac_ring_t	*ring;
   3122   8275       Eric 
   3123   8275       Eric 	for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) {
   3124   8275       Eric 		if (ring->mr_state != MR_FREE) {
   3125   8275       Eric 			mac_stop_ring(ring);
   3126   8275       Eric 			ring->mr_state = MR_FREE;
   3127   8275       Eric 			ring->mr_flag = 0;
   3128   8275       Eric 			ring->mr_classify_type = MAC_NO_CLASSIFIER;
   3129   8275       Eric 		}
   3130   8275       Eric 	}
   3131   8275       Eric 	mac_stop_group(group);
   3132   8275       Eric }
   3133   8275       Eric 
   3134   8275       Eric 
   3135   8275       Eric static mac_ring_t *
   3136   8275       Eric mac_init_ring(mac_impl_t *mip, mac_group_t *group, int index,
   3137   8275       Eric     mac_capab_rings_t *cap_rings)
   3138   8275       Eric {
   3139   8275       Eric 	mac_ring_t *ring;
   3140   8275       Eric 	mac_ring_info_t ring_info;
   3141   8275       Eric 
   3142   8275       Eric 	ring = mac_ring_alloc(mip, cap_rings);
   3143   8275       Eric 
   3144   8275       Eric 	/* Prepare basic information of ring */
   3145   8275       Eric 	ring->mr_index = index;
   3146   8275       Eric 	ring->mr_type = group->mrg_type;
   3147   8275       Eric 	ring->mr_gh = (mac_group_handle_t)group;
   3148   8275       Eric 
   3149   8275       Eric 	/* Insert the new ring to the list. */
   3150   8275       Eric 	ring->mr_next = group->mrg_rings;
   3151   8275       Eric 	group->mrg_rings = ring;
   3152   8275       Eric 
   3153   8275       Eric 	/* Zero to reuse the info data structure */
   3154   8275       Eric 	bzero(&ring_info, sizeof (ring_info));
   3155   8275       Eric 
   3156   8275       Eric 	/* Query ring information from driver */
   3157   8275       Eric 	cap_rings->mr_rget(mip->mi_driver, group->mrg_type, group->mrg_index,
   3158   8275       Eric 	    index, &ring_info, (mac_ring_handle_t)ring);
   3159   8275       Eric 
   3160   8275       Eric 	ring->mr_info = ring_info;
   3161   8275       Eric 
   3162   8275       Eric 	/* Update ring's status */
   3163   8275       Eric 	ring->mr_state = MR_FREE;
   3164   8275       Eric 	ring->mr_flag = 0;
   3165   8275       Eric 
   3166   8275       Eric 	/* Update the ring count of the group */
   3167   8275       Eric 	group->mrg_cur_count++;
   3168   8275       Eric 	return (ring);
   3169   8275       Eric }
   3170   8275       Eric 
   3171   8275       Eric /*
   3172   8275       Eric  * Rings are chained together for easy regrouping.
   3173   8275       Eric  */
   3174   8275       Eric static void
   3175   8275       Eric mac_init_group(mac_impl_t *mip, mac_group_t *group, int size,
   3176   8275       Eric     mac_capab_rings_t *cap_rings)
   3177   8275       Eric {
   3178   8275       Eric 	int index;
   3179   8275       Eric 
   3180   8275       Eric 	/*
   3181   8275       Eric 	 * Initialize all ring members of this group. Size of zero will not
   3182   8275       Eric 	 * enter the loop, so it's safe for initializing an empty group.
   3183   8275       Eric 	 */
   3184