Home | History | Annotate | Download | only in aggr
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
     28  *
     29  * An instance of the structure aggr_grp_t is allocated for each
     30  * link aggregation group. When created, aggr_grp_t objects are
     31  * entered into the aggr_grp_hash hash table maintained by the modhash
     32  * module. The hash key is the linkid associated with the link
     33  * aggregation group.
     34  *
     35  * A set of MAC ports is associated with each link aggregation group.
     36  */
     37 
     38 #include <sys/types.h>
     39 #include <sys/sysmacros.h>
     40 #include <sys/conf.h>
     41 #include <sys/cmn_err.h>
     42 #include <sys/disp.h>
     43 #include <sys/list.h>
     44 #include <sys/ksynch.h>
     45 #include <sys/kmem.h>
     46 #include <sys/stream.h>
     47 #include <sys/modctl.h>
     48 #include <sys/ddi.h>
     49 #include <sys/sunddi.h>
     50 #include <sys/atomic.h>
     51 #include <sys/stat.h>
     52 #include <sys/modhash.h>
     53 #include <sys/id_space.h>
     54 #include <sys/strsun.h>
     55 #include <sys/cred.h>
     56 #include <sys/dlpi.h>
     57 #include <sys/zone.h>
     58 #include <sys/mac_provider.h>
     59 #include <sys/dls.h>
     60 #include <sys/vlan.h>
     61 #include <sys/aggr.h>
     62 #include <sys/aggr_impl.h>
     63 
/*
 * MAC-facing entry points of the aggregation (aggr_m_*). Most of these are
 * installed in the aggr_m_callbacks table defined later in this file.
 */
static int aggr_m_start(void *);
static void aggr_m_stop(void *);
static int aggr_m_promisc(void *, boolean_t);
static int aggr_m_multicst(void *, boolean_t, const uint8_t *);
static int aggr_m_unicst(void *, const uint8_t *);
static int aggr_m_stat(void *, uint_t, uint64_t *);
static void aggr_m_ioctl(void *, queue_t *, mblk_t *);
static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *);
static int aggr_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
    const void *);
static int aggr_m_getprop(void *, const char *, mac_prop_id_t, uint_t,
    uint_t, void *, uint_t *);


/* Lookup and removal of a constituent port of a group. */
static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t);
static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
    boolean_t *);

/*
 * Capability, SDU and margin helpers. The *_check() variants are used by
 * aggr_grp_add_ports() to decide whether a new port may join the group.
 */
static void aggr_grp_capab_set(aggr_grp_t *);
static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
static uint_t aggr_grp_max_sdu(aggr_grp_t *);
static uint32_t aggr_grp_max_margin(aggr_grp_t *);
static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *);

/*
 * Pseudo Rx group/ring support: pseudo rings of the aggregation are layered
 * over the hardware rings of the underlying ports.
 * NOTE(review): aggr_addmac/aggr_remmac/aggr_rx_poll/aggr_fill_ring/
 * aggr_fill_group are presumably handed to the MAC layer as ring/group
 * callbacks; their definitions are not visible here -- confirm.
 */
static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
static void aggr_rem_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
static int aggr_pseudo_disable_intr(mac_intr_handle_t);
static int aggr_pseudo_enable_intr(mac_intr_handle_t);
static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t);
static void aggr_pseudo_stop_ring(mac_ring_driver_t);
static int aggr_addmac(void *, const uint8_t *);
static int aggr_remmac(void *, const uint8_t *);
static mblk_t *aggr_rx_poll(void *, int);
static void aggr_fill_ring(void *, mac_ring_type_t, const int,
    const int, mac_ring_info_t *, mac_ring_handle_t);
static void aggr_fill_group(void *, mac_ring_type_t, const int,
    mac_group_info_t *, mac_group_handle_t);
    102 
/*
 * File-scope state, set up by aggr_grp_init() and torn down by
 * aggr_grp_fini():
 *
 *   aggr_grp_cache  kmem cache of aggr_grp_t objects.
 *   aggr_grp_hash   hash table of groups, keyed by linkid.
 *   aggr_grp_lock   held (as reader) around hash lookups and reads of
 *                   aggr_grp_cnt in this file.
 *   aggr_grp_cnt    group count reported by aggr_grp_count().
 *   key_ids         id space used for keys when none is specified.
 */
static kmem_cache_t	*aggr_grp_cache;
static mod_hash_t	*aggr_grp_hash;
static krwlock_t	aggr_grp_lock;
static uint_t		aggr_grp_cnt;
static id_space_t	*key_ids;

/* Size passed to mod_hash_create_idhash() for aggr_grp_hash. */
#define	GRP_HASHSZ		64
/* Convert a linkid into the key type expected by the modhash routines. */
#define	GRP_HASH_KEY(linkid)	((mod_hash_key_t)(uintptr_t)linkid)
/* NOTE(review): not used in the visible part of the file -- confirm usage. */
#define	AGGR_PORT_NAME_DELIMIT '-'

/* All-zero address; used to reject a zero user-supplied fixed MAC address. */
static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};
    114 
/*
 * Flags stored in the first member of aggr_m_callbacks.
 * NOTE(review): presumably these advertise which optional callbacks
 * (ioctl, getcapab, setprop, getprop) are implemented -- confirm against
 * the mac_callbacks_t definition.
 */
#define	AGGR_M_CALLBACK_FLAGS	\
	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP)

/*
 * MAC callback table of the aggregation. The initializer is positional, so
 * the order of the entries must match the member order of mac_callbacks_t.
 * NULL entries are callbacks the aggregation does not provide through this
 * table (presumably unicst, open and close -- TODO confirm against
 * mac_callbacks_t).
 */
static mac_callbacks_t aggr_m_callbacks = {
	AGGR_M_CALLBACK_FLAGS,
	aggr_m_stat,
	aggr_m_start,
	aggr_m_stop,
	aggr_m_promisc,
	aggr_m_multicst,
	NULL,
	aggr_m_tx,
	aggr_m_ioctl,
	aggr_m_capab_get,
	NULL,
	NULL,
	aggr_m_setprop,
	aggr_m_getprop
};
    134 
    135 /*ARGSUSED*/
    136 static int
    137 aggr_grp_constructor(void *buf, void *arg, int kmflag)
    138 {
    139 	aggr_grp_t *grp = buf;
    140 
    141 	bzero(grp, sizeof (*grp));
    142 	mutex_init(&grp->lg_lacp_lock, NULL, MUTEX_DEFAULT, NULL);
    143 	cv_init(&grp->lg_lacp_cv, NULL, CV_DEFAULT, NULL);
    144 	rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL);
    145 	mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL);
    146 	cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL);
    147 	grp->lg_link_state = LINK_STATE_UNKNOWN;
    148 	return (0);
    149 }
    150 
    151 /*ARGSUSED*/
    152 static void
    153 aggr_grp_destructor(void *buf, void *arg)
    154 {
    155 	aggr_grp_t *grp = buf;
    156 
    157 	if (grp->lg_tx_ports != NULL) {
    158 		kmem_free(grp->lg_tx_ports,
    159 		    grp->lg_tx_ports_size * sizeof (aggr_port_t *));
    160 	}
    161 
    162 	mutex_destroy(&grp->lg_lacp_lock);
    163 	cv_destroy(&grp->lg_lacp_cv);
    164 	mutex_destroy(&grp->lg_port_lock);
    165 	cv_destroy(&grp->lg_port_cv);
    166 	rw_destroy(&grp->lg_tx_lock);
    167 }
    168 
    169 void
    170 aggr_grp_init(void)
    171 {
    172 	aggr_grp_cache = kmem_cache_create("aggr_grp_cache",
    173 	    sizeof (aggr_grp_t), 0, aggr_grp_constructor,
    174 	    aggr_grp_destructor, NULL, NULL, NULL, 0);
    175 
    176 	aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash",
    177 	    GRP_HASHSZ, mod_hash_null_valdtor);
    178 	rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL);
    179 	aggr_grp_cnt = 0;
    180 
    181 	/*
    182 	 * Allocate an id space to manage key values (when key is not
    183 	 * specified). The range of the id space will be from
    184 	 * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol
    185 	 * uses a 16-bit key.
    186 	 */
    187 	key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX);
    188 	ASSERT(key_ids != NULL);
    189 }
    190 
    191 void
    192 aggr_grp_fini(void)
    193 {
    194 	id_space_destroy(key_ids);
    195 	rw_destroy(&aggr_grp_lock);
    196 	mod_hash_destroy_idhash(aggr_grp_hash);
    197 	kmem_cache_destroy(aggr_grp_cache);
    198 }
    199 
    200 uint_t
    201 aggr_grp_count(void)
    202 {
    203 	uint_t	count;
    204 
    205 	rw_enter(&aggr_grp_lock, RW_READER);
    206 	count = aggr_grp_cnt;
    207 	rw_exit(&aggr_grp_lock);
    208 	return (count);
    209 }
    210 
    211 /*
    212  * Since both aggr_port_notify_cb() and aggr_port_timer_thread() functions
    213  * requires the mac perimeter, this function holds a reference of the aggr
    214  * and aggr won't call mac_unregister() until this reference drops to 0.
    215  */
    216 void
    217 aggr_grp_port_hold(aggr_port_t *port)
    218 {
    219 	aggr_grp_t	*grp = port->lp_grp;
    220 
    221 	AGGR_PORT_REFHOLD(port);
    222 	mutex_enter(&grp->lg_port_lock);
    223 	grp->lg_port_ref++;
    224 	mutex_exit(&grp->lg_port_lock);
    225 }
    226 
    227 /*
    228  * Release the reference of the grp and inform aggr_grp_delete() calling
    229  * mac_unregister() is now safe.
    230  */
    231 void
    232 aggr_grp_port_rele(aggr_port_t *port)
    233 {
    234 	aggr_grp_t	*grp = port->lp_grp;
    235 
    236 	mutex_enter(&grp->lg_port_lock);
    237 	if (--grp->lg_port_ref == 0)
    238 		cv_signal(&grp->lg_port_cv);
    239 	mutex_exit(&grp->lg_port_lock);
    240 	AGGR_PORT_REFRELE(port);
    241 }
    242 
    243 /*
    244  * Wait for the port's lacp timer thread and the port's notification callback
    245  * to exit.
    246  */
    247 void
    248 aggr_grp_port_wait(aggr_grp_t *grp)
    249 {
    250 	mutex_enter(&grp->lg_port_lock);
    251 	if (grp->lg_port_ref != 0)
    252 		cv_wait(&grp->lg_port_cv, &grp->lg_port_lock);
    253 	mutex_exit(&grp->lg_port_lock);
    254 }
    255 
    256 /*
    257  * Attach a port to a link aggregation group.
    258  *
    259  * A port is attached to a link aggregation group once its speed
    260  * and link state have been verified.
    261  *
    262  * Returns B_TRUE if the group link state or speed has changed. If
    263  * it's the case, the caller must notify the MAC layer via a call
    264  * to mac_link().
    265  */
    266 boolean_t
    267 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
    268 {
    269 	boolean_t link_state_changed = B_FALSE;
    270 
    271 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
    272 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
    273 
    274 	if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
    275 		return (B_FALSE);
    276 
    277 	/*
    278 	 * Validate the MAC port link speed and update the group
    279 	 * link speed if needed.
    280 	 */
    281 	if (port->lp_ifspeed == 0 ||
    282 	    port->lp_link_state != LINK_STATE_UP ||
    283 	    port->lp_link_duplex != LINK_DUPLEX_FULL) {
    284 		/*
    285 		 * Can't attach a MAC port with unknown link speed,
    286 		 * down link, or not in full duplex mode.
    287 		 */
    288 		return (B_FALSE);
    289 	}
    290 
    291 	if (grp->lg_ifspeed == 0) {
    292 		/*
    293 		 * The group inherits the speed of the first link being
    294 		 * attached.
    295 		 */
    296 		grp->lg_ifspeed = port->lp_ifspeed;
    297 		link_state_changed = B_TRUE;
    298 	} else if (grp->lg_ifspeed != port->lp_ifspeed) {
    299 		/*
    300 		 * The link speed of the MAC port must be the same as
    301 		 * the group link speed, as per 802.3ad. Since it is
    302 		 * not, the attach is cancelled.
    303 		 */
    304 		return (B_FALSE);
    305 	}
    306 
    307 	grp->lg_nattached_ports++;
    308 
    309 	/*
    310 	 * Update the group link state.
    311 	 */
    312 	if (grp->lg_link_state != LINK_STATE_UP) {
    313 		grp->lg_link_state = LINK_STATE_UP;
    314 		grp->lg_link_duplex = LINK_DUPLEX_FULL;
    315 		link_state_changed = B_TRUE;
    316 	}
    317 
    318 	/*
    319 	 * Update port's state.
    320 	 */
    321 	port->lp_state = AGGR_PORT_STATE_ATTACHED;
    322 
    323 	aggr_grp_multicst_port(port, B_TRUE);
    324 
    325 	/*
    326 	 * Set port's receive callback
    327 	 */
    328 	mac_rx_set(port->lp_mch, aggr_recv_cb, port);
    329 
    330 	/*
    331 	 * If LACP is OFF, the port can be used to send data as soon
    332 	 * as its link is up and verified to be compatible with the
    333 	 * aggregation.
    334 	 *
    335 	 * If LACP is active or passive, notify the LACP subsystem, which
    336 	 * will enable sending on the port following the LACP protocol.
    337 	 */
    338 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
    339 		aggr_send_port_enable(port);
    340 	else
    341 		aggr_lacp_port_attached(port);
    342 
    343 	return (link_state_changed);
    344 }
    345 
    346 boolean_t
    347 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
    348 {
    349 	boolean_t link_state_changed = B_FALSE;
    350 
    351 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
    352 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
    353 
    354 	/* update state */
    355 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
    356 		return (B_FALSE);
    357 
    358 	mac_rx_clear(port->lp_mch);
    359 
    360 	aggr_grp_multicst_port(port, B_FALSE);
    361 
    362 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
    363 		aggr_send_port_disable(port);
    364 	else
    365 		aggr_lacp_port_detached(port);
    366 
    367 	port->lp_state = AGGR_PORT_STATE_STANDBY;
    368 
    369 	grp->lg_nattached_ports--;
    370 	if (grp->lg_nattached_ports == 0) {
    371 		/* the last attached MAC port of the group is being detached */
    372 		grp->lg_ifspeed = 0;
    373 		grp->lg_link_state = LINK_STATE_DOWN;
    374 		grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
    375 		link_state_changed = B_TRUE;
    376 	}
    377 
    378 	return (link_state_changed);
    379 }
    380 
    381 /*
    382  * Update the MAC addresses of the constituent ports of the specified
    383  * group. This function is invoked:
    384  * - after creating a new aggregation group.
    385  * - after adding new ports to an aggregation group.
    386  * - after removing a port from a group when the MAC address of
    387  *   that port was used for the MAC address of the group.
    388  * - after the MAC address of a port changed when the MAC address
    389  *   of that port was used for the MAC address of the group.
    390  *
    391  * Return true if the link state of the aggregation changed, for example
    392  * as a result of a failure changing the MAC address of one of the
    393  * constituent ports.
    394  */
    395 boolean_t
    396 aggr_grp_update_ports_mac(aggr_grp_t *grp)
    397 {
    398 	aggr_port_t *cport;
    399 	boolean_t link_state_changed = B_FALSE;
    400 	mac_perim_handle_t mph;
    401 
    402 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
    403 
    404 	for (cport = grp->lg_ports; cport != NULL;
    405 	    cport = cport->lp_next) {
    406 		mac_perim_enter_by_mh(cport->lp_mh, &mph);
    407 		if (aggr_port_unicst(cport) != 0) {
    408 			if (aggr_grp_detach_port(grp, cport))
    409 				link_state_changed = B_TRUE;
    410 		} else {
    411 			/*
    412 			 * If a port was detached because of a previous
    413 			 * failure changing the MAC address, the port is
    414 			 * reattached when it successfully changes the MAC
    415 			 * address now, and this might cause the link state
    416 			 * of the aggregation to change.
    417 			 */
    418 			if (aggr_grp_attach_port(grp, cport))
    419 				link_state_changed = B_TRUE;
    420 		}
    421 		mac_perim_exit(mph);
    422 	}
    423 	return (link_state_changed);
    424 }
    425 
    426 /*
    427  * Invoked when the MAC address of a port has changed. If the port's
    428  * MAC address was used for the group MAC address, set mac_addr_changedp
    429  * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST
    430  * notification. If the link state changes due to detach/attach of
    431  * the constituent port, set link_state_changedp to B_TRUE to indicate
    432  * to the caller that it should send a MAC_NOTE_LINK notification. In both
    433  * cases, it is the responsibility of the caller to invoke notification
    434  * functions after releasing the the port lock.
    435  */
    436 void
    437 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port,
    438     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
    439 {
    440 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
    441 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
    442 	ASSERT(mac_addr_changedp != NULL);
    443 	ASSERT(link_state_changedp != NULL);
    444 
    445 	*mac_addr_changedp = B_FALSE;
    446 	*link_state_changedp = B_FALSE;
    447 
    448 	if (grp->lg_addr_fixed) {
    449 		/*
    450 		 * The group is using a fixed MAC address or an automatic
    451 		 * MAC address has not been set.
    452 		 */
    453 		return;
    454 	}
    455 
    456 	if (grp->lg_mac_addr_port == port) {
    457 		/*
    458 		 * The MAC address of the port was assigned to the group
    459 		 * MAC address. Update the group MAC address.
    460 		 */
    461 		bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
    462 		*mac_addr_changedp = B_TRUE;
    463 	} else {
    464 		/*
    465 		 * Update the actual port MAC address to the MAC address
    466 		 * of the group.
    467 		 */
    468 		if (aggr_port_unicst(port) != 0) {
    469 			*link_state_changedp = aggr_grp_detach_port(grp, port);
    470 		} else {
    471 			/*
    472 			 * If a port was detached because of a previous
    473 			 * failure changing the MAC address, the port is
    474 			 * reattached when it successfully changes the MAC
    475 			 * address now, and this might cause the link state
    476 			 * of the aggregation to change.
    477 			 */
    478 			*link_state_changedp = aggr_grp_attach_port(grp, port);
    479 		}
    480 	}
    481 }
    482 
    483 /*
    484  * Add a port to a link aggregation group.
    485  */
    486 static int
    487 aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force,
    488     aggr_port_t **pp)
    489 {
    490 	aggr_port_t *port, **cport;
    491 	mac_perim_handle_t mph;
    492 	zoneid_t port_zoneid = ALL_ZONES;
    493 	int err;
    494 
    495 	/* The port must be int the same zone as the aggregation. */
    496 	if (zone_check_datalink(&port_zoneid, port_linkid) != 0)
    497 		port_zoneid = GLOBAL_ZONEID;
    498 	if (grp->lg_zoneid != port_zoneid)
    499 		return (EBUSY);
    500 
    501 	/*
    502 	 * lg_mh could be NULL when the function is called during the creation
    503 	 * of the aggregation.
    504 	 */
    505 	ASSERT(grp->lg_mh == NULL || MAC_PERIM_HELD(grp->lg_mh));
    506 
    507 	/* create new port */
    508 	err = aggr_port_create(grp, port_linkid, force, &port);
    509 	if (err != 0)
    510 		return (err);
    511 
    512 	mac_perim_enter_by_mh(port->lp_mh, &mph);
    513 
    514 	/* add port to list of group constituent ports */
    515 	cport = &grp->lg_ports;
    516 	while (*cport != NULL)
    517 		cport = &((*cport)->lp_next);
    518 	*cport = port;
    519 
    520 	/*
    521 	 * Back reference to the group it is member of. A port always
    522 	 * holds a reference to its group to ensure that the back
    523 	 * reference is always valid.
    524 	 */
    525 	port->lp_grp = grp;
    526 	AGGR_GRP_REFHOLD(grp);
    527 	grp->lg_nports++;
    528 
    529 	aggr_lacp_init_port(port);
    530 	mac_perim_exit(mph);
    531 
    532 	if (pp != NULL)
    533 		*pp = port;
    534 
    535 	return (0);
    536 }
    537 
    538 /*
    539  * Add a pseudo Rx ring for the given HW ring handle.
    540  */
    541 static int
    542 aggr_add_pseudo_rx_ring(aggr_port_t *port,
    543     aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
    544 {
    545 	aggr_pseudo_rx_ring_t	*ring;
    546 	int			err;
    547 	int			j;
    548 
    549 	for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
    550 		ring = rx_grp->arg_rings + j;
    551 		if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE))
    552 			break;
    553 	}
    554 
    555 	/*
    556 	 * No slot for this new Rx ring.
    557 	 */
    558 	if (j == MAX_RINGS_PER_GROUP)
    559 		return (EIO);
    560 
    561 	ring->arr_flags |= MAC_PSEUDO_RING_INUSE;
    562 	ring->arr_hw_rh = hw_rh;
    563 	ring->arr_port = port;
    564 	rx_grp->arg_ring_cnt++;
    565 
    566 	/*
    567 	 * The group is already registered, dynamically add a new ring to the
    568 	 * mac group.
    569 	 */
    570 	mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring);
    571 	if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) {
    572 		ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
    573 		ring->arr_hw_rh = NULL;
    574 		ring->arr_port = NULL;
    575 		rx_grp->arg_ring_cnt--;
    576 		mac_hwring_teardown(hw_rh);
    577 	}
    578 	return (err);
    579 }
    580 
    581 /*
    582  * Remove the pseudo Rx ring of the given HW ring handle.
    583  */
    584 static void
    585 aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
    586 {
    587 	aggr_pseudo_rx_ring_t	*ring;
    588 	int			j;
    589 
    590 	for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
    591 		ring = rx_grp->arg_rings + j;
    592 		if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) ||
    593 		    ring->arr_hw_rh != hw_rh) {
    594 			continue;
    595 		}
    596 
    597 		mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh);
    598 
    599 		ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
    600 		ring->arr_hw_rh = NULL;
    601 		ring->arr_port = NULL;
    602 		rx_grp->arg_ring_cnt--;
    603 		mac_hwring_teardown(hw_rh);
    604 		break;
    605 	}
    606 }
    607 
    608 /*
    609  * This function is called to create pseudo rings over the hardware rings of
    610  * the underlying device. Note that there is a 1:1 mapping between the pseudo
    611  * RX rings of the aggr and the hardware rings of the underlying port.
    612  */
    613 static int
    614 aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
    615 {
    616 	aggr_grp_t		*grp = port->lp_grp;
    617 	mac_ring_handle_t	hw_rh[MAX_RINGS_PER_GROUP];
    618 	aggr_unicst_addr_t	*addr, *a;
    619 	mac_perim_handle_t	pmph;
    620 	int			hw_rh_cnt, i = 0, j;
    621 	int			err = 0;
    622 
    623 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
    624 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
    625 
    626 	/*
    627 	 * This function must be called after the aggr registers its mac
    628 	 * and its RX group has been initialized.
    629 	 */
    630 	ASSERT(rx_grp->arg_gh != NULL);
    631 
    632 	/*
    633 	 * Get the list the the underlying HW rings.
    634 	 */
    635 	hw_rh_cnt = mac_hwrings_get(port->lp_mch, &port->lp_hwgh, hw_rh,
    636 	    MAC_RING_TYPE_RX);
    637 
    638 	if (port->lp_hwgh != NULL) {
    639 		/*
    640 		 * Quiesce the HW ring and the mac srs on the ring. Note
    641 		 * that the HW ring will be restarted when the pseudo ring
    642 		 * is started. At that time all the packets will be
    643 		 * directly passed up to the pseudo RX ring and handled
    644 		 * by mac srs created over the pseudo RX ring.
    645 		 */
    646 		mac_rx_client_quiesce(port->lp_mch);
    647 		mac_srs_perm_quiesce(port->lp_mch, B_TRUE);
    648 	}
    649 
    650 	/*
    651 	 * Add all the unicast addresses to the newly added port.
    652 	 */
    653 	for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) {
    654 		if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0)
    655 			break;
    656 	}
    657 
    658 	for (i = 0; err == 0 && i < hw_rh_cnt; i++)
    659 		err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]);
    660 
    661 	if (err != 0) {
    662 		for (j = 0; j < i; j++)
    663 			aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]);
    664 
    665 		for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next)
    666 			aggr_port_remmac(port, a->aua_addr);
    667 
    668 		if (port->lp_hwgh != NULL) {
    669 			mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
    670 			mac_rx_client_restart(port->lp_mch);
    671 			port->lp_hwgh = NULL;
    672 		}
    673 	} else {
    674 		port->lp_grp_added = B_TRUE;
    675 	}
    676 done:
    677 	mac_perim_exit(pmph);
    678 	return (err);
    679 }
    680 
    681 /*
    682  * This function is called by aggr to remove pseudo RX rings over the
    683  * HW rings of the underlying port.
    684  */
    685 static void
    686 aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
    687 {
    688 	aggr_grp_t		*grp = port->lp_grp;
    689 	mac_ring_handle_t	hw_rh[MAX_RINGS_PER_GROUP];
    690 	aggr_unicst_addr_t	*addr;
    691 	mac_group_handle_t	hwgh;
    692 	mac_perim_handle_t	pmph;
    693 	int			hw_rh_cnt, i;
    694 
    695 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
    696 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
    697 
    698 	if (!port->lp_grp_added)
    699 		goto done;
    700 
    701 	ASSERT(rx_grp->arg_gh != NULL);
    702 	hw_rh_cnt = mac_hwrings_get(port->lp_mch, &hwgh, hw_rh,
    703 	    MAC_RING_TYPE_RX);
    704 
    705 	/*
    706 	 * If hw_rh_cnt is 0, it means that the underlying port does not
    707 	 * support RX rings. Directly return in this case.
    708 	 */
    709 	for (i = 0; i < hw_rh_cnt; i++)
    710 		aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]);
    711 
    712 	for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next)
    713 		aggr_port_remmac(port, addr->aua_addr);
    714 
    715 	if (port->lp_hwgh != NULL) {
    716 		port->lp_hwgh = NULL;
    717 
    718 		/*
    719 		 * First clear the permanent-quiesced flag of the RX srs then
    720 		 * restart the HW ring and the mac srs on the ring. Note that
    721 		 * the HW ring and associated SRS will soon been removed when
    722 		 * the port is removed from the aggr.
    723 		 */
    724 		mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
    725 		mac_rx_client_restart(port->lp_mch);
    726 	}
    727 
    728 	port->lp_grp_added = B_FALSE;
    729 done:
    730 	mac_perim_exit(pmph);
    731 }
    732 
    733 static int
    734 aggr_pseudo_disable_intr(mac_intr_handle_t ih)
    735 {
    736 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
    737 	return (mac_hwring_disable_intr(rr_ring->arr_hw_rh));
    738 }
    739 
    740 static int
    741 aggr_pseudo_enable_intr(mac_intr_handle_t ih)
    742 {
    743 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
    744 	return (mac_hwring_enable_intr(rr_ring->arr_hw_rh));
    745 }
    746 
    747 static int
    748 aggr_pseudo_start_ring(mac_ring_driver_t arg, uint64_t mr_gen)
    749 {
    750 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
    751 	int err;
    752 
    753 	err = mac_hwring_start(rr_ring->arr_hw_rh);
    754 	if (err == 0)
    755 		rr_ring->arr_gen = mr_gen;
    756 	return (err);
    757 }
    758 
    759 static void
    760 aggr_pseudo_stop_ring(mac_ring_driver_t arg)
    761 {
    762 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
    763 	mac_hwring_stop(rr_ring->arr_hw_rh);
    764 }
    765 
    766 /*
    767  * Add one or more ports to an existing link aggregation group.
    768  */
    769 int
    770 aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force,
    771     laioc_port_t *ports)
    772 {
    773 	int rc, i, nadded = 0;
    774 	aggr_grp_t *grp = NULL;
    775 	aggr_port_t *port;
    776 	boolean_t link_state_changed = B_FALSE;
    777 	mac_perim_handle_t mph, pmph;
    778 
    779 	/* get group corresponding to linkid */
    780 	rw_enter(&aggr_grp_lock, RW_READER);
    781 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
    782 	    (mod_hash_val_t *)&grp) != 0) {
    783 		rw_exit(&aggr_grp_lock);
    784 		return (ENOENT);
    785 	}
    786 	AGGR_GRP_REFHOLD(grp);
    787 
    788 	/*
    789 	 * Hold the perimeter so that the aggregation won't be destroyed.
    790 	 */
    791 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
    792 	rw_exit(&aggr_grp_lock);
    793 
    794 	/* add the specified ports to group */
    795 	for (i = 0; i < nports; i++) {
    796 		/* add port to group */
    797 		if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid,
    798 		    force, &port)) != 0) {
    799 			goto bail;
    800 		}
    801 		ASSERT(port != NULL);
    802 		nadded++;
    803 
    804 		/* check capabilities */
    805 		if (!aggr_grp_capab_check(grp, port) ||
    806 		    !aggr_grp_sdu_check(grp, port) ||
    807 		    !aggr_grp_margin_check(grp, port)) {
    808 			rc = ENOTSUP;
    809 			goto bail;
    810 		}
    811 
    812 		/*
    813 		 * Create the pseudo ring for each HW ring of the underlying
    814 		 * port.
    815 		 */
    816 		rc = aggr_add_pseudo_rx_group(port, &grp->lg_rx_group);
    817 		if (rc != 0)
    818 			goto bail;
    819 
    820 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
    821 
    822 		/* set LACP mode */
    823 		aggr_port_lacp_set_mode(grp, port);
    824 
    825 		/* start port if group has already been started */
    826 		if (grp->lg_started) {
    827 			rc = aggr_port_start(port);
    828 			if (rc != 0) {
    829 				mac_perim_exit(pmph);
    830 				goto bail;
    831 			}
    832 
    833 			/*
    834 			 * Turn on the promiscuous mode over the port when it
    835 			 * is requested to be turned on to receive the
    836 			 * non-primary address over a port, or the promiscous
    837 			 * mode is enabled over the aggr.
    838 			 */
    839 			if (grp->lg_promisc || port->lp_prom_addr != NULL) {
    840 				rc = aggr_port_promisc(port, B_TRUE);
    841 				if (rc != 0) {
    842 					mac_perim_exit(pmph);
    843 					goto bail;
    844 				}
    845 			}
    846 		}
    847 		mac_perim_exit(pmph);
    848 
    849 		/*
    850 		 * Attach each port if necessary.
    851 		 */
    852 		if (aggr_port_notify_link(grp, port))
    853 			link_state_changed = B_TRUE;
    854 
    855 		/*
    856 		 * Initialize the callback functions for this port.
    857 		 */
    858 		aggr_port_init_callbacks(port);
    859 	}
    860 
    861 	/* update the MAC address of the constituent ports */
    862 	if (aggr_grp_update_ports_mac(grp))
    863 		link_state_changed = B_TRUE;
    864 
    865 	if (link_state_changed)
    866 		mac_link_update(grp->lg_mh, grp->lg_link_state);
    867 
    868 bail:
    869 	if (rc != 0) {
    870 		/* stop and remove ports that have been added */
    871 		for (i = 0; i < nadded; i++) {
    872 			port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
    873 			ASSERT(port != NULL);
    874 			if (grp->lg_started) {
    875 				mac_perim_enter_by_mh(port->lp_mh, &pmph);
    876 				(void) aggr_port_promisc(port, B_FALSE);
    877 				aggr_port_stop(port);
    878 				mac_perim_exit(pmph);
    879 			}
    880 			aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
    881 			(void) aggr_grp_rem_port(grp, port, NULL, NULL);
    882 		}
    883 	}
    884 
    885 	mac_perim_exit(mph);
    886 	AGGR_GRP_REFRELE(grp);
    887 	return (rc);
    888 }
    889 
    890 static int
    891 aggr_grp_modify_common(aggr_grp_t *grp, uint8_t update_mask, uint32_t policy,
    892     boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
    893     aggr_lacp_timer_t lacp_timer)
    894 {
    895 	boolean_t mac_addr_changed = B_FALSE;
    896 	boolean_t link_state_changed = B_FALSE;
    897 	mac_perim_handle_t pmph;
    898 
    899 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
    900 
    901 	/* validate fixed address if specified */
    902 	if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed &&
    903 	    ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) ||
    904 	    (mac_addr[0] & 0x01))) {
    905 		return (EINVAL);
    906 	}
    907 
    908 	/* update policy if requested */
    909 	if (update_mask & AGGR_MODIFY_POLICY)
    910 		aggr_send_update_policy(grp, policy);
    911 
    912 	/* update unicast MAC address if requested */
    913 	if (update_mask & AGGR_MODIFY_MAC) {
    914 		if (mac_fixed) {
    915 			/* user-supplied MAC address */
    916 			grp->lg_mac_addr_port = NULL;
    917 			if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) {
    918 				bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
    919 				mac_addr_changed = B_TRUE;
    920 			}
    921 		} else if (grp->lg_addr_fixed) {
    922 			/* switch from user-supplied to automatic */
    923 			aggr_port_t *port = grp->lg_ports;
    924 
    925 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
    926 			bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
    927 			grp->lg_mac_addr_port = port;
    928 			mac_addr_changed = B_TRUE;
    929 			mac_perim_exit(pmph);
    930 		}
    931 		grp->lg_addr_fixed = mac_fixed;
    932 	}
    933 
    934 	if (mac_addr_changed)
    935 		link_state_changed = aggr_grp_update_ports_mac(grp);
    936 
    937 	if (update_mask & AGGR_MODIFY_LACP_MODE)
    938 		aggr_lacp_update_mode(grp, lacp_mode);
    939 
    940 	if (update_mask & AGGR_MODIFY_LACP_TIMER)
    941 		aggr_lacp_update_timer(grp, lacp_timer);
    942 
    943 	if (link_state_changed)
    944 		mac_link_update(grp->lg_mh, grp->lg_link_state);
    945 
    946 	if (mac_addr_changed)
    947 		mac_unicst_update(grp->lg_mh, grp->lg_addr);
    948 
    949 	return (0);
    950 }
    951 
    952 /*
    953  * Update properties of an existing link aggregation group.
    954  */
    955 int
    956 aggr_grp_modify(datalink_id_t linkid, uint8_t update_mask, uint32_t policy,
    957     boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
    958     aggr_lacp_timer_t lacp_timer)
    959 {
    960 	aggr_grp_t *grp = NULL;
    961 	mac_perim_handle_t mph;
    962 	int err;
    963 
    964 	/* get group corresponding to linkid */
    965 	rw_enter(&aggr_grp_lock, RW_READER);
    966 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
    967 	    (mod_hash_val_t *)&grp) != 0) {
    968 		rw_exit(&aggr_grp_lock);
    969 		return (ENOENT);
    970 	}
    971 	AGGR_GRP_REFHOLD(grp);
    972 
    973 	/*
    974 	 * Hold the perimeter so that the aggregation won't be destroyed.
    975 	 */
    976 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
    977 	rw_exit(&aggr_grp_lock);
    978 
    979 	err = aggr_grp_modify_common(grp, update_mask, policy, mac_fixed,
    980 	    mac_addr, lacp_mode, lacp_timer);
    981 
    982 	mac_perim_exit(mph);
    983 	AGGR_GRP_REFRELE(grp);
    984 	return (err);
    985 }
    986 
    987 /*
    988  * Create a new link aggregation group upon request from administrator.
    989  * Returns 0 on success, an errno on failure.
    990  */
    991 int
    992 aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports,
    993     laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force,
    994     uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer,
    995     cred_t *credp)
    996 {
    997 	aggr_grp_t *grp = NULL;
    998 	aggr_port_t *port;
    999 	mac_register_t *mac;
   1000 	boolean_t link_state_changed;
   1001 	mac_perim_handle_t mph;
   1002 	int err;
   1003 	int i;
   1004 
   1005 	/* need at least one port */
   1006 	if (nports == 0)
   1007 		return (EINVAL);
   1008 
   1009 	rw_enter(&aggr_grp_lock, RW_WRITER);
   1010 
   1011 	/* does a group with the same linkid already exist? */
   1012 	err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
   1013 	    (mod_hash_val_t *)&grp);
   1014 	if (err == 0) {
   1015 		rw_exit(&aggr_grp_lock);
   1016 		return (EEXIST);
   1017 	}
   1018 
   1019 	grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP);
   1020 
   1021 	grp->lg_refs = 1;
   1022 	grp->lg_closing = B_FALSE;
   1023 	grp->lg_force = force;
   1024 	grp->lg_linkid = linkid;
   1025 	grp->lg_zoneid = crgetzoneid(credp);
   1026 	grp->lg_ifspeed = 0;
   1027 	grp->lg_link_state = LINK_STATE_UNKNOWN;
   1028 	grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
   1029 	grp->lg_started = B_FALSE;
   1030 	grp->lg_promisc = B_FALSE;
   1031 	grp->lg_lacp_done = B_FALSE;
   1032 	grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
   1033 	grp->lg_lacp_rx_thread = thread_create(NULL, 0,
   1034 	    aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri);
   1035 	bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t));
   1036 	aggr_lacp_init_grp(grp);
   1037 
   1038 	/* add MAC ports to group */
   1039 	grp->lg_ports = NULL;
   1040 	grp->lg_nports = 0;
   1041 	grp->lg_nattached_ports = 0;
   1042 	grp->lg_ntx_ports = 0;
   1043 
   1044 	/*
   1045 	 * If key is not specified by the user, allocate the key.
   1046 	 */
   1047 	if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) {
   1048 		err = ENOMEM;
   1049 		goto bail;
   1050 	}
   1051 	grp->lg_key = key;
   1052 
   1053 	for (i = 0; i < nports; i++) {
   1054 		err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL);
   1055 		if (err != 0)
   1056 			goto bail;
   1057 	}
   1058 
   1059 	/*
   1060 	 * If no explicit MAC address was specified by the administrator,
   1061 	 * set it to the MAC address of the first port.
   1062 	 */
   1063 	grp->lg_addr_fixed = mac_fixed;
   1064 	if (grp->lg_addr_fixed) {
   1065 		/* validate specified address */
   1066 		if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
   1067 			err = EINVAL;
   1068 			goto bail;
   1069 		}
   1070 		bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
   1071 	} else {
   1072 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
   1073 		grp->lg_mac_addr_port = grp->lg_ports;
   1074 	}
   1075 
   1076 	/* set the initial group capabilities */
   1077 	aggr_grp_capab_set(grp);
   1078 
   1079 	if ((mac = mac_alloc(MAC_VERSION)) == NULL) {
   1080 		err = ENOMEM;
   1081 		goto bail;
   1082 	}
   1083 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
   1084 	mac->m_driver = grp;
   1085 	mac->m_dip = aggr_dip;
   1086 	mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? (uint_t)-1 : grp->lg_key;
   1087 	mac->m_src_addr = grp->lg_addr;
   1088 	mac->m_callbacks = &aggr_m_callbacks;
   1089 	mac->m_min_sdu = 0;
   1090 	mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp);
   1091 	mac->m_margin = aggr_grp_max_margin(grp);
   1092 	mac->m_v12n = MAC_VIRT_LEVEL1;
   1093 	err = mac_register(mac, &grp->lg_mh);
   1094 	mac_free(mac);
   1095 	if (err != 0)
   1096 		goto bail;
   1097 
   1098 	err = dls_devnet_create(grp->lg_mh, grp->lg_linkid, crgetzoneid(credp));
   1099 	if (err != 0) {
   1100 		(void) mac_unregister(grp->lg_mh);
   1101 		grp->lg_mh = NULL;
   1102 		goto bail;
   1103 	}
   1104 
   1105 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
   1106 
   1107 	/*
   1108 	 * Update the MAC address of the constituent ports.
   1109 	 * None of the port is attached at this time, the link state of the
   1110 	 * aggregation will not change.
   1111 	 */
   1112 	link_state_changed = aggr_grp_update_ports_mac(grp);
   1113 	ASSERT(!link_state_changed);
   1114 
   1115 	/* update outbound load balancing policy */
   1116 	aggr_send_update_policy(grp, policy);
   1117 
   1118 	/* set LACP mode */
   1119 	aggr_lacp_set_mode(grp, lacp_mode, lacp_timer);
   1120 
   1121 	/*
   1122 	 * Attach each port if necessary.
   1123 	 */
   1124 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
   1125 		/*
   1126 		 * Create the pseudo ring for each HW ring of the underlying
   1127 		 * port. Note that this is done after the aggr registers the
   1128 		 * mac.
   1129 		 */
   1130 		VERIFY(aggr_add_pseudo_rx_group(port, &grp->lg_rx_group) == 0);
   1131 		if (aggr_port_notify_link(grp, port))
   1132 			link_state_changed = B_TRUE;
   1133 
   1134 		/*
   1135 		 * Initialize the callback functions for this port.
   1136 		 */
   1137 		aggr_port_init_callbacks(port);
   1138 	}
   1139 
   1140 	if (link_state_changed)
   1141 		mac_link_update(grp->lg_mh, grp->lg_link_state);
   1142 
   1143 	/* add new group to hash table */
   1144 	err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid),
   1145 	    (mod_hash_val_t)grp);
   1146 	ASSERT(err == 0);
   1147 	aggr_grp_cnt++;
   1148 
   1149 	mac_perim_exit(mph);
   1150 	rw_exit(&aggr_grp_lock);
   1151 	return (0);
   1152 
   1153 bail:
   1154 
   1155 	grp->lg_closing = B_TRUE;
   1156 
   1157 	port = grp->lg_ports;
   1158 	while (port != NULL) {
   1159 		aggr_port_t *cport;
   1160 
   1161 		cport = port->lp_next;
   1162 		aggr_port_delete(port);
   1163 		port = cport;
   1164 	}
   1165 
   1166 	/*
   1167 	 * Inform the lacp_rx thread to exit.
   1168 	 */
   1169 	mutex_enter(&grp->lg_lacp_lock);
   1170 	grp->lg_lacp_done = B_TRUE;
   1171 	cv_signal(&grp->lg_lacp_cv);
   1172 	while (grp->lg_lacp_rx_thread != NULL)
   1173 		cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
   1174 	mutex_exit(&grp->lg_lacp_lock);
   1175 
   1176 	rw_exit(&aggr_grp_lock);
   1177 	AGGR_GRP_REFRELE(grp);
   1178 	return (err);
   1179 }
   1180 
   1181 /*
   1182  * Return a pointer to the member of a group with specified linkid.
   1183  */
   1184 static aggr_port_t *
   1185 aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid)
   1186 {
   1187 	aggr_port_t *port;
   1188 
   1189 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
   1190 
   1191 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
   1192 		if (port->lp_linkid == linkid)
   1193 			break;
   1194 	}
   1195 
   1196 	return (port);
   1197 }
   1198 
   1199 /*
   1200  * Stop, detach and remove a port from a link aggregation group.
   1201  */
   1202 static int
   1203 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port,
   1204     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
   1205 {
   1206 	int rc = 0;
   1207 	aggr_port_t **pport;
   1208 	boolean_t mac_addr_changed = B_FALSE;
   1209 	boolean_t link_state_changed = B_FALSE;
   1210 	mac_perim_handle_t mph;
   1211 	uint64_t val;
   1212 	uint_t i;
   1213 	uint_t stat;
   1214 
   1215 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
   1216 	ASSERT(grp->lg_nports > 1);
   1217 	ASSERT(!grp->lg_closing);
   1218 
   1219 	/* unlink port */
   1220 	for (pport = &grp->lg_ports; *pport != port;
   1221 	    pport = &(*pport)->lp_next) {
   1222 		if (*pport == NULL) {
   1223 			rc = ENOENT;
   1224 			goto done;
   1225 		}
   1226 	}
   1227 	*pport = port->lp_next;
   1228 
   1229 	mac_perim_enter_by_mh(port->lp_mh, &mph);
   1230 
   1231 	/*
   1232 	 * If the MAC address of the port being removed was assigned
   1233 	 * to the group, update the group MAC address
   1234 	 * using the MAC address of a different port.
   1235 	 */
   1236 	if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) {
   1237 		/*
   1238 		 * Set the MAC address of the group to the
   1239 		 * MAC address of its first port.
   1240 		 */
   1241 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
   1242 		grp->lg_mac_addr_port = grp->lg_ports;
   1243 		mac_addr_changed = B_TRUE;
   1244 	}
   1245 
   1246 	link_state_changed = aggr_grp_detach_port(grp, port);
   1247 
   1248 	/*
   1249 	 * Add the counter statistics of the ports while it was aggregated
   1250 	 * to the group's residual statistics.  This is done by obtaining
   1251 	 * the current counter from the underlying MAC then subtracting the
   1252 	 * value of the counter at the moment it was added to the
   1253 	 * aggregation.
   1254 	 */
   1255 	for (i = 0; i < MAC_NSTAT; i++) {
   1256 		stat = i + MAC_STAT_MIN;
   1257 		if (!MAC_STAT_ISACOUNTER(stat))
   1258 			continue;
   1259 		val = aggr_port_stat(port, stat);
   1260 		val -= port->lp_stat[i];
   1261 		grp->lg_stat[i] += val;
   1262 	}
   1263 	for (i = 0; i < ETHER_NSTAT; i++) {
   1264 		stat = i + MACTYPE_STAT_MIN;
   1265 		if (!ETHER_STAT_ISACOUNTER(stat))
   1266 			continue;
   1267 		val = aggr_port_stat(port, stat);
   1268 		val -= port->lp_ether_stat[i];
   1269 		grp->lg_ether_stat[i] += val;
   1270 	}
   1271 
   1272 	grp->lg_nports--;
   1273 	mac_perim_exit(mph);
   1274 
   1275 	aggr_port_delete(port);
   1276 
   1277 	/*
   1278 	 * If the group MAC address has changed, update the MAC address of
   1279 	 * the remaining constituent ports according to the new MAC
   1280 	 * address of the group.
   1281 	 */
   1282 	if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
   1283 		link_state_changed = B_TRUE;
   1284 
   1285 done:
   1286 	if (mac_addr_changedp != NULL)
   1287 		*mac_addr_changedp = mac_addr_changed;
   1288 	if (link_state_changedp != NULL)
   1289 		*link_state_changedp = link_state_changed;
   1290 
   1291 	return (rc);
   1292 }
   1293 
   1294 /*
   1295  * Remove one or more ports from an existing link aggregation group.
   1296  */
   1297 int
   1298 aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports)
   1299 {
   1300 	int rc = 0, i;
   1301 	aggr_grp_t *grp = NULL;
   1302 	aggr_port_t *port;
   1303 	boolean_t mac_addr_update = B_FALSE, mac_addr_changed;
   1304 	boolean_t link_state_update = B_FALSE, link_state_changed;
   1305 	mac_perim_handle_t mph, pmph;
   1306 
   1307 	/* get group corresponding to linkid */
   1308 	rw_enter(&aggr_grp_lock, RW_READER);
   1309 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
   1310 	    (mod_hash_val_t *)&grp) != 0) {
   1311 		rw_exit(&aggr_grp_lock);
   1312 		return (ENOENT);
   1313 	}
   1314 	AGGR_GRP_REFHOLD(grp);
   1315 
   1316 	/*
   1317 	 * Hold the perimeter so that the aggregation won't be destroyed.
   1318 	 */
   1319 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
   1320 	rw_exit(&aggr_grp_lock);
   1321 
   1322 	/* we need to keep at least one port per group */
   1323 	if (nports >= grp->lg_nports) {
   1324 		rc = EINVAL;
   1325 		goto bail;
   1326 	}
   1327 
   1328 	/* first verify that all the groups are valid */
   1329 	for (i = 0; i < nports; i++) {
   1330 		if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) {
   1331 			/* port not found */
   1332 			rc = ENOENT;
   1333 			goto bail;
   1334 		}
   1335 	}
   1336 
   1337 	/* clear the promiscous mode for the specified ports */
   1338 	for (i = 0; i < nports && rc == 0; i++) {
   1339 		/* lookup port */
   1340 		port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
   1341 		ASSERT(port != NULL);
   1342 
   1343 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
   1344 		rc = aggr_port_promisc(port, B_FALSE);
   1345 		mac_perim_exit(pmph);
   1346 	}
   1347 	if (rc != 0) {
   1348 		for (i = 0; i < nports; i++) {
   1349 			port = aggr_grp_port_lookup(grp,
   1350 			    ports[i].lp_linkid);
   1351 			ASSERT(port != NULL);
   1352 
   1353 			/*
   1354 			 * Turn the promiscuous mode back on if it is required
   1355 			 * to receive the non-primary address over a port, or
   1356 			 * the promiscous mode is enabled over the aggr.
   1357 			 */
   1358 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
   1359 			if (port->lp_started && (grp->lg_promisc ||
   1360 			    port->lp_prom_addr != NULL)) {
   1361 				(void) aggr_port_promisc(port, B_TRUE);
   1362 			}
   1363 			mac_perim_exit(pmph);
   1364 		}
   1365 		goto bail;
   1366 	}
   1367 
   1368 	/* remove the specified ports from group */
   1369 	for (i = 0; i < nports; i++) {
   1370 		/* lookup port */
   1371 		port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
   1372 		ASSERT(port != NULL);
   1373 
   1374 		/* stop port if group has already been started */
   1375 		if (grp->lg_started) {
   1376 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
   1377 			aggr_port_stop(port);
   1378 			mac_perim_exit(pmph);
   1379 		}
   1380 
   1381 		aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
   1382 		/* remove port from group */
   1383 		rc = aggr_grp_rem_port(grp, port, &mac_addr_changed,
   1384 		    &link_state_changed);
   1385 		ASSERT(rc == 0);
   1386 		mac_addr_update = mac_addr_update || mac_addr_changed;
   1387 		link_state_update = link_state_update || link_state_changed;
   1388 	}
   1389 
   1390 bail:
   1391 	if (mac_addr_update)
   1392 		mac_unicst_update(grp->lg_mh, grp->lg_addr);
   1393 	if (link_state_update)
   1394 		mac_link_update(grp->lg_mh, grp->lg_link_state);
   1395 
   1396 	mac_perim_exit(mph);
   1397 	AGGR_GRP_REFRELE(grp);
   1398 
   1399 	return (rc);
   1400 }
   1401 
   1402 int
   1403 aggr_grp_delete(datalink_id_t linkid, cred_t *cred)
   1404 {
   1405 	aggr_grp_t *grp = NULL;
   1406 	aggr_port_t *port, *cport;
   1407 	datalink_id_t tmpid;
   1408 	mod_hash_val_t val;
   1409 	mac_perim_handle_t mph, pmph;
   1410 	int err;
   1411 
   1412 	rw_enter(&aggr_grp_lock, RW_WRITER);
   1413 
   1414 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
   1415 	    (mod_hash_val_t *)&grp) != 0) {
   1416 		rw_exit(&aggr_grp_lock);
   1417 		return (ENOENT);
   1418 	}
   1419 
   1420 	/*
   1421 	 * Note that dls_devnet_destroy() must be called before lg_lock is
   1422 	 * held. Otherwise, it will deadlock if another thread is in
   1423 	 * aggr_m_stat() and thus has a kstat_hold() on the kstats that
   1424 	 * dls_devnet_destroy() needs to delete.
   1425 	 */
   1426 	if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid, B_TRUE)) != 0) {
   1427 		rw_exit(&aggr_grp_lock);
   1428 		return (err);
   1429 	}
   1430 	ASSERT(linkid == tmpid);
   1431 
   1432 	/*
   1433 	 * Unregister from the MAC service module. Since this can
   1434 	 * fail if a client hasn't closed the MAC port, we gracefully
   1435 	 * fail the operation.
   1436 	 */
   1437 	if ((err = mac_disable(grp->lg_mh)) != 0) {
   1438 		(void) dls_devnet_create(grp->lg_mh, linkid, crgetzoneid(cred));
   1439 		rw_exit(&aggr_grp_lock);
   1440 		return (err);
   1441 	}
   1442 	(void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val);
   1443 	ASSERT(grp == (aggr_grp_t *)val);
   1444 
   1445 	ASSERT(aggr_grp_cnt > 0);
   1446 	aggr_grp_cnt--;
   1447 	rw_exit(&aggr_grp_lock);
   1448 
   1449 	/*
   1450 	 * Inform the lacp_rx thread to exit.
   1451 	 */
   1452 	mutex_enter(&grp->lg_lacp_lock);
   1453 	grp->lg_lacp_done = B_TRUE;
   1454 	cv_signal(&grp->lg_lacp_cv);
   1455 	while (grp->lg_lacp_rx_thread != NULL)
   1456 		cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
   1457 	mutex_exit(&grp->lg_lacp_lock);
   1458 
   1459 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
   1460 
   1461 	grp->lg_closing = B_TRUE;
   1462 	/* detach and free MAC ports associated with group */
   1463 	port = grp->lg_ports;
   1464 	while (port != NULL) {
   1465 		cport = port->lp_next;
   1466 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
   1467 		if (grp->lg_started)
   1468 			aggr_port_stop(port);
   1469 		(void) aggr_grp_detach_port(grp, port);
   1470 		mac_perim_exit(pmph);
   1471 		aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
   1472 		aggr_port_delete(port);
   1473 		port = cport;
   1474 	}
   1475 
   1476 	mac_perim_exit(mph);
   1477 
   1478 	/*
   1479 	 * Wait for the port's lacp timer thread and its notification callback
   1480 	 * to exit before calling mac_unregister() since both needs to access
   1481 	 * the mac perimeter of the grp.
   1482 	 */
   1483 	aggr_grp_port_wait(grp);
   1484 
   1485 	VERIFY(mac_unregister(grp->lg_mh) == 0);
   1486 	grp->lg_mh = NULL;
   1487 
   1488 	AGGR_GRP_REFRELE(grp);
   1489 	return (0);
   1490 }
   1491 
   1492 void
   1493 aggr_grp_free(aggr_grp_t *grp)
   1494 {
   1495 	ASSERT(grp->lg_refs == 0);
   1496 	ASSERT(grp->lg_port_ref == 0);
   1497 	if (grp->lg_key > AGGR_MAX_KEY) {
   1498 		id_free(key_ids, grp->lg_key);
   1499 		grp->lg_key = 0;
   1500 	}
   1501 	kmem_cache_free(aggr_grp_cache, grp);
   1502 }
   1503 
   1504 int
   1505 aggr_grp_info(datalink_id_t linkid, void *fn_arg,
   1506     aggr_grp_info_new_grp_fn_t new_grp_fn,
   1507     aggr_grp_info_new_port_fn_t new_port_fn, cred_t *cred)
   1508 {
   1509 	aggr_grp_t	*grp;
   1510 	aggr_port_t	*port;
   1511 	mac_perim_handle_t mph, pmph;
   1512 	int		rc = 0;
   1513 
   1514 	/*
   1515 	 * Make sure that the aggregation link is visible from the caller's
   1516 	 * zone.
   1517 	 */
   1518 	if (!dls_devnet_islinkvisible(linkid, crgetzoneid(cred)))
   1519 		return (ENOENT);
   1520 
   1521 	rw_enter(&aggr_grp_lock, RW_READER);
   1522 
   1523 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
   1524 	    (mod_hash_val_t *)&grp) != 0) {
   1525 		rw_exit(&aggr_grp_lock);
   1526 		return (ENOENT);
   1527 	}
   1528 	AGGR_GRP_REFHOLD(grp);
   1529 
   1530 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
   1531 	rw_exit(&aggr_grp_lock);
   1532 
   1533 	rc = new_grp_fn(fn_arg, grp->lg_linkid,
   1534 	    (grp->lg_key > AGGR_MAX_KEY) ? 0 : grp->lg_key, grp->lg_addr,
   1535 	    grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy,
   1536 	    grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer);
   1537 
   1538 	if (rc != 0)
   1539 		goto bail;
   1540 
   1541 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
   1542 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
   1543 		rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr,
   1544 		    port->lp_state, &port->lp_lacp.ActorOperPortState);
   1545 		mac_perim_exit(pmph);
   1546 
   1547 		if (rc != 0)
   1548 			goto bail;
   1549 	}
   1550 
   1551 bail:
   1552 	mac_perim_exit(mph);
   1553 	AGGR_GRP_REFRELE(grp);
   1554 	return (rc);
   1555 }
   1556 
/*
 * Handler for ioctl messages sent to the aggregation: none are
 * supported, so every message is negatively acknowledged with ENOTSUP.
 * arg is not used.
 */
/*ARGSUSED*/
static void
aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	miocnak(q, mp, 0, ENOTSUP);
}
   1563 
   1564 static int
   1565 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
   1566 {
   1567 	aggr_port_t	*port;
   1568 	uint_t		stat_index;
   1569 
   1570 	/* We only aggregate counter statistics. */
   1571 	if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
   1572 	    IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
   1573 		return (ENOTSUP);
   1574 	}
   1575 
   1576 	/*
   1577 	 * Counter statistics for a group are computed by aggregating the
   1578 	 * counters of the members MACs while they were aggregated, plus
   1579 	 * the residual counter of the group itself, which is updated each
   1580 	 * time a MAC is removed from the group.
   1581 	 */
   1582 	*val = 0;
   1583 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
   1584 		/* actual port statistic */
   1585 		*val += aggr_port_stat(port, stat);
   1586 		/*
   1587 		 * minus the port stat when it was added, plus any residual
   1588 		 * amount for the group.
   1589 		 */
   1590 		if (IS_MAC_STAT(stat)) {
   1591 			stat_index = stat - MAC_STAT_MIN;
   1592 			*val -= port->lp_stat[stat_index];
   1593 			*val += grp->lg_stat[stat_index];
   1594 		} else if (IS_MACTYPE_STAT(stat)) {
   1595 			stat_index = stat - MACTYPE_STAT_MIN;
   1596 			*val -= port->lp_ether_stat[stat_index];
   1597 			*val += grp->lg_ether_stat[stat_index];
   1598 		}
   1599 	}
   1600 	return (0);
   1601 }
   1602 
   1603 static int
   1604 aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
   1605 {
   1606 	aggr_grp_t		*grp = arg;
   1607 	mac_perim_handle_t	mph;
   1608 	int			rval = 0;
   1609 
   1610 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
   1611 
   1612 	switch (stat) {
   1613 	case MAC_STAT_IFSPEED:
   1614 		*val = grp->lg_ifspeed;
   1615 		break;
   1616 
   1617 	case ETHER_STAT_LINK_DUPLEX:
   1618 		*val = grp->lg_link_duplex;
   1619 		break;
   1620 
   1621 	default:
   1622 		/*
   1623 		 * For all other statistics, we return the aggregated stat
   1624 		 * from the underlying ports.  aggr_grp_stat() will set
   1625 		 * rval appropriately if the statistic isn't a counter.
   1626 		 */
   1627 		rval = aggr_grp_stat(grp, stat, val);
   1628 	}
   1629 
   1630 	mac_perim_exit(mph);
   1631 	return (rval);
   1632 }
   1633 
   1634 static int
   1635 aggr_m_start(void *arg)
   1636 {
   1637 	aggr_grp_t *grp = arg;
   1638 	aggr_port_t *port;
   1639 	mac_perim_handle_t mph, pmph;
   1640 
   1641 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
   1642 
   1643 	/*
   1644 	 * Attempts to start all configured members of the group.
   1645 	 * Group members will be attached when their link-up notification
   1646 	 * is received.
   1647 	 */
   1648 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
   1649 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
   1650 		if (aggr_port_start(port) != 0) {
   1651 			mac_perim_exit(pmph);
   1652 			continue;
   1653 		}
   1654 
   1655 		/*
   1656 		 * Turn on the promiscuous mode if it is required to receive
   1657 		 * the non-primary address over a port, or the promiscous
   1658 		 * mode is enabled over the aggr.
   1659 		 */
   1660 		if (grp->lg_promisc || port->lp_prom_addr != NULL) {
   1661 			if (aggr_port_promisc(port, B_TRUE) != 0)
   1662 				aggr_port_stop(port);
   1663 		}
   1664 		mac_perim_exit(pmph);
   1665 	}
   1666 
   1667 	grp->lg_started = B_TRUE;
   1668 
   1669 	mac_perim_exit(mph);
   1670 	return (0);
   1671 }
   1672 
   1673 static void
   1674 aggr_m_stop(void *arg)
   1675 {
   1676 	aggr_grp_t *grp = arg;
   1677 	aggr_port_t *port;
   1678 	mac_perim_handle_t mph, pmph;
   1679 
   1680 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
   1681 
   1682 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
   1683 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
   1684 
   1685 		/* reset port promiscuous mode */
   1686 		(void) aggr_port_promisc(port, B_FALSE);
   1687 
   1688 		aggr_port_stop(port);
   1689 		mac_perim_exit(pmph);
   1690 	}
   1691 
   1692 	grp->lg_started = B_FALSE;
   1693 	mac_perim_exit(mph);
   1694 }
   1695 
   1696 static int
   1697 aggr_m_promisc(void *arg, boolean_t on)
   1698 {
   1699 	aggr_grp_t *grp = arg;
   1700 	aggr_port_t *port;
   1701 	boolean_t link_state_changed = B_FALSE;
   1702 	mac_perim_handle_t mph, pmph;
   1703 
   1704 	AGGR_GRP_REFHOLD(grp);
   1705 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
   1706 
   1707 	ASSERT(!grp->lg_closing);
   1708 
   1709 	if (on == grp->lg_promisc)
   1710 		goto bail;
   1711 
   1712 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
   1713 		int	err = 0;
   1714 
   1715 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
   1716 		AGGR_PORT_REFHOLD(port);
   1717 		if (!on && (port->lp_prom_addr == NULL))
   1718 			err = aggr_port_promisc(port, B_FALSE);
   1719 		else if (on && port->lp_started)
   1720 			err = aggr_port_promisc(port, B_TRUE);
   1721 
   1722 		if (err != 0) {
   1723 			if (aggr_grp_detach_port(grp, port))
   1724 				link_state_changed = B_TRUE;
   1725 		} else {
   1726 			/*
   1727 			 * If a port was detached because of a previous
   1728 			 * failure changing the promiscuity, the port
   1729 			 * is reattached when it successfully changes
   1730 			 * the promiscuity now, and this might cause
   1731 			 * the link state of the aggregation to change.
   1732 			 */
   1733 			if (aggr_grp_attach_port(grp, port))
   1734 				link_state_changed = B_TRUE;
   1735 		}
   1736 		mac_perim_exit(pmph);
   1737 		AGGR_PORT_REFRELE(port);
   1738 	}
   1739 
   1740 	grp->lg_promisc = on;
   1741 
   1742 	if (link_state_changed)
   1743 		mac_link_update(grp->lg_mh, grp->lg_link_state);
   1744 
   1745 bail:
   1746 	mac_perim_exit(mph);
   1747 	AGGR_GRP_REFRELE(grp);
   1748 
   1749 	return (0);
   1750 }
   1751 
   1752 static void
   1753 aggr_grp_port_rename(const char *new_name, void *arg)
   1754 {
   1755 	/*
   1756 	 * aggr port's mac client name is the format of "aggr link name" plus
   1757 	 * AGGR_PORT_NAME_DELIMIT plus "underneath link name".
   1758 	 */
   1759 	int aggr_len, link_len, clnt_name_len, i;
   1760 	char *str_end, *str_st, *str_del;
   1761 	char aggr_name[MAXNAMELEN];
   1762 	char link_name[MAXNAMELEN];
   1763 	char *clnt_name;
   1764 	aggr_grp_t *aggr_grp = arg;
   1765 	aggr_port_t *aggr_port = aggr_grp->lg_ports;
   1766 
   1767 	for (i = 0; i < aggr_grp->lg_nports; i++) {
   1768 		clnt_name = mac_client_name(aggr_port->lp_mch);
   1769 		clnt_name_len = strlen(clnt_name);
   1770 		str_st = clnt_name;
   1771 		str_end = &(clnt_name[clnt_name_len]);
   1772 		str_del = strchr(str_st, AGGR_PORT_NAME_DELIMIT);
   1773 		ASSERT(str_del != NULL);
   1774 		aggr_len = (intptr_t)((uintptr_t)str_del - (uintptr_t)str_st);
   1775 		link_len = (intptr_t)((uintptr_t)str_end - (uintptr_t)str_del);
   1776 		bzero(aggr_name, MAXNAMELEN);
   1777 		bzero(link_name, MAXNAMELEN);
   1778 		bcopy(clnt_name, aggr_name, aggr_len);
   1779 		bcopy(str_del, link_name, link_len + 1);
   1780 		bzero(clnt_name, MAXNAMELEN);
   1781 		(void) snprintf(clnt_name, MAXNAMELEN, "%s%s", new_name,
   1782 		    link_name);
   1783 
   1784 		(void) mac_rename_primary(aggr_port->lp_mh, NULL);
   1785 		aggr_port = aggr_port->lp_next;
   1786 	}
   1787 }
   1788 
   1789 /*
   1790  * Initialize the capabilities that are advertised for the group
   1791  * according to the capabilities of the constituent ports.
   1792  */
   1793 static boolean_t
   1794 aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
   1795 {
   1796 	aggr_grp_t *grp = arg;
   1797 
   1798 	switch (cap) {
   1799 	case MAC_CAPAB_HCKSUM: {
   1800 		uint32_t *hcksum_txflags = cap_data;
   1801 		*hcksum_txflags = grp->lg_hcksum_txflags;
   1802 		break;
   1803 	}
   1804 	case MAC_CAPAB_LSO: {
   1805 		mac_capab_lso_t *cap_lso = cap_data;
   1806 
   1807 		if (grp->lg_lso) {
   1808 			*cap_lso = grp->lg_cap_lso;
   1809 			break;
   1810 		} else {
   1811 			return (B_FALSE);
   1812 		}
   1813 	}
   1814 	case MAC_CAPAB_NO_NATIVEVLAN:
   1815 		return (!grp->lg_vlan);
   1816 	case MAC_CAPAB_NO_ZCOPY:
   1817 		return (!grp->lg_zcopy);
   1818 	case MAC_CAPAB_RINGS: {
   1819 		mac_capab_rings_t *cap_rings = cap_data;
   1820 
   1821 		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
   1822 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
   1823 			cap_rings->mr_rnum = grp->lg_rx_group.arg_ring_cnt;
   1824 			cap_rings->mr_rget = aggr_fill_ring;
   1825 
   1826 			/*
   1827 			 * An aggregation advertises only one (pseudo) RX
   1828 			 * group, which virtualizes the main/primary group of
   1829 			 * the underlying devices.
   1830 			 */
   1831 			cap_rings->mr_gnum = 1;
   1832 			cap_rings->mr_gget = aggr_fill_group;
   1833 			cap_rings->mr_gaddring = NULL;
   1834 			cap_rings->mr_gremring = NULL;
   1835 		} else {
   1836 			return (B_FALSE);
   1837 		}
   1838 		break;
   1839 	}
   1840 	case MAC_CAPAB_AGGR:
   1841 	{
   1842 		mac_capab_aggr_t *aggr_cap;
   1843 
   1844 		if (cap_data != NULL) {
   1845 			aggr_cap = cap_data;
   1846 			aggr_cap->mca_rename_fn = aggr_grp_port_rename;
   1847 			aggr_cap->mca_unicst = aggr_m_unicst;
   1848 		}
   1849 		return (B_TRUE);
   1850 	}
   1851 	default:
   1852 		return (B_FALSE);
   1853 	}
   1854 	return (B_TRUE);
   1855 }
   1856 
   1857 /*
   1858  * Callback funtion for MAC layer to register groups.
   1859  */
   1860 static void
   1861 aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index,
   1862     mac_group_info_t *infop, mac_group_handle_t gh)
   1863 {
   1864 	aggr_grp_t *grp = arg;
   1865 	aggr_pseudo_rx_group_t *rx_group;
   1866 
   1867 	ASSERT(rtype == MAC_RING_TYPE_RX && index == 0);
   1868 	rx_group = &grp->lg_rx_group;
   1869 	rx_group->arg_gh = gh;
   1870 	rx_group->arg_grp = grp;
   1871 
   1872 	infop->mgi_driver = (mac_group_driver_t)rx_group;
   1873 	infop->mgi_start = NULL;
   1874 	infop->mgi_stop = NULL;
   1875 	infop->mgi_addmac = aggr_addmac;
   1876 	infop->mgi_remmac = aggr_remmac;
   1877 	infop->mgi_count = rx_group->arg_ring_cnt;
   1878 }
   1879 
   1880 /*
   1881  * Callback funtion for MAC layer to register all rings.
   1882  */
   1883 static void
   1884 aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
   1885     const int index, mac_ring_info_t *infop, mac_ring_handle_t rh)
   1886 {
   1887 	aggr_grp_t	*grp = arg;
   1888 
   1889 	switch (rtype) {
   1890 	case MAC_RING_TYPE_RX: {
   1891 		aggr_pseudo_rx_group_t	*rx_group = &grp->lg_rx_group;
   1892 		aggr_pseudo_rx_ring_t	*rx_ring;
   1893 		mac_intr_t		aggr_mac_intr;
   1894 
   1895 		ASSERT(rg_index == 0);
   1896 
   1897 		ASSERT((index >= 0) && (index < rx_group->arg_ring_cnt));
   1898 		rx_ring = rx_group->arg_rings + index;
   1899 		rx_ring->arr_rh = rh;
   1900 
   1901 		/*
   1902 		 * Entrypoint to enable interrupt (disable poll) and
   1903 		 * disable interrupt (enable poll).
   1904 		 */
   1905 		aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring;
   1906 		aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr;
   1907 		aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr;
   1908 
   1909 		infop->mri_driver = (mac_ring_driver_t)rx_ring;
   1910 		infop->mri_start = aggr_pseudo_start_ring;
   1911 		infop->mri_stop = aggr_pseudo_stop_ring;
   1912 
   1913 		infop->mri_intr = aggr_mac_intr;
   1914 		infop->mri_poll = aggr_rx_poll;
   1915 		break;
   1916 	}
   1917 	default:
   1918 		break;
   1919 	}
   1920 }
   1921 
   1922 static mblk_t *
   1923 aggr_rx_poll(void *arg, int bytes_to_pickup)
   1924 {
   1925 	aggr_pseudo_rx_ring_t *rr_ring = arg;
   1926 	aggr_port_t *port = rr_ring->arr_port;
   1927 	aggr_grp_t *grp = port->lp_grp;
   1928 	mblk_t *mp_chain, *mp, **mpp;
   1929 
   1930 	mp_chain = mac_hwring_poll(rr_ring->arr_hw_rh, bytes_to_pickup);
   1931 
   1932 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
   1933 		return (mp_chain);
   1934 
   1935 	mpp = &mp_chain;
   1936 	while ((mp = *mpp) != NULL) {
   1937 		if (MBLKL(mp) >= sizeof (struct ether_header)) {
   1938 			struct ether_header *ehp;
   1939 
   1940 			ehp = (struct ether_header *)mp->b_rptr;
   1941 			if (ntohs(ehp->ether_type) == ETHERTYPE_SLOW) {
   1942 				*mpp = mp->b_next;
   1943 				mp->b_next = NULL;
   1944 				aggr_recv_lacp(port,
   1945 				    (mac_resource_handle_t)rr_ring, mp);
   1946 				continue;
   1947 			}
   1948 		}
   1949 
   1950 		if (!port->lp_collector_enabled) {
   1951 			*mpp = mp->b_next;
   1952 			mp->b_next = NULL;
   1953 			freemsg(mp);
   1954 			continue;
   1955 		}
   1956 		mpp = &mp->b_next;
   1957 	}
   1958 	return (mp_chain);
   1959 }
   1960 
   1961 static int
   1962 aggr_addmac(void *arg, const uint8_t *mac_addr)
   1963 {
   1964 	aggr_pseudo_rx_group_t	*rx_group = (aggr_pseudo_rx_group_t *)arg;
   1965 	aggr_unicst_addr_t	*addr, **pprev;
   1966 	aggr_grp_t		*grp = rx_group->arg_grp;
   1967 	aggr_port_t		*port, *p;
   1968 	mac_perim_handle_t	mph;
   1969 	int			err = 0;
   1970 
   1971 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
   1972 
   1973 	if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
   1974 		mac_perim_exit(mph);
   1975 		return (0);
   1976 	}
   1977 
   1978 	/*
   1979 	 * Insert this mac address into the list of mac addresses owned by
   1980 	 * the aggregation pseudo group.
   1981 	 */
   1982 	pprev = &rx_group->arg_macaddr;
   1983 	while ((addr = *pprev) != NULL) {
   1984 		if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) {
   1985 			mac_perim_exit(mph);
   1986 			return (EEXIST);
   1987 		}
   1988 		pprev = &addr->aua_next;
   1989 	}
   1990 	addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP);
   1991 	bcopy(mac_addr, addr->aua_addr, ETHERADDRL);
   1992 	addr->aua_next = NULL;
   1993 	*pprev = addr;
   1994 
   1995 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
   1996 		if ((err = aggr_port_addmac(port, mac_addr)) != 0)
   1997 			break;
   1998 
   1999 	if (err != 0) {
   2000 		for (p = grp->lg_ports; p != port; p = p->lp_next)
   2001 			aggr_port_remmac(p, mac_addr);
   2002 
   2003 		*pprev = NULL;
   2004 		kmem_free(addr, sizeof (aggr_unicst_addr_t));
   2005 	}
   2006 
   2007 	mac_perim_exit(mph);
   2008 	return (err);
   2009 }
   2010 
   2011 static int
   2012 aggr_remmac(void *arg, const uint8_t *mac_addr)
   2013 {
   2014 	aggr_pseudo_rx_group_t	*rx_group = (aggr_pseudo_rx_group_t *)arg;
   2015 	aggr_unicst_addr_t	*addr, **pprev;
   2016 	aggr_grp_t		*grp = rx_group->arg_grp;
   2017 	aggr_port_t		*port;
   2018 	mac_perim_handle_t	mph;
   2019 	int			err = 0;
   2020 
   2021 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
   2022 
   2023 	if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
   2024 		mac_perim_exit(mph);
   2025 		return (0);
   2026 	}
   2027 
   2028 	/*
   2029 	 * Insert this mac address into the list of mac addresses owned by
   2030 	 * the aggregation pseudo group.
   2031 	 */
   2032 	pprev = &rx_group->arg_macaddr;
   2033 	while ((addr = *pprev) != NULL) {
   2034 		if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) {
   2035 			pprev = &addr->aua_next;
   2036 			continue;
   2037 		}
   2038 		break;
   2039 	}
   2040 	if (addr == NULL) {
   2041 		mac_perim_exit(mph);
   2042 		return (EINVAL);
   2043 	}
   2044 
   2045 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
   2046 		aggr_port_remmac(port, mac_addr);
   2047 
   2048 	*pprev = addr->aua_next;
   2049 	kmem_free(addr, sizeof (aggr_unicst_addr_t));
   2050 
   2051 	mac_perim_exit(mph);
   2052 	return (err);
   2053 }
   2054 
   2055 /*
   2056  * Add or remove the multicast addresses that are defined for the group
   2057  * to or from the specified port.
   2058  *
   2059  * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port
   2060  * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is
   2061  * called when the port is either stopped or detached.
   2062  */
   2063 void
   2064 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
   2065 {
   2066 	aggr_grp_t *grp = port->lp_grp;
   2067 
   2068 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
   2069 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
   2070 
   2071 	if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED)
   2072 		return;
   2073 
   2074 	mac_multicast_refresh(grp->lg_mh, aggr_port_multicst, port, add);
   2075 }
   2076 
   2077 static int
   2078 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
   2079 {
   2080 	aggr_grp_t *grp = arg;
   2081 	aggr_port_t *port = NULL;
   2082 	mac_perim_handle_t mph;
   2083 	int err = 0, cerr;
   2084 
   2085 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
   2086 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
   2087 		if (port->lp_state != AGGR_PORT_STATE_ATTACHED ||
   2088 		    !port->lp_started) {
   2089 			continue;
   2090 		}
   2091 		cerr = aggr_port_multicst(port, add, addrp);
   2092 		if (cerr != 0 && err == 0)
   2093 			err = cerr;
   2094 	}
   2095 	mac_perim_exit(mph);
   2096 	return (err);
   2097 }
   2098 
   2099 static int
   2100 aggr_m_unicst(void *arg, const uint8_t *macaddr)
   2101 {
   2102 	aggr_grp_t *grp = arg;
   2103 	mac_perim_handle_t mph;
   2104 	int err;
   2105 
   2106 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
   2107 	err = aggr_grp_modify_common(grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr,
   2108 	    0, 0);
   2109 	mac_perim_exit(mph);
   2110 	return (err);
   2111 }
   2112 
   2113 /*
   2114  * Initialize the capabilities that are advertised for the group
   2115  * according to the capabilities of the constituent ports.
   2116  */
   2117 static void
   2118 aggr_grp_capab_set(aggr_grp_t *grp)
   2119 {
   2120 	uint32_t cksum;
   2121 	aggr_port_t *port;
   2122 	mac_capab_lso_t cap_lso;
   2123 
   2124 	ASSERT(grp->lg_mh == NULL);
   2125 	ASSERT(grp->lg_ports != NULL);
   2126 
   2127 	grp->lg_hcksum_txflags = (uint32_t)-1;
   2128 	grp->lg_zcopy = B_TRUE;
   2129 	grp->lg_vlan = B_TRUE;
   2130 
   2131 	grp->lg_lso = B_TRUE;
   2132 	grp->lg_cap_lso.lso_flags = (t_uscalar_t)-1;
   2133 	grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = (t_uscalar_t)-1;
   2134 
   2135 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
   2136 		if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum))
   2137 			cksum = 0;
   2138 		grp->lg_hcksum_txflags &= cksum;
   2139 
   2140 		grp->lg_vlan &=
   2141 		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL);
   2142 
   2143 		grp->lg_zcopy &=
   2144 		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL);
   2145 
   2146 		grp->lg_lso &=
   2147 		    mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso);
   2148 		if (grp->lg_lso) {
   2149 			grp->lg_cap_lso.lso_flags &= cap_lso.lso_flags;
   2150 			if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
   2151 			    cap_lso.lso_basic_tcp_ipv4.lso_max)
   2152 				grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max =
   2153 				    cap_lso.lso_basic_tcp_ipv4.lso_max;
   2154 		}
   2155 	}
   2156 }
   2157 
   2158 /*
   2159  * Checks whether the capabilities of the port being added are compatible
   2160  * with the current capabilities of the aggregation.
   2161  */
   2162 static boolean_t
   2163 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port)
   2164 {
   2165 	uint32_t hcksum_txflags;
   2166 
   2167 	ASSERT(grp->lg_ports != NULL);
   2168 
   2169 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) &
   2170 	    grp->lg_vlan) != grp->lg_vlan) {
   2171 		return (B_FALSE);
   2172 	}
   2173 
   2174 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) &
   2175 	    grp->lg_zcopy) != grp->lg_zcopy) {
   2176 		return (B_FALSE);
   2177 	}
   2178 
   2179 	if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) {
   2180 		if (grp->lg_hcksum_txflags != 0)
   2181 			return (B_FALSE);
   2182 	} else if ((hcksum_txflags & grp->lg_hcksum_txflags) !=
   2183 	    grp->lg_hcksum_txflags) {
   2184 		return (B_FALSE);
   2185 	}
   2186 
   2187 	if (grp->lg_lso) {
   2188 		mac_capab_lso_t cap_lso;
   2189 
   2190 		if (mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso)) {
   2191 			if ((grp->lg_cap_lso.lso_flags & cap_lso.lso_flags) !=
   2192 			    grp->lg_cap_lso.lso_flags)
   2193 				return (B_FALSE);
   2194 			if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
   2195 			    cap_lso.lso_basic_tcp_ipv4.lso_max)
   2196 				return (B_FALSE);
   2197 		} else {
   2198 			return (B_FALSE);
   2199 		}
   2200 	}
   2201 
   2202 	return (B_TRUE);
   2203 }
   2204 
   2205 /*
   2206  * Returns the maximum SDU according to the SDU of the constituent ports.
   2207  */
   2208 static uint_t
   2209 aggr_grp_max_sdu(aggr_grp_t *grp)
   2210 {
   2211 	uint_t max_sdu = (uint_t)-1;
   2212 	aggr_port_t *port;
   2213 
   2214 	ASSERT(grp->lg_ports != NULL);
   2215 
   2216 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
   2217 		uint_t port_sdu_max;
   2218 
   2219 		mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
   2220 		if (max_sdu > port_sdu_max)
   2221 			max_sdu = port_sdu_max;
   2222 	}
   2223 
   2224 	return (max_sdu);
   2225 }
   2226 
   2227 /*
   2228  * Checks if the maximum SDU of the specified port is compatible
   2229  * with the maximum SDU of the specified aggregation group, returns
   2230  * B_TRUE if it is, B_FALSE otherwise.
   2231  */
   2232 static boolean_t
   2233 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port)
   2234 {
   2235 	uint_t port_sdu_max;
   2236 
   2237 	mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
   2238 	return (port_sdu_max >= grp->lg_max_sdu);
   2239 }
   2240 
   2241 /*
   2242  * Returns the maximum margin according to the margin of the constituent ports.
   2243  */
   2244 static uint32_t
   2245 aggr_grp_max_margin(aggr_grp_t *grp)
   2246 {
   2247 	uint32_t margin = UINT32_MAX;
   2248 	aggr_port_t *port;
   2249 
   2250 	ASSERT(grp->lg_mh == NULL);
   2251 	ASSERT(grp->lg_ports != NULL);
   2252 
   2253 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
   2254 		if (margin > port->lp_margin)
   2255 			margin = port->lp_margin;
   2256 	}
   2257 
   2258 	grp->lg_margin = margin;
   2259 	return (margin);
   2260 }
   2261 
   2262 /*
   2263  * Checks if the maximum margin of the specified port is compatible
   2264  * with the maximum margin of the specified aggregation group, returns
   2265  * B_TRUE if it is, B_FALSE otherwise.
   2266  */
   2267 static boolean_t
   2268 aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port)
   2269 {
   2270 	if (port->lp_margin >= grp->lg_margin)
   2271 		return (B_TRUE);
   2272 
   2273 	/*
   2274 	 * See whether the current margin value is allowed to be changed to
   2275 	 * the new value.
   2276 	 */
   2277 	if (!mac_margin_update(grp->lg_mh, port->lp_margin))
   2278 		return (B_FALSE);
   2279 
   2280 	grp->lg_margin = port->lp_margin;
   2281 	return (B_TRUE);
   2282 }
   2283 
   2284 /*
   2285  * Set MTU on individual ports of an aggregation group
   2286  */
   2287 static int
   2288 aggr_set_port_sdu(aggr_grp_t *grp, aggr_port_t *port, uint32_t sdu,
   2289     uint32_t *old_mtu)
   2290 {
   2291 	boolean_t 		removed = B_FALSE;
   2292 	mac_perim_handle_t	mph;
   2293 	mac_diag_t		diag;
   2294 	int			err, rv, retry = 0;
   2295 
   2296 	if (port->lp_mah != NULL) {
   2297 		(void) mac_unicast_remove(port->lp_mch, port->lp_mah);
   2298 		port->lp_mah = NULL;
   2299 		removed = B_TRUE;
   2300 	}
   2301 	err = mac_set_mtu(port->lp_mh, sdu, old_mtu);
   2302 try_again:
   2303 	if (removed && (rv = mac_unicast_add(port->lp_mch, NULL,
   2304 	    MAC_UNICAST_PRIMARY | MAC_UNICAST_DISABLE_TX_VID_CHECK,
   2305 	    &port->lp_mah, 0, &diag)) != 0) {
   2306 		/*
   2307 		 * following is a workaround for a bug in 'bge' driver.
   2308 		 * See CR 6794654 for more information and this work around
   2309 		 * will be removed once the CR is fixed.
   2310 		 */
   2311 		if (rv == EIO && retry++ < 3) {
   2312 			delay(2 * hz);
   2313 			goto try_again;
   2314 		}
   2315 		/*
   2316 		 * if mac_unicast_add() failed while setting the MTU,
   2317 		 * detach the port from the group.
   2318 		 */
   2319 		mac_perim_enter_by_mh(port->lp_mh, &mph);
   2320 		(void) aggr_grp_detach_port(grp, port);
   2321 		mac_perim_exit(mph);
   2322 		cmn_err(CE_WARN, "Unable to restart the port %s while "
   2323 		    "setting MTU. Detaching the port from the aggregation.",
   2324 		    mac_client_name(port->lp_mch));
   2325 	}
   2326 	return (err);
   2327 }
   2328 
   2329 static int
   2330 aggr_sdu_update(aggr_grp_t *grp, uint32_t sdu)
   2331 {
   2332 	int			err = 0, i, rv;
   2333 	aggr_port_t		*port;
   2334 	uint32_t		*mtu;
   2335 
   2336 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
   2337 
   2338 	/*
   2339 	 * If the MTU being set is equal to aggr group's maximum
   2340 	 * allowable value, then there is nothing to change
   2341 	 */
   2342 	if (sdu == grp->lg_max_sdu)
   2343 		return (0);
   2344 
   2345 	/* 0 is aggr group's min sdu */
   2346 	if (sdu == 0)
   2347 		return (EINVAL);
   2348 
   2349 	mtu = kmem_alloc(sizeof (uint32_t) * grp->lg_nports, KM_SLEEP);
   2350 	for (port = grp->lg_ports, i = 0; port != NULL && err == 0;
   2351 	    port = port->lp_next, i++) {
   2352 		err = aggr_set_port_sdu(grp, port, sdu, mtu + i);
   2353 	}
   2354 	if (err != 0) {
   2355 		/* recover from error: reset the mtus of the ports */
   2356 		aggr_port_t *tmp;
   2357 
   2358 		for (tmp = grp->lg_ports, i = 0; tmp != port;
   2359 		    tmp = tmp->lp_next, i++) {
   2360 			(void) aggr_set_port_sdu(grp, tmp, *(mtu + i), NULL);
   2361 		}
   2362 		goto bail;
   2363 	}
   2364 	grp->lg_max_sdu = aggr_grp_max_sdu(grp);
   2365 	rv = mac_maxsdu_update(grp->lg_mh, grp->lg_max_sdu);
   2366 	ASSERT(rv == 0);
   2367 bail:
   2368 	kmem_free(mtu, sizeof (uint32_t) * grp->lg_nports);
   2369 	return (err);
   2370 }
   2371 
   2372 /*
   2373  * Callback functions for set/get of properties
   2374  */
   2375 /*ARGSUSED*/
   2376 static int
   2377 aggr_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
   2378     uint_t pr_valsize, const void *pr_val)
   2379 {
   2380 	int 		err = ENOTSUP;
   2381 	aggr_grp_t 	*grp = m_driver;
   2382 
   2383 	switch (pr_num) {
   2384 	case MAC_PROP_MTU: {
   2385 		uint32_t 	mtu;
   2386 
   2387 		if (pr_valsize < sizeof (mtu)) {
   2388 			err = EINVAL;
   2389 			break;
   2390 		}
   2391 		bcopy(pr_val, &mtu, sizeof (mtu));
   2392 		err = aggr_sdu_update(grp, mtu);
   2393 		break;
   2394 	}
   2395 	default:
   2396 		break;
   2397 	}
   2398 	return (err);
   2399 }
   2400 
   2401 int
   2402 aggr_grp_possible_mtu_range(aggr_grp_t *grp, mac_propval_range_t *range)
   2403 {
   2404 	mac_propval_range_t		*vals;
   2405 	mac_propval_uint32_range_t	*ur;
   2406 	aggr_port_t			*port;
   2407 	mac_perim_handle_t		mph;
   2408 	mac_prop_t 			macprop;
   2409 	uint_t 				perm, i;
   2410 	uint32_t 			min = 0, max = (uint32_t)-1;
   2411 	int 				err = 0;
   2412 
   2413 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
   2414 
   2415 	vals = kmem_alloc(sizeof (mac_propval_range_t) * grp->lg_nports,
   2416 	    KM_SLEEP);
   2417 	macprop.mp_id = MAC_PROP_MTU;
   2418 	macprop.mp_name = "mtu";
   2419 	macprop.mp_flags = MAC_PROP_POSSIBLE;
   2420 
   2421 	for (port = grp->lg_ports, i = 0; port != NULL;
   2422 	    port = port->lp_next, i++) {
   2423 		mac_perim_enter_by_mh(port->lp_mh, &mph);
   2424 		err = mac_get_prop(port->lp_mh, &macprop, vals + i,
   2425 		    sizeof (mac_propval_range_t), &perm);
   2426 		mac_perim_exit(mph);
   2427 		if (err != 0)
   2428 			break;
   2429 	}
   2430 	/*
   2431 	 * if any of the underlying ports does not support changing MTU then
   2432 	 * just return ENOTSUP
   2433 	 */
   2434 	if (port != NULL) {
   2435 		ASSERT(err != 0);
   2436 		goto done;
   2437 	}
   2438 	range->mpr_count = 1;
   2439 	range->mpr_type = MAC_PROPVAL_UINT32;
   2440 	for (i = 0; i < grp->lg_nports; i++) {
   2441 		ur = &((vals + i)->range_uint32[0]);
   2442 		/*
   2443 		 * Take max of the min, for range_min; that is the minimum
   2444 		 * MTU value for an aggregation is the maximum of the
   2445 		 * minimum values of all the underlying ports
   2446 		 */
   2447 		if (ur->mpur_min > min)
   2448 			min = ur->mpur_min;
   2449 		/* Take min of the max, for range_max */
   2450 		if (ur->mpur_max < max)
   2451 			max = ur->mpur_max;
   2452 	}
   2453 	range->range_uint32[0].mpur_min = min;
   2454 	range->range_uint32[0].mpur_max = max;
   2455 done:
   2456 	kmem_free(vals, sizeof (mac_propval_range_t) * grp->lg_nports);
   2457 	return (err);
   2458 }
   2459 
   2460 /*ARGSUSED*/
   2461 static int
   2462 aggr_m_getprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
   2463     uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm)
   2464 {
   2465 	mac_propval_range_t 	range;
   2466 	int 			err = ENOTSUP;
   2467 	aggr_grp_t		*grp = m_driver;
   2468 
   2469 	switch (pr_num) {
   2470 	case MAC_PROP_MTU:
   2471 		if (!(pr_flags & MAC_PROP_POSSIBLE))
   2472 			return (ENOTSUP);
   2473 		if (pr_valsize < sizeof (mac_propval_range_t))
   2474 			return (EINVAL);
   2475 		if ((err = aggr_grp_possible_mtu_range(grp, &range)) != 0)
   2476 			return (err);
   2477 		bcopy(&range, pr_val, sizeof (range));
   2478 		return (0);
   2479 	}
   2480 	return (err);
   2481 }
   2482