Home | History | Annotate | Download | only in mac
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/types.h>
     28 #include <sys/conf.h>
     29 #include <sys/id_space.h>
     30 #include <sys/esunddi.h>
     31 #include <sys/stat.h>
     32 #include <sys/mkdev.h>
     33 #include <sys/stream.h>
     34 #include <sys/strsubr.h>
     35 #include <sys/dlpi.h>
     36 #include <sys/modhash.h>
     37 #include <sys/mac.h>
     38 #include <sys/mac_provider.h>
     39 #include <sys/mac_impl.h>
     40 #include <sys/mac_client_impl.h>
     41 #include <sys/mac_client_priv.h>
     42 #include <sys/mac_soft_ring.h>
     43 #include <sys/dld.h>
     44 #include <sys/modctl.h>
     45 #include <sys/fs/dv_node.h>
     46 #include <sys/thread.h>
     47 #include <sys/proc.h>
     48 #include <sys/callb.h>
     49 #include <sys/cpuvar.h>
     50 #include <sys/atomic.h>
     51 #include <sys/sdt.h>
     52 #include <sys/mac_flow.h>
     53 #include <sys/ddi_intr_impl.h>
     54 #include <sys/disp.h>
     55 #include <sys/sdt.h>
     56 
     57 /*
     58  * MAC Provider Interface.
     59  *
     60  * Interface for GLDv3 compatible NIC drivers.
     61  */
     62 
/* Forward declaration: entry point of the per-MAC notification thread. */
static void i_mac_notify_thread(void *);

/* Signature of a default notification handler; it receives the MAC instance. */
typedef void (*mac_notify_default_cb_fn_t)(mac_impl_t *);

/*
 * Table of default notification handlers, sized by MAC_NNOTE.  A NULL slot
 * means no default handler is listed for that notification type; only the
 * first slot (labelled MAC_NOTE_LINK) has one, mac_fanout_recompute.
 *
 * NOTE(review): the per-slot labels are positional and presumably follow the
 * order of the notification-type enumeration -- confirm against the header.
 * MAC_NOTE_DEST is sent by mac_dst_update() but has no labelled slot here.
 */
static const mac_notify_default_cb_fn_t mac_notify_cb_list[MAC_NNOTE] = {
	mac_fanout_recompute,	/* MAC_NOTE_LINK */
	NULL,		/* MAC_NOTE_UNICST */
	NULL,		/* MAC_NOTE_TX */
	NULL,		/* MAC_NOTE_DEVPROMISC */
	NULL,		/* MAC_NOTE_FASTPATH_FLUSH */
	NULL,		/* MAC_NOTE_SDU_SIZE */
	NULL,		/* MAC_NOTE_MARGIN */
	NULL,		/* MAC_NOTE_CAPAB_CHG */
	NULL		/* MAC_NOTE_LOWLINK */
};
     78 
     79 /*
     80  * Driver support functions.
     81  */
     82 
     83 /* REGISTRATION */
     84 
     85 mac_register_t *
     86 mac_alloc(uint_t mac_version)
     87 {
     88 	mac_register_t *mregp;
     89 
     90 	/*
     91 	 * Make sure there isn't a version mismatch between the driver and
     92 	 * the framework.  In the future, if multiple versions are
     93 	 * supported, this check could become more sophisticated.
     94 	 */
     95 	if (mac_version != MAC_VERSION)
     96 		return (NULL);
     97 
     98 	mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
     99 	mregp->m_version = mac_version;
    100 	return (mregp);
    101 }
    102 
    103 void
    104 mac_free(mac_register_t *mregp)
    105 {
    106 	kmem_free(mregp, sizeof (mac_register_t));
    107 }
    108 
    109 /*
    110  * mac_register() is how drivers register new MACs with the GLDv3
    111  * framework.  The mregp argument is allocated by drivers using the
    112  * mac_alloc() function, and can be freed using mac_free() immediately upon
    113  * return from mac_register().  Upon success (0 return value), the mhp
    114  * opaque pointer becomes the driver's handle to its MAC interface, and is
    115  * the argument to all other mac module entry points.
    116  */
    117 /* ARGSUSED */
    118 int
    119 mac_register(mac_register_t *mregp, mac_handle_t *mhp)
    120 {
    121 	mac_impl_t		*mip;
    122 	mactype_t		*mtype;
    123 	int			err = EINVAL;
    124 	struct devnames		*dnp = NULL;
    125 	uint_t			instance;
    126 	boolean_t		style1_created = B_FALSE;
    127 	boolean_t		style2_created = B_FALSE;
    128 	char			*driver;
    129 	minor_t			minor = 0;
    130 
    131 	/* A successful call to mac_init_ops() sets the DN_GLDV3_DRIVER flag. */
    132 	if (!GLDV3_DRV(ddi_driver_major(mregp->m_dip)))
    133 		return (EINVAL);
    134 
    135 	/* Find the required MAC-Type plugin. */
    136 	if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
    137 		return (EINVAL);
    138 
    139 	/* Create a mac_impl_t to represent this MAC. */
    140 	mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
    141 
    142 	/*
    143 	 * The mac is not ready for open yet.
    144 	 */
    145 	mip->mi_state_flags |= MIS_DISABLED;
    146 
    147 	/*
    148 	 * When a mac is registered, the m_instance field can be set to:
    149 	 *
    150 	 *  0:	Get the mac's instance number from m_dip.
    151 	 *	This is usually used for physical device dips.
    152 	 *
    153 	 *  [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
    154 	 *	For example, when an aggregation is created with the key option,
    155 	 *	"key" will be used as the instance number.
    156 	 *
    157 	 *  -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
    158 	 *	This is often used when a MAC of a virtual link is registered
    159 	 *	(e.g., aggregation when "key" is not specified, or vnic).
    160 	 *
    161 	 * Note that the instance number is used to derive the mi_minor field
    162 	 * of mac_impl_t, which will then be used to derive the name of kstats
    163 	 * and the devfs nodes.  The first 2 cases are needed to preserve
    164 	 * backward compatibility.
    165 	 */
    166 	switch (mregp->m_instance) {
    167 	case 0:
    168 		instance = ddi_get_instance(mregp->m_dip);
    169 		break;
    170 	case ((uint_t)-1):
    171 		minor = mac_minor_hold(B_TRUE);
    172 		if (minor == 0) {
    173 			err = ENOSPC;
    174 			goto fail;
    175 		}
    176 		instance = minor - 1;
    177 		break;
    178 	default:
    179 		instance = mregp->m_instance;
    180 		if (instance >= MAC_MAX_MINOR) {
    181 			err = EINVAL;
    182 			goto fail;
    183 		}
    184 		break;
    185 	}
    186 
    187 	mip->mi_minor = (minor_t)(instance + 1);
    188 	mip->mi_dip = mregp->m_dip;
    189 	mip->mi_clients_list = NULL;
    190 	mip->mi_nclients = 0;
    191 
    192 	/* Set the default IEEE Port VLAN Identifier */
    193 	mip->mi_pvid = 1;
    194 
    195 	/* Default bridge link learning protection values */
    196 	mip->mi_llimit = 1000;
    197 	mip->mi_ldecay = 200;
    198 
    199 	driver = (char *)ddi_driver_name(mip->mi_dip);
    200 
    201 	/* Construct the MAC name as <drvname><instance> */
    202 	(void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
    203 	    driver, instance);
    204 
    205 	mip->mi_driver = mregp->m_driver;
    206 
    207 	mip->mi_type = mtype;
    208 	mip->mi_margin = mregp->m_margin;
    209 	mip->mi_info.mi_media = mtype->mt_type;
    210 	mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
    211 	if (mregp->m_max_sdu <= mregp->m_min_sdu)
    212 		goto fail;
    213 	mip->mi_sdu_min = mregp->m_min_sdu;
    214 	mip->mi_sdu_max = mregp->m_max_sdu;
    215 	mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
    216 	/*
    217 	 * If the media supports a broadcast address, cache a pointer to it
    218 	 * in the mac_info_t so that upper layers can use it.
    219 	 */
    220 	mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
    221 
    222 	mip->mi_v12n_level = mregp->m_v12n;
    223 
    224 	/*
    225 	 * Copy the unicast source address into the mac_info_t, but only if
    226 	 * the MAC-Type defines a non-zero address length.  We need to
    227 	 * handle MAC-Types that have an address length of 0
    228 	 * (point-to-point protocol MACs for example).
    229 	 */
    230 	if (mip->mi_type->mt_addr_length > 0) {
    231 		if (mregp->m_src_addr == NULL)
    232 			goto fail;
    233 		mip->mi_info.mi_unicst_addr =
    234 		    kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
    235 		bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
    236 		    mip->mi_type->mt_addr_length);
    237 
    238 		/*
    239 		 * Copy the fixed 'factory' MAC address from the immutable
    240 		 * info.  This is taken to be the MAC address currently in
    241 		 * use.
    242 		 */
    243 		bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
    244 		    mip->mi_type->mt_addr_length);
    245 
    246 		/*
    247 		 * At this point, we should set up the classification
    248 		 * rules etc but we delay it till mac_open() so that
    249 		 * the resource discovery has taken place and we
    250 		 * know someone wants to use the device. Otherwise
    251 		 * memory gets allocated for Rx ring structures even
    252 		 * during probe.
    253 		 */
    254 
    255 		/* Copy the destination address if one is provided. */
    256 		if (mregp->m_dst_addr != NULL) {
    257 			bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
    258 			    mip->mi_type->mt_addr_length);
    259 			mip->mi_dstaddr_set = B_TRUE;
    260 		}
    261 	} else if (mregp->m_src_addr != NULL) {
    262 		goto fail;
    263 	}
    264 
    265 	/*
    266 	 * The format of the m_pdata is specific to the plugin.  It is
    267 	 * passed in as an argument to all of the plugin callbacks.  The
    268 	 * driver can update this information by calling
    269 	 * mac_pdata_update().
    270 	 */
    271 	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) {
    272 		/*
    273 		 * Verify if the supplied plugin data is valid.  Note that
    274 		 * even if the caller passed in a NULL pointer as plugin data,
    275 		 * we still need to verify if that's valid as the plugin may
    276 		 * require plugin data to function.
    277 		 */
    278 		if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
    279 		    mregp->m_pdata_size)) {
    280 			goto fail;
    281 		}
    282 		if (mregp->m_pdata != NULL) {
    283 			mip->mi_pdata =
    284 			    kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
    285 			bcopy(mregp->m_pdata, mip->mi_pdata,
    286 			    mregp->m_pdata_size);
    287 			mip->mi_pdata_size = mregp->m_pdata_size;
    288 		}
    289 	} else if (mregp->m_pdata != NULL) {
    290 		/*
    291 		 * The caller supplied non-NULL plugin data, but the plugin
    292 		 * does not recognize plugin data.
    293 		 */
    294 		err = EINVAL;
    295 		goto fail;
    296 	}
    297 
    298 	/*
    299 	 * Register the private properties.
    300 	 */
    301 	mac_register_priv_prop(mip, mregp->m_priv_props,
    302 	    mregp->m_priv_prop_count);
    303 
    304 	/*
    305 	 * Stash the driver callbacks into the mac_impl_t, but first sanity
    306 	 * check to make sure all mandatory callbacks are set.
    307 	 */
    308 	if (mregp->m_callbacks->mc_getstat == NULL ||
    309 	    mregp->m_callbacks->mc_start == NULL ||
    310 	    mregp->m_callbacks->mc_stop == NULL ||
    311 	    mregp->m_callbacks->mc_setpromisc == NULL ||
    312 	    mregp->m_callbacks->mc_multicst == NULL) {
    313 		goto fail;
    314 	}
    315 	mip->mi_callbacks = mregp->m_callbacks;
    316 
    317 	if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
    318 	    &mip->mi_capab_legacy)) {
    319 		mip->mi_state_flags |= MIS_LEGACY;
    320 		mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
    321 	} else {
    322 		mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
    323 		    mip->mi_minor);
    324 	}
    325 
    326 	/*
    327 	 * Allocate a notification thread. thread_create blocks for memory
    328 	 * if needed, it never fails.
    329 	 */
    330 	mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
    331 	    mip, 0, &p0, TS_RUN, minclsyspri);
    332 
    333 	/*
    334 	 * Initialize the capabilities
    335 	 */
    336 
    337 	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL))
    338 		mip->mi_state_flags |= MIS_IS_VNIC;
    339 
    340 	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
    341 		mip->mi_state_flags |= MIS_IS_AGGR;
    342 
    343 	mac_addr_factory_init(mip);
    344 
    345 	/*
    346 	 * Enforce the virtrualization level registered.
    347 	 */
    348 	if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) {
    349 		if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 ||
    350 		    mac_init_rings(mip, MAC_RING_TYPE_TX) != 0)
    351 			goto fail;
    352 
    353 		/*
    354 		 * The driver needs to register at least rx rings for this
    355 		 * virtualization level.
    356 		 */
    357 		if (mip->mi_rx_groups == NULL)
    358 			goto fail;
    359 	}
    360 
    361 	/*
    362 	 * The driver must set mc_unicst entry point to NULL when it advertises
    363 	 * CAP_RINGS for rx groups.
    364 	 */
    365 	if (mip->mi_rx_groups != NULL) {
    366 		if (mregp->m_callbacks->mc_unicst != NULL)
    367 			goto fail;
    368 	} else {
    369 		if (mregp->m_callbacks->mc_unicst == NULL)
    370 			goto fail;
    371 	}
    372 
    373 	/*
    374 	 * The driver must set mc_tx entry point to NULL when it advertises
    375 	 * CAP_RINGS for tx rings.
    376 	 */
    377 	if (mip->mi_tx_groups != NULL) {
    378 		if (mregp->m_callbacks->mc_tx != NULL)
    379 			goto fail;
    380 	} else {
    381 		if (mregp->m_callbacks->mc_tx == NULL)
    382 			goto fail;
    383 	}
    384 
    385 	/*
    386 	 * Initialize MAC addresses. Must be called after mac_init_rings().
    387 	 */
    388 	mac_init_macaddr(mip);
    389 
    390 	mip->mi_share_capab.ms_snum = 0;
    391 	if (mip->mi_v12n_level & MAC_VIRT_HIO) {
    392 		(void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES,
    393 		    &mip->mi_share_capab);
    394 	}
    395 
    396 	/*
    397 	 * Initialize the kstats for this device.
    398 	 */
    399 	mac_stat_create(mip);
    400 
    401 	/* Zero out any properties. */
    402 	bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t));
    403 
    404 	if (mip->mi_minor <= MAC_MAX_MINOR) {
    405 		/* Create a style-2 DLPI device */
    406 		if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
    407 		    DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
    408 			goto fail;
    409 		style2_created = B_TRUE;
    410 
    411 		/* Create a style-1 DLPI device */
    412 		if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
    413 		    mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
    414 			goto fail;
    415 		style1_created = B_TRUE;
    416 	}
    417 
    418 	mac_flow_l2tab_create(mip, &mip->mi_flow_tab);
    419 
    420 	rw_enter(&i_mac_impl_lock, RW_WRITER);
    421 	if (mod_hash_insert(i_mac_impl_hash,
    422 	    (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
    423 		rw_exit(&i_mac_impl_lock);
    424 		err = EEXIST;
    425 		goto fail;
    426 	}
    427 
    428 	DTRACE_PROBE2(mac__register, struct devnames *, dnp,
    429 	    (mac_impl_t *), mip);
    430 
    431 	/*
    432 	 * Mark the MAC to be ready for open.
    433 	 */
    434 	mip->mi_state_flags &= ~MIS_DISABLED;
    435 	rw_exit(&i_mac_impl_lock);
    436 
    437 	atomic_inc_32(&i_mac_impl_count);
    438 
    439 	cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
    440 	*mhp = (mac_handle_t)mip;
    441 	return (0);
    442 
    443 fail:
    444 	if (style1_created)
    445 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
    446 
    447 	if (style2_created)
    448 		ddi_remove_minor_node(mip->mi_dip, driver);
    449 
    450 	mac_addr_factory_fini(mip);
    451 
    452 	/* Clean up registered MAC addresses */
    453 	mac_fini_macaddr(mip);
    454 
    455 	/* Clean up registered rings */
    456 	mac_free_rings(mip, MAC_RING_TYPE_RX);
    457 	mac_free_rings(mip, MAC_RING_TYPE_TX);
    458 
    459 	/* Clean up notification thread */
    460 	if (mip->mi_notify_thread != NULL)
    461 		i_mac_notify_exit(mip);
    462 
    463 	if (mip->mi_info.mi_unicst_addr != NULL) {
    464 		kmem_free(mip->mi_info.mi_unicst_addr,
    465 		    mip->mi_type->mt_addr_length);
    466 		mip->mi_info.mi_unicst_addr = NULL;
    467 	}
    468 
    469 	mac_stat_destroy(mip);
    470 
    471 	if (mip->mi_type != NULL) {
    472 		atomic_dec_32(&mip->mi_type->mt_ref);
    473 		mip->mi_type = NULL;
    474 	}
    475 
    476 	if (mip->mi_pdata != NULL) {
    477 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
    478 		mip->mi_pdata = NULL;
    479 		mip->mi_pdata_size = 0;
    480 	}
    481 
    482 	if (minor != 0) {
    483 		ASSERT(minor > MAC_MAX_MINOR);
    484 		mac_minor_rele(minor);
    485 	}
    486 
    487 	mac_unregister_priv_prop(mip);
    488 
    489 	/*
    490 	 * Clear the state before destroying the mac_impl_t
    491 	 */
    492 	mip->mi_state_flags = 0;
    493 
    494 	kmem_cache_free(i_mac_impl_cachep, mip);
    495 	return (err);
    496 }
    497 
    498 /*
    499  * Unregister from the GLDv3 framework
    500  */
    501 int
    502 mac_unregister(mac_handle_t mh)
    503 {
    504 	int			err;
    505 	mac_impl_t		*mip = (mac_impl_t *)mh;
    506 	mod_hash_val_t		val;
    507 	mac_margin_req_t	*mmr, *nextmmr;
    508 
    509 	/* Fail the unregister if there are any open references to this mac. */
    510 	if ((err = mac_disable_nowait(mh)) != 0)
    511 		return (err);
    512 
    513 	/*
    514 	 * Clean up notification thread and wait for it to exit.
    515 	 */
    516 	i_mac_notify_exit(mip);
    517 
    518 	i_mac_perim_enter(mip);
    519 
    520 	/*
    521 	 * There is still resource properties configured over this mac.
    522 	 */
    523 	if (mip->mi_resource_props.mrp_mask != 0)
    524 		mac_fastpath_enable((mac_handle_t)mip);
    525 
    526 	if (mip->mi_minor < MAC_MAX_MINOR + 1) {
    527 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
    528 		ddi_remove_minor_node(mip->mi_dip,
    529 		    (char *)ddi_driver_name(mip->mi_dip));
    530 	}
    531 
    532 	ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags &
    533 	    MIS_EXCLUSIVE));
    534 
    535 	mac_stat_destroy(mip);
    536 
    537 	(void) mod_hash_remove(i_mac_impl_hash,
    538 	    (mod_hash_key_t)mip->mi_name, &val);
    539 	ASSERT(mip == (mac_impl_t *)val);
    540 
    541 	ASSERT(i_mac_impl_count > 0);
    542 	atomic_dec_32(&i_mac_impl_count);
    543 
    544 	if (mip->mi_pdata != NULL)
    545 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
    546 	mip->mi_pdata = NULL;
    547 	mip->mi_pdata_size = 0;
    548 
    549 	/*
    550 	 * Free the list of margin request.
    551 	 */
    552 	for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
    553 		nextmmr = mmr->mmr_nextp;
    554 		kmem_free(mmr, sizeof (mac_margin_req_t));
    555 	}
    556 	mip->mi_mmrp = NULL;
    557 
    558 	mip->mi_linkstate = mip->mi_lowlinkstate = LINK_STATE_UNKNOWN;
    559 	kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
    560 	mip->mi_info.mi_unicst_addr = NULL;
    561 
    562 	atomic_dec_32(&mip->mi_type->mt_ref);
    563 	mip->mi_type = NULL;
    564 
    565 	/*
    566 	 * Free the primary MAC address.
    567 	 */
    568 	mac_fini_macaddr(mip);
    569 
    570 	/*
    571 	 * free all rings
    572 	 */
    573 	mac_free_rings(mip, MAC_RING_TYPE_RX);
    574 	mac_free_rings(mip, MAC_RING_TYPE_TX);
    575 
    576 	mac_addr_factory_fini(mip);
    577 
    578 	bzero(mip->mi_addr, MAXMACADDRLEN);
    579 	bzero(mip->mi_dstaddr, MAXMACADDRLEN);
    580 
    581 	/* and the flows */
    582 	mac_flow_tab_destroy(mip->mi_flow_tab);
    583 	mip->mi_flow_tab = NULL;
    584 
    585 	if (mip->mi_minor > MAC_MAX_MINOR)
    586 		mac_minor_rele(mip->mi_minor);
    587 
    588 	cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);
    589 
    590 	/*
    591 	 * Reset the perim related fields to default values before
    592 	 * kmem_cache_free
    593 	 */
    594 	i_mac_perim_exit(mip);
    595 	mip->mi_state_flags = 0;
    596 
    597 	mac_unregister_priv_prop(mip);
    598 
    599 	ASSERT(mip->mi_bridge_link == NULL);
    600 	kmem_cache_free(i_mac_impl_cachep, mip);
    601 
    602 	return (0);
    603 }
    604 
    605 /* DATA RECEPTION */
    606 
    607 /*
    608  * This function is invoked for packets received by the MAC driver in
    609  * interrupt context. The ring generation number provided by the driver
    610  * is matched with the ring generation number held in MAC. If they do not
    611  * match, received packets are considered stale packets coming from an older
    612  * assignment of the ring. Drop them.
    613  */
    614 void
    615 mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain,
    616     uint64_t mr_gen_num)
    617 {
    618 	mac_ring_t		*mr = (mac_ring_t *)mrh;
    619 
    620 	if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) {
    621 		DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t,
    622 		    mr->mr_gen_num, uint64_t, mr_gen_num);
    623 		freemsgchain(mp_chain);
    624 		return;
    625 	}
    626 	mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain);
    627 }
    628 
    629 /*
    630  * This function is invoked for each packet received by the underlying driver.
    631  */
    632 void
    633 mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
    634 {
    635 	mac_impl_t *mip = (mac_impl_t *)mh;
    636 
    637 	/*
    638 	 * Check if the link is part of a bridge.  If not, then we don't need
    639 	 * to take the lock to remain consistent.  Make this common case
    640 	 * lock-free and tail-call optimized.
    641 	 */
    642 	if (mip->mi_bridge_link == NULL) {
    643 		mac_rx_common(mh, mrh, mp_chain);
    644 	} else {
    645 		/*
    646 		 * Once we take a reference on the bridge link, the bridge
    647 		 * module itself can't unload, so the callback pointers are
    648 		 * stable.
    649 		 */
    650 		mutex_enter(&mip->mi_bridge_lock);
    651 		if ((mh = mip->mi_bridge_link) != NULL)
    652 			mac_bridge_ref_cb(mh, B_TRUE);
    653 		mutex_exit(&mip->mi_bridge_lock);
    654 		if (mh == NULL) {
    655 			mac_rx_common((mac_handle_t)mip, mrh, mp_chain);
    656 		} else {
    657 			mac_bridge_rx_cb(mh, mrh, mp_chain);
    658 			mac_bridge_ref_cb(mh, B_FALSE);
    659 		}
    660 	}
    661 }
    662 
    663 /*
    664  * Special case function: this allows snooping of packets transmitted and
    665  * received by TRILL. By design, they go directly into the TRILL module.
    666  */
    667 void
    668 mac_trill_snoop(mac_handle_t mh, mblk_t *mp)
    669 {
    670 	mac_impl_t *mip = (mac_impl_t *)mh;
    671 
    672 	if (mip->mi_promisc_list != NULL)
    673 		mac_promisc_dispatch(mip, mp, NULL);
    674 }
    675 
/*
 * This is the upward reentry point for packets arriving from the bridging
 * module and from mac_rx for links not part of a bridge.
 *
 * mh is the MAC the chain was received on, mrh is the receive ring the
 * driver supplied (may be NULL), and mp_chain is the packet chain.  The
 * chain is always consumed: it is either handed to an SRS, a flow callback
 * or mac_rx_flow(), or it is freed here.
 */
void
mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
{
	mac_impl_t		*mip = (mac_impl_t *)mh;
	mac_ring_t		*mr = (mac_ring_t *)mrh;
	mac_soft_ring_set_t 	*mac_srs;
	mblk_t			*bp = mp_chain;
	boolean_t		hw_classified = B_FALSE;

	/*
	 * If there are any promiscuous mode callbacks defined for
	 * this MAC, pass them a copy if appropriate.
	 */
	if (mip->mi_promisc_list != NULL)
		mac_promisc_dispatch(mip, mp_chain, NULL);

	if (mr != NULL) {
		/*
		 * If the SRS teardown has started, just return. The 'mr'
		 * continues to be valid until the driver unregisters the mac.
		 * Hardware classified packets will not make their way up
		 * beyond this point once the teardown has started. The driver
		 * is never passed a pointer to a flow entry or SRS or any
		 * structure that can be freed much before mac_unregister.
		 */
		mutex_enter(&mr->mr_lock);
		if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
		    (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
			/* Ring is not in service: drop the whole chain. */
			mutex_exit(&mr->mr_lock);
			freemsgchain(mp_chain);
			return;
		}
		if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
			/*
			 * Take the ring reference while mr_lock is still
			 * held; it is released after the SRS call below.
			 */
			hw_classified = B_TRUE;
			MR_REFHOLD_LOCKED(mr);
		}
		mutex_exit(&mr->mr_lock);

		/*
		 * We check if an SRS is controlling this ring.
		 * If so, we can directly call the srs_lower_proc
		 * routine otherwise we need to go through mac_rx_classify
		 * to reach the right place.
		 */
		if (hw_classified) {
			mac_srs = mr->mr_srs;
			/*
			 * This is supposed to be the fast path.
			 * All packets received though here were steered by
			 * the hardware classifier, and share the same
			 * MAC header info.
			 */
			mac_srs->srs_rx.sr_lower_proc(mh,
			    (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
			MR_REFRELE(mr);
			return;
		}
		/* We'll fall through to software classification */
	} else {
		flow_entry_t *flent;
		int err;

		/*
		 * No ring was supplied.  If the MAC has a single active
		 * client, try to deliver straight to the first entry of that
		 * client's flow list.
		 */
		rw_enter(&mip->mi_rw_lock, RW_READER);
		if (mip->mi_single_active_client != NULL) {
			flent = mip->mi_single_active_client->mci_flent_list;
			FLOW_TRY_REFHOLD(flent, err);
			rw_exit(&mip->mi_rw_lock);
			if (err == 0) {
				(flent->fe_cb_fn)(flent->fe_cb_arg1,
				    flent->fe_cb_arg2, mp_chain, B_FALSE);
				FLOW_REFRELE(flent);
				return;
			}
			/*
			 * The flow reference could not be taken; fall
			 * through to software classification.
			 */
		} else {
			rw_exit(&mip->mi_rw_lock);
		}
	}

	/*
	 * Software classification: mac_rx_flow() returns whatever part of
	 * the chain it did not consume, or NULL if nothing is left.
	 */
	if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
		if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
			return;
	}

	/* Nothing claimed the remaining packets: free them. */
	freemsgchain(bp);
}
    765 
    766 /* DATA TRANSMISSION */
    767 
    768 /*
    769  * A driver's notification to resume transmission, in case of a provider
    770  * without TX rings.
    771  */
    772 void
    773 mac_tx_update(mac_handle_t mh)
    774 {
    775 	/*
    776 	 * Walk the list of MAC clients (mac_client_handle)
    777 	 * and update
    778 	 */
    779 	i_mac_tx_srs_notify((mac_impl_t *)mh, NULL);
    780 }
    781 
    782 /*
    783  * A driver's notification to resume transmission on the specified TX ring.
    784  */
    785 void
    786 mac_tx_ring_update(mac_handle_t mh, mac_ring_handle_t rh)
    787 {
    788 	i_mac_tx_srs_notify((mac_impl_t *)mh, rh);
    789 }
    790 
    791 /* LINK STATE */
    792 /*
    793  * Notify the MAC layer about a link state change
    794  */
    795 void
    796 mac_link_update(mac_handle_t mh, link_state_t link)
    797 {
    798 	mac_impl_t	*mip = (mac_impl_t *)mh;
    799 
    800 	/*
    801 	 * Save the link state.
    802 	 */
    803 	mip->mi_lowlinkstate = link;
    804 
    805 	/*
    806 	 * Send a MAC_NOTE_LOWLINK notification.  This tells the notification
    807 	 * thread to deliver both lower and upper notifications.
    808 	 */
    809 	i_mac_notify(mip, MAC_NOTE_LOWLINK);
    810 }
    811 
    812 /*
    813  * Notify the MAC layer about a link state change due to bridging.
    814  */
    815 void
    816 mac_link_redo(mac_handle_t mh, link_state_t link)
    817 {
    818 	mac_impl_t	*mip = (mac_impl_t *)mh;
    819 
    820 	/*
    821 	 * Save the link state.
    822 	 */
    823 	mip->mi_linkstate = link;
    824 
    825 	/*
    826 	 * Send a MAC_NOTE_LINK notification.  Only upper notifications are
    827 	 * made.
    828 	 */
    829 	i_mac_notify(mip, MAC_NOTE_LINK);
    830 }
    831 
    832 /* MINOR NODE HANDLING */
    833 
    834 /*
    835  * Given a dev_t, return the instance number (PPA) associated with it.
    836  * Drivers can use this in their getinfo(9e) implementation to lookup
    837  * the instance number (i.e. PPA) of the device, to use as an index to
    838  * their own array of soft state structures.
    839  *
    840  * Returns -1 on error.
    841  */
    842 int
    843 mac_devt_to_instance(dev_t devt)
    844 {
    845 	return (dld_devt_to_instance(devt));
    846 }
    847 
    848 /*
    849  * This function returns the first minor number that is available for
    850  * driver private use.  All minor numbers smaller than this are
    851  * reserved for GLDv3 use.
    852  */
    853 minor_t
    854 mac_private_minor(void)
    855 {
    856 	return (MAC_PRIVATE_MINOR);
    857 }
    858 
    859 /* OTHER CONTROL INFORMATION */
    860 
    861 /*
    862  * A driver notified us that its primary MAC address has changed.
    863  */
    864 void
    865 mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
    866 {
    867 	mac_impl_t	*mip = (mac_impl_t *)mh;
    868 
    869 	if (mip->mi_type->mt_addr_length == 0)
    870 		return;
    871 
    872 	i_mac_perim_enter(mip);
    873 
    874 	/*
    875 	 * If address changes, freshen the MAC address value and update
    876 	 * all MAC clients that share this MAC address.
    877 	 */
    878 	if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) != 0) {
    879 		mac_freshen_macaddr(mac_find_macaddr(mip, mip->mi_addr),
    880 		    (uint8_t *)addr);
    881 	}
    882 
    883 	i_mac_perim_exit(mip);
    884 
    885 	/*
    886 	 * Send a MAC_NOTE_UNICST notification.
    887 	 */
    888 	i_mac_notify(mip, MAC_NOTE_UNICST);
    889 }
    890 
    891 void
    892 mac_dst_update(mac_handle_t mh, const uint8_t *addr)
    893 {
    894 	mac_impl_t	*mip = (mac_impl_t *)mh;
    895 
    896 	if (mip->mi_type->mt_addr_length == 0)
    897 		return;
    898 
    899 	i_mac_perim_enter(mip);
    900 	bcopy(addr, mip->mi_dstaddr, mip->mi_type->mt_addr_length);
    901 	i_mac_perim_exit(mip);
    902 	i_mac_notify(mip, MAC_NOTE_DEST);
    903 }
    904 
    905 /*
    906  * MAC plugin information changed.
    907  */
    908 int
    909 mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
    910 {
    911 	mac_impl_t	*mip = (mac_impl_t *)mh;
    912 
    913 	/*
    914 	 * Verify that the plugin supports MAC plugin data and that the
    915 	 * supplied data is valid.
    916 	 */
    917 	if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
    918 		return (EINVAL);
    919 	if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
    920 		return (EINVAL);
    921 
    922 	if (mip->mi_pdata != NULL)
    923 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
    924 
    925 	mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
    926 	bcopy(mac_pdata, mip->mi_pdata, dsize);
    927 	mip->mi_pdata_size = dsize;
    928 
    929 	/*
    930 	 * Since the MAC plugin data is used to construct MAC headers that
    931 	 * were cached in fast-path headers, we need to flush fast-path
    932 	 * information for links associated with this mac.
    933 	 */
    934 	i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
    935 	return (0);
    936 }
    937 
    938 /*
    939  * Invoked by driver as well as the framework to notify its capability change.
    940  */
    941 void
    942 mac_capab_update(mac_handle_t mh)
    943 {
    944 	/* Send MAC_NOTE_CAPAB_CHG notification */
    945 	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG);
    946 }
    947 
    948 int
    949 mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
    950 {
    951 	mac_impl_t	*mip = (mac_impl_t *)mh;
    952 
    953 	if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
    954 		return (EINVAL);
    955 	mip->mi_sdu_max = sdu_max;
    956 
    957 	/* Send a MAC_NOTE_SDU_SIZE notification. */
    958 	i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
    959 	return (0);
    960 }
    961 
    962 /* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */
    963 
    964 /*
    965  * Updates the mac_impl structure with the current state of the link
    966  */
    967 static void
    968 i_mac_log_link_state(mac_impl_t *mip)
    969 {
    970 	/*
    971 	 * If no change, then it is not interesting.
    972 	 */
    973 	if (mip->mi_lastlowlinkstate == mip->mi_lowlinkstate)
    974 		return;
    975 
    976 	switch (mip->mi_lowlinkstate) {
    977 	case LINK_STATE_UP:
    978 		if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
    979 			char det[200];
    980 
    981 			mip->mi_type->mt_ops.mtops_link_details(det,
    982 			    sizeof (det), (mac_handle_t)mip, mip->mi_pdata);
    983 
    984 			cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
    985 		} else {
    986 			cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
    987 		}
    988 		break;
    989 
    990 	case LINK_STATE_DOWN:
    991 		/*
    992 		 * Only transitions from UP to DOWN are interesting
    993 		 */
    994 		if (mip->mi_lastlowlinkstate != LINK_STATE_UNKNOWN)
    995 			cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
    996 		break;
    997 
    998 	case LINK_STATE_UNKNOWN:
    999 		/*
   1000 		 * This case is normally not interesting.
   1001 		 */
   1002 		break;
   1003 	}
   1004 	mip->mi_lastlowlinkstate = mip->mi_lowlinkstate;
   1005 }
   1006 
/*
 * Main routine for the callbacks notifications thread
 *
 * arg is the mac_impl_t this thread serves.  The thread sleeps on
 * mi_notify_cb_info.mcbi_cv until mi_notify_bits becomes non-zero.  While
 * holding mcbi_lockp it takes a private copy of the pending bits and clears
 * the shared word, then drops the lock and dispatches one round of
 * callbacks for every notification type whose bit was set.
 *
 * Bit MAC_NNOTE is not a notification type; it is the request to quit.
 * When it is seen the loop ends with the lock still held, MIS_NOTIFY_DONE
 * is set, waiters on mcbi_cv are woken and the thread exits.
 */
static void
i_mac_notify_thread(void *arg)
{
	mac_impl_t	*mip = arg;
	callb_cpr_t	cprinfo;
	mac_cb_t	*mcb;
	mac_cb_info_t	*mcbi;
	mac_notify_cb_t	*mncb;

	mcbi = &mip->mi_notify_cb_info;
	CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr,
	    "i_mac_notify_thread");

	mutex_enter(mcbi->mcbi_lockp);

	for (;;) {
		uint32_t	bits;
		uint32_t	type;

		/*
		 * Nothing pending: wait to be signalled, then re-check from
		 * the top.  The wait is bracketed by the CPR safe markers.
		 */
		bits = mip->mi_notify_bits;
		if (bits == 0) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
			CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp);
			continue;
		}
		/*
		 * Consume everything that is pending; bits posted from here
		 * on are picked up by the next iteration.
		 */
		mip->mi_notify_bits = 0;
		if ((bits & (1 << MAC_NNOTE)) != 0) {
			/* request to quit */
			ASSERT(mip->mi_state_flags & MIS_DISABLED);
			break;
		}

		/* The callbacks below run without mcbi_lockp held. */
		mutex_exit(mcbi->mcbi_lockp);

		/*
		 * Log link changes on the actual link, but then do reports on
		 * synthetic state (if part of a bridge).
		 */
		if ((bits & (1 << MAC_NOTE_LOWLINK)) != 0) {
			link_state_t newstate;
			mac_handle_t mh;

			i_mac_log_link_state(mip);
			newstate = mip->mi_lowlinkstate;
			/*
			 * mi_bridge_link is tested once without the lock and
			 * again under mi_bridge_lock before it is used; the
			 * state handed back by mac_bridge_ls_cb() replaces
			 * the low-level one.
			 */
			if (mip->mi_bridge_link != NULL) {
				mutex_enter(&mip->mi_bridge_lock);
				if ((mh = mip->mi_bridge_link) != NULL) {
					newstate = mac_bridge_ls_cb(mh,
					    newstate);
				}
				mutex_exit(&mip->mi_bridge_lock);
			}
			/*
			 * If the reported link state changed, record it and
			 * add MAC_NOTE_LINK to this pass so that the loop
			 * below delivers it as well.
			 */
			if (newstate != mip->mi_linkstate) {
				mip->mi_linkstate = newstate;
				bits |= 1 << MAC_NOTE_LINK;
			}
		}

		/*
		 * Do notification callbacks for each notification type.
		 */
		for (type = 0; type < MAC_NNOTE; type++) {
			if ((bits & (1 << type)) == 0) {
				continue;
			}

			/*
			 * The per-type handler in mac_notify_cb_list, if one
			 * is set, runs before the registered callbacks.
			 */
			if (mac_notify_cb_list[type] != NULL)
				(*mac_notify_cb_list[type])(mip);

			/*
			 * Walk the list of notifications.
			 *
			 * NOTE(review): the WALKER_INC/WALKER_DCR pair
			 * presumably keeps entries from being freed while
			 * the list is walked without the lock -- confirm
			 * against the macro definitions.
			 */
			MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info);
			for (mcb = mip->mi_notify_cb_list; mcb != NULL;
			    mcb = mcb->mcb_nextp) {
				mncb = (mac_notify_cb_t *)mcb->mcb_objp;
				mncb->mncb_fn(mncb->mncb_arg, type);
			}
			MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info,
			    &mip->mi_notify_cb_list);
		}

		/* Retake the lock before looking at mi_notify_bits again. */
		mutex_enter(mcbi->mcbi_lockp);
	}

	/*
	 * Quit was requested; mcbi_lockp is still held here.  Announce
	 * completion to whoever is waiting on mcbi_cv.
	 */
	mip->mi_state_flags |= MIS_NOTIFY_DONE;
	cv_broadcast(&mcbi->mcbi_cv);

	/* CALLB_CPR_EXIT drops the lock */
	CALLB_CPR_EXIT(&cprinfo);
	thread_exit();
}
   1103 
   1104 /*
   1105  * Signal the i_mac_notify_thread asking it to quit.
   1106  * Then wait till it is done.
   1107  */
   1108 void
   1109 i_mac_notify_exit(mac_impl_t *mip)
   1110 {
   1111 	mac_cb_info_t	*mcbi;
   1112 
   1113 	mcbi = &mip->mi_notify_cb_info;
   1114 
   1115 	mutex_enter(mcbi->mcbi_lockp);
   1116 	mip->mi_notify_bits = (1 << MAC_NNOTE);
   1117 	cv_broadcast(&mcbi->mcbi_cv);
   1118 
   1119 
   1120 	while ((mip->mi_notify_thread != NULL) &&
   1121 	    !(mip->mi_state_flags & MIS_NOTIFY_DONE)) {
   1122 		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
   1123 	}
   1124 
   1125 	/* Necessary clean up before doing kmem_cache_free */
   1126 	mip->mi_state_flags &= ~MIS_NOTIFY_DONE;
   1127 	mip->mi_notify_bits = 0;
   1128 	mip->mi_notify_thread = NULL;
   1129 	mutex_exit(mcbi->mcbi_lockp);
   1130 }
   1131 
   1132 /*
   1133  * Entry point invoked by drivers to dynamically add a ring to an
   1134  * existing group.
   1135  */
   1136 int
   1137 mac_group_add_ring(mac_group_handle_t gh, int index)
   1138 {
   1139 	mac_group_t *group = (mac_group_t *)gh;
   1140 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
   1141 	int ret;
   1142 
   1143 	i_mac_perim_enter(mip);
   1144 
   1145 	/*
   1146 	 * Only RX rings can be added or removed by drivers currently.
   1147 	 */
   1148 	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
   1149 
   1150 	ret = i_mac_group_add_ring(group, NULL, index);
   1151 
   1152 	i_mac_perim_exit(mip);
   1153 
   1154 	return (ret);
   1155 }
   1156 
   1157 /*
   1158  * Entry point invoked by drivers to dynamically remove a ring
   1159  * from an existing group. The specified ring handle must no longer
   1160  * be used by the driver after a call to this function.
   1161  */
   1162 void
   1163 mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh)
   1164 {
   1165 	mac_group_t *group = (mac_group_t *)gh;
   1166 	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
   1167 
   1168 	i_mac_perim_enter(mip);
   1169 
   1170 	/*
   1171 	 * Only RX rings can be added or removed by drivers currently.
   1172 	 */
   1173 	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
   1174 
   1175 	i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE);
   1176 
   1177 	i_mac_perim_exit(mip);
   1178 }
   1179