Home | History | Annotate | Download | only in vnic
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/types.h>
     27 #include <sys/cred.h>
     28 #include <sys/sysmacros.h>
     29 #include <sys/conf.h>
     30 #include <sys/cmn_err.h>
     31 #include <sys/list.h>
     32 #include <sys/ksynch.h>
     33 #include <sys/kmem.h>
     34 #include <sys/stream.h>
     35 #include <sys/modctl.h>
     36 #include <sys/ddi.h>
     37 #include <sys/sunddi.h>
     38 #include <sys/atomic.h>
     39 #include <sys/stat.h>
     40 #include <sys/modhash.h>
     41 #include <sys/strsubr.h>
     42 #include <sys/strsun.h>
     43 #include <sys/dlpi.h>
     44 #include <sys/mac.h>
     45 #include <sys/mac_provider.h>
     46 #include <sys/mac_client.h>
     47 #include <sys/mac_client_priv.h>
     48 #include <sys/mac_ether.h>
     49 #include <sys/dls.h>
     50 #include <sys/pattr.h>
     51 #include <sys/time.h>
     52 #include <sys/vlan.h>
     53 #include <sys/vnic.h>
     54 #include <sys/vnic_impl.h>
     55 #include <sys/mac_flow_impl.h>
     56 #include <inet/ip_impl.h>
     57 
/*
 * Note that for best performance, the VNIC is a passthrough design.
 * Each VNIC corresponds to a MAC client of the underlying MAC (lower MAC).
 * This MAC client is opened by the VNIC driver at VNIC creation,
 * and closed when the VNIC is deleted.
 * When a MAC client of the VNIC itself opens a VNIC, the MAC layer
 * (upper MAC) detects that the MAC being opened is a VNIC. Instead
 * of allocating a new MAC client, it asks the VNIC driver to return
 * the lower MAC client handle associated with the VNIC, and that handle
 * is returned to the upper MAC client directly. This gives upper MAC
 * clients of the VNIC direct access to the lower MAC client for both
 * the control path and the data path.
 *
 * Due to this passthrough, some of the entry points exported by the
 * VNIC driver are never directly invoked. These entry points include
 * vnic_m_start, vnic_m_stop, vnic_m_promisc, vnic_m_multicst, etc.
 */
     75 
     76 static int vnic_m_start(void *);
     77 static void vnic_m_stop(void *);
     78 static int vnic_m_promisc(void *, boolean_t);
     79 static int vnic_m_multicst(void *, boolean_t, const uint8_t *);
     80 static int vnic_m_unicst(void *, const uint8_t *);
     81 static int vnic_m_stat(void *, uint_t, uint64_t *);
     82 static void vnic_m_ioctl(void *, queue_t *, mblk_t *);
     83 static int vnic_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
     84     const void *);
     85 static int vnic_m_getprop(void *, const char *, mac_prop_id_t, uint_t,
     86     uint_t, void *, uint_t *);
     87 static mblk_t *vnic_m_tx(void *, mblk_t *);
     88 static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *);
     89 static void vnic_notify_cb(void *, mac_notify_type_t);
     90 
     91 static kmem_cache_t	*vnic_cache;
     92 static krwlock_t	vnic_lock;
     93 static uint_t		vnic_count;
     94 
     95 #define	ANCHOR_VNIC_MIN_MTU	576
     96 #define	ANCHOR_VNIC_MAX_MTU	9000
     97 
     98 /* hash of VNICs (vnic_t's), keyed by VNIC id */
     99 static mod_hash_t	*vnic_hash;
    100 #define	VNIC_HASHSZ	64
    101 #define	VNIC_HASH_KEY(vnic_id)	((mod_hash_key_t)(uintptr_t)vnic_id)
    102 
    103 #define	VNIC_M_CALLBACK_FLAGS	\
    104 	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP)
    105 
    106 static mac_callbacks_t vnic_m_callbacks = {
    107 	VNIC_M_CALLBACK_FLAGS,
    108 	vnic_m_stat,
    109 	vnic_m_start,
    110 	vnic_m_stop,
    111 	vnic_m_promisc,
    112 	vnic_m_multicst,
    113 	vnic_m_unicst,
    114 	vnic_m_tx,
    115 	vnic_m_ioctl,
    116 	vnic_m_capab_get,
    117 	NULL,
    118 	NULL,
    119 	vnic_m_setprop,
    120 	vnic_m_getprop
    121 };
    122 
    123 void
    124 vnic_dev_init(void)
    125 {
    126 	vnic_cache = kmem_cache_create("vnic_cache",
    127 	    sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
    128 
    129 	vnic_hash = mod_hash_create_idhash("vnic_hash",
    130 	    VNIC_HASHSZ, mod_hash_null_valdtor);
    131 
    132 	rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL);
    133 
    134 	vnic_count = 0;
    135 }
    136 
    137 void
    138 vnic_dev_fini(void)
    139 {
    140 	ASSERT(vnic_count == 0);
    141 
    142 	rw_destroy(&vnic_lock);
    143 	mod_hash_destroy_idhash(vnic_hash);
    144 	kmem_cache_destroy(vnic_cache);
    145 }
    146 
    147 uint_t
    148 vnic_dev_count(void)
    149 {
    150 	return (vnic_count);
    151 }
    152 
    153 static vnic_ioc_diag_t
    154 vnic_mac2vnic_diag(mac_diag_t diag)
    155 {
    156 	switch (diag) {
    157 	case MAC_DIAG_MACADDR_NIC:
    158 		return (VNIC_IOC_DIAG_MACADDR_NIC);
    159 	case MAC_DIAG_MACADDR_INUSE:
    160 		return (VNIC_IOC_DIAG_MACADDR_INUSE);
    161 	case MAC_DIAG_MACADDR_INVALID:
    162 		return (VNIC_IOC_DIAG_MACADDR_INVALID);
    163 	case MAC_DIAG_MACADDRLEN_INVALID:
    164 		return (VNIC_IOC_DIAG_MACADDRLEN_INVALID);
    165 	case MAC_DIAG_MACFACTORYSLOTINVALID:
    166 		return (VNIC_IOC_DIAG_MACFACTORYSLOTINVALID);
    167 	case MAC_DIAG_MACFACTORYSLOTUSED:
    168 		return (VNIC_IOC_DIAG_MACFACTORYSLOTUSED);
    169 	case MAC_DIAG_MACFACTORYSLOTALLUSED:
    170 		return (VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED);
    171 	case MAC_DIAG_MACFACTORYNOTSUP:
    172 		return (VNIC_IOC_DIAG_MACFACTORYNOTSUP);
    173 	case MAC_DIAG_MACPREFIX_INVALID:
    174 		return (VNIC_IOC_DIAG_MACPREFIX_INVALID);
    175 	case MAC_DIAG_MACPREFIXLEN_INVALID:
    176 		return (VNIC_IOC_DIAG_MACPREFIXLEN_INVALID);
    177 	case MAC_DIAG_MACNO_HWRINGS:
    178 		return (VNIC_IOC_DIAG_NO_HWRINGS);
    179 	default:
    180 		return (VNIC_IOC_DIAG_NONE);
    181 	}
    182 }
    183 
    184 static int
    185 vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type,
    186     int *addr_slot, uint_t prefix_len, int *addr_len_ptr_arg,
    187     uint8_t *mac_addr_arg, uint16_t flags, vnic_ioc_diag_t *diag,
    188     uint16_t vid)
    189 {
    190 	mac_diag_t mac_diag;
    191 	uint16_t mac_flags = 0;
    192 	int err;
    193 	uint_t addr_len;
    194 
    195 	if (flags & VNIC_IOC_CREATE_NODUPCHECK)
    196 		mac_flags |= MAC_UNICAST_NODUPCHECK;
    197 
    198 	switch (vnic_addr_type) {
    199 	case VNIC_MAC_ADDR_TYPE_FIXED:
    200 	case VNIC_MAC_ADDR_TYPE_VRID:
    201 		/*
    202 		 * The MAC address value to assign to the VNIC
    203 		 * is already provided in mac_addr_arg. addr_len_ptr_arg
    204 		 * already contains the MAC address length.
    205 		 */
    206 		break;
    207 
    208 	case VNIC_MAC_ADDR_TYPE_RANDOM:
    209 		/*
    210 		 * Random MAC address. There are two sub-cases:
    211 		 *
    212 		 * 1 - If mac_len == 0, a new MAC address is generated.
    213 		 *	The length of the MAC address to generated depends
    214 		 *	on the type of MAC used. The prefix to use for the MAC
    215 		 *	address is stored in the most significant bytes
    216 		 *	of the mac_addr argument, and its length is specified
    217 		 *	by the mac_prefix_len argument. This prefix can
    218 		 *	correspond to a IEEE OUI in the case of Ethernet,
    219 		 *	for example.
    220 		 *
    221 		 * 2 - If mac_len > 0, the address was already picked
    222 		 *	randomly, and is now passed back during VNIC
    223 		 *	re-creation. The mac_addr argument contains the MAC
    224 		 *	address that was generated. We distinguish this
    225 		 *	case from the fixed MAC address case, since we
    226 		 *	want the user consumers to know, when they query
    227 		 *	the list of VNICs, that a VNIC was assigned a
    228 		 *	random MAC address vs assigned a fixed address
    229 		 *	specified by the user.
    230 		 */
    231 
    232 		/*
    233 		 * If it's a pre-generated address, we're done. mac_addr_arg
    234 		 * and addr_len_ptr_arg already contain the MAC address
    235 		 * value and length.
    236 		 */
    237 		if (*addr_len_ptr_arg > 0)
    238 			break;
    239 
    240 		/* generate a new random MAC address */
    241 		if ((err = mac_addr_random(vnic->vn_mch,
    242 		    prefix_len, mac_addr_arg, &mac_diag)) != 0) {
    243 			*diag = vnic_mac2vnic_diag(mac_diag);
    244 			return (err);
    245 		}
    246 		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
    247 		break;
    248 
    249 	case VNIC_MAC_ADDR_TYPE_FACTORY:
    250 		err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
    251 		if (err != 0) {
    252 			if (err == EINVAL)
    253 				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTINVALID;
    254 			if (err == EBUSY)
    255 				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTUSED;
    256 			if (err == ENOSPC)
    257 				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED;
    258 			return (err);
    259 		}
    260 
    261 		mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
    262 		    mac_addr_arg, &addr_len, NULL, NULL);
    263 		*addr_len_ptr_arg = addr_len;
    264 		break;
    265 
    266 	case VNIC_MAC_ADDR_TYPE_AUTO:
    267 		/* first try to allocate a factory MAC address */
    268 		err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
    269 		if (err == 0) {
    270 			mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
    271 			    mac_addr_arg, &addr_len, NULL, NULL);
    272 			vnic_addr_type = VNIC_MAC_ADDR_TYPE_FACTORY;
    273 			*addr_len_ptr_arg = addr_len;
    274 			break;
    275 		}
    276 
    277 		/*
    278 		 * Allocating a factory MAC address failed, generate a
    279 		 * random MAC address instead.
    280 		 */
    281 		if ((err = mac_addr_random(vnic->vn_mch,
    282 		    prefix_len, mac_addr_arg, &mac_diag)) != 0) {
    283 			*diag = vnic_mac2vnic_diag(mac_diag);
    284 			return (err);
    285 		}
    286 		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
    287 		vnic_addr_type = VNIC_MAC_ADDR_TYPE_RANDOM;
    288 		break;
    289 	case VNIC_MAC_ADDR_TYPE_PRIMARY:
    290 		/*
    291 		 * We get the address here since we copy it in the
    292 		 * vnic's vn_addr.
    293 		 */
    294 		mac_unicast_primary_get(vnic->vn_lower_mh, mac_addr_arg);
    295 		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
    296 		mac_flags |= MAC_UNICAST_VNIC_PRIMARY;
    297 		break;
    298 	}
    299 
    300 	vnic->vn_addr_type = vnic_addr_type;
    301 
    302 	err = mac_unicast_add(vnic->vn_mch, mac_addr_arg, mac_flags,
    303 	    &vnic->vn_muh, vid, &mac_diag);
    304 	if (err != 0) {
    305 		if (vnic_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
    306 			/* release factory MAC address */
    307 			mac_addr_factory_release(vnic->vn_mch, *addr_slot);
    308 		}
    309 		*diag = vnic_mac2vnic_diag(mac_diag);
    310 	}
    311 
    312 	return (err);
    313 }
    314 
    315 /*
    316  * Create a new VNIC upon request from administrator.
    317  * Returns 0 on success, an errno on failure.
    318  */
    319 /* ARGSUSED */
    320 int
    321 vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid,
    322     vnic_mac_addr_type_t *vnic_addr_type, int *mac_len, uchar_t *mac_addr,
    323     int *mac_slot, uint_t mac_prefix_len, uint16_t vid, vrid_t vrid,
    324     int af, mac_resource_props_t *mrp, uint32_t flags, vnic_ioc_diag_t *diag,
    325     cred_t *credp)
    326 {
    327 	vnic_t *vnic;
    328 	mac_register_t *mac;
    329 	int err;
    330 	boolean_t is_anchor = ((flags & VNIC_IOC_CREATE_ANCHOR) != 0);
    331 	char vnic_name[MAXNAMELEN];
    332 	const mac_info_t *minfop;
    333 	uint32_t req_hwgrp_flag = ((flags & VNIC_IOC_CREATE_REQ_HWRINGS) != 0) ?
    334 	    MAC_OPEN_FLAGS_REQ_HWRINGS : 0;
    335 
    336 	*diag = VNIC_IOC_DIAG_NONE;
    337 
    338 	rw_enter(&vnic_lock, RW_WRITER);
    339 
    340 	/* does a VNIC with the same id already exist? */
    341 	err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
    342 	    (mod_hash_val_t *)&vnic);
    343 	if (err == 0) {
    344 		rw_exit(&vnic_lock);
    345 		return (EEXIST);
    346 	}
    347 
    348 	vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP);
    349 	if (vnic == NULL) {
    350 		rw_exit(&vnic_lock);
    351 		return (ENOMEM);
    352 	}
    353 
    354 	bzero(vnic, sizeof (*vnic));
    355 
    356 	vnic->vn_id = vnic_id;
    357 	vnic->vn_link_id = linkid;
    358 	vnic->vn_vrid = vrid;
    359 	vnic->vn_af = af;
    360 
    361 	if (!is_anchor) {
    362 		if (linkid == DATALINK_INVALID_LINKID) {
    363 			err = EINVAL;
    364 			goto bail;
    365 		}
    366 
    367 		/*
    368 		 * Open the lower MAC and assign its initial bandwidth and
    369 		 * MAC address. We do this here during VNIC creation and
    370 		 * do not wait until the upper MAC client open so that we
    371 		 * can validate the VNIC creation parameters (bandwidth,
    372 		 * MAC address, etc) and reserve a factory MAC address if
    373 		 * one was requested.
    374 		 */
    375 		err = mac_open_by_linkid(linkid, &vnic->vn_lower_mh);
    376 		if (err != 0)
    377 			goto bail;
    378 
    379 		/*
    380 		 * VNIC(vlan) over VNICs(vlans) is not supported.
    381 		 */
    382 		if (mac_is_vnic(vnic->vn_lower_mh)) {
    383 			err = EINVAL;
    384 			goto bail;
    385 		}
    386 
    387 		/* only ethernet support for now */
    388 		minfop = mac_info(vnic->vn_lower_mh);
    389 		if (minfop->mi_nativemedia != DL_ETHER) {
    390 			err = ENOTSUP;
    391 			goto bail;
    392 		}
    393 
    394 		(void) dls_mgmt_get_linkinfo(vnic_id, vnic_name, NULL, NULL,
    395 		    NULL);
    396 		err = mac_client_open(vnic->vn_lower_mh, &vnic->vn_mch,
    397 		    vnic_name, MAC_OPEN_FLAGS_IS_VNIC | req_hwgrp_flag);
    398 		if (err != 0)
    399 			goto bail;
    400 
    401 		if (mrp != NULL) {
    402 			err = mac_client_set_resources(vnic->vn_mch, mrp);
    403 			if (err != 0)
    404 				goto bail;
    405 		}
    406 		/* assign a MAC address to the VNIC */
    407 
    408 		err = vnic_unicast_add(vnic, *vnic_addr_type, mac_slot,
    409 		    mac_prefix_len, mac_len, mac_addr, flags, diag, vid);
    410 		if (err != 0) {
    411 			vnic->vn_muh = NULL;
    412 			if (diag != NULL && req_hwgrp_flag != 0)
    413 				*diag = VNIC_IOC_DIAG_NO_HWRINGS;
    414 			goto bail;
    415 		}
    416 
    417 		/* register to receive notification from underlying MAC */
    418 		vnic->vn_mnh = mac_notify_add(vnic->vn_lower_mh, vnic_notify_cb,
    419 		    vnic);
    420 
    421 		*vnic_addr_type = vnic->vn_addr_type;
    422 		vnic->vn_addr_len = *mac_len;
    423 		vnic->vn_vid = vid;
    424 
    425 		bcopy(mac_addr, vnic->vn_addr, vnic->vn_addr_len);
    426 
    427 		if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY)
    428 			vnic->vn_slot_id = *mac_slot;
    429 
    430 		/* set the initial VNIC capabilities */
    431 		if (!mac_capab_get(vnic->vn_lower_mh, MAC_CAPAB_HCKSUM,
    432 		    &vnic->vn_hcksum_txflags))
    433 			vnic->vn_hcksum_txflags = 0;
    434 	}
    435 
    436 	/* register with the MAC module */
    437 	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
    438 		goto bail;
    439 
    440 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
    441 	mac->m_driver = vnic;
    442 	mac->m_dip = vnic_get_dip();
    443 	mac->m_instance = (uint_t)-1;
    444 	mac->m_src_addr = vnic->vn_addr;
    445 	mac->m_callbacks = &vnic_m_callbacks;
    446 
    447 	if (!is_anchor) {
    448 		/*
    449 		 * If this is a VNIC based VLAN, then we check for the
    450 		 * margin unless it has been created with the force
    451 		 * flag. If we are configuring a VLAN over an etherstub,
    452 		 * we don't check the margin even if force is not set.
    453 		 */
    454 		if (vid == 0 || (flags & VNIC_IOC_CREATE_FORCE) != 0) {
    455 			if (vid != VLAN_ID_NONE)
    456 				vnic->vn_force = B_TRUE;
    457 			/*
    458 			 * As the current margin size of the underlying mac is
    459 			 * used to determine the margin size of the VNIC
    460 			 * itself, request the underlying mac not to change
    461 			 * to a smaller margin size.
    462 			 */
    463 			err = mac_margin_add(vnic->vn_lower_mh,
    464 			    &vnic->vn_margin, B_TRUE);
    465 			ASSERT(err == 0);
    466 		} else {
    467 			vnic->vn_margin = VLAN_TAGSZ;
    468 			err = mac_margin_add(vnic->vn_lower_mh,
    469 			    &vnic->vn_margin, B_FALSE);
    470 			if (err != 0) {
    471 				mac_free(mac);
    472 				if (diag != NULL)
    473 					*diag = VNIC_IOC_DIAG_MACMARGIN_INVALID;
    474 				goto bail;
    475 			}
    476 		}
    477 
    478 		mac_sdu_get(vnic->vn_lower_mh, &mac->m_min_sdu,
    479 		    &mac->m_max_sdu);
    480 	} else {
    481 		vnic->vn_margin = VLAN_TAGSZ;
    482 		mac->m_min_sdu = ANCHOR_VNIC_MIN_MTU;
    483 		mac->m_max_sdu = ANCHOR_VNIC_MAX_MTU;
    484 	}
    485 
    486 	mac->m_margin = vnic->vn_margin;
    487 
    488 	err = mac_register(mac, &vnic->vn_mh);
    489 	mac_free(mac);
    490 	if (err != 0) {
    491 		VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
    492 		    vnic->vn_margin) == 0);
    493 		goto bail;
    494 	}
    495 
    496 	/* Set the VNIC's MAC in the client */
    497 	if (!is_anchor)
    498 		mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh);
    499 
    500 	err = dls_devnet_create(vnic->vn_mh, vnic->vn_id, crgetzoneid(credp));
    501 	if (err != 0) {
    502 		VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
    503 		    vnic->vn_margin) == 0);
    504 		(void) mac_unregister(vnic->vn_mh);
    505 		goto bail;
    506 	}
    507 
    508 	/* add new VNIC to hash table */
    509 	err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id),
    510 	    (mod_hash_val_t)vnic);
    511 	ASSERT(err == 0);
    512 	vnic_count++;
    513 
    514 	vnic->vn_enabled = B_TRUE;
    515 	rw_exit(&vnic_lock);
    516 
    517 	return (0);
    518 
    519 bail:
    520 	rw_exit(&vnic_lock);
    521 	if (!is_anchor) {
    522 		if (vnic->vn_mnh != NULL)
    523 			(void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
    524 		if (vnic->vn_muh != NULL)
    525 			(void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
    526 		if (vnic->vn_mch != NULL)
    527 			mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
    528 		if (vnic->vn_lower_mh != NULL)
    529 			mac_close(vnic->vn_lower_mh);
    530 	}
    531 
    532 	kmem_cache_free(vnic_cache, vnic);
    533 	return (err);
    534 }
    535 
    536 /*
    537  * Modify the properties of an existing VNIC.
    538  */
    539 /* ARGSUSED */
    540 int
    541 vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask,
    542     vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr,
    543     uint_t mac_slot, mac_resource_props_t *mrp)
    544 {
    545 	vnic_t *vnic = NULL;
    546 
    547 	rw_enter(&vnic_lock, RW_WRITER);
    548 
    549 	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
    550 	    (mod_hash_val_t *)&vnic) != 0) {
    551 		rw_exit(&vnic_lock);
    552 		return (ENOENT);
    553 	}
    554 
    555 	rw_exit(&vnic_lock);
    556 
    557 	return (0);
    558 }
    559 
    560 /* ARGSUSED */
    561 int
    562 vnic_dev_delete(datalink_id_t vnic_id, uint32_t flags, cred_t *credp)
    563 {
    564 	vnic_t *vnic = NULL;
    565 	mod_hash_val_t val;
    566 	datalink_id_t tmpid;
    567 	int rc;
    568 
    569 	rw_enter(&vnic_lock, RW_WRITER);
    570 
    571 	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
    572 	    (mod_hash_val_t *)&vnic) != 0) {
    573 		rw_exit(&vnic_lock);
    574 		return (ENOENT);
    575 	}
    576 
    577 	if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid, B_TRUE)) != 0) {
    578 		rw_exit(&vnic_lock);
    579 		return (rc);
    580 	}
    581 
    582 	ASSERT(vnic_id == tmpid);
    583 
    584 	/*
    585 	 * We cannot unregister the MAC yet. Unregistering would
    586 	 * free up mac_impl_t which should not happen at this time.
    587 	 * So disable mac_impl_t by calling mac_disable(). This will prevent
    588 	 * any new claims on mac_impl_t.
    589 	 */
    590 	if ((rc = mac_disable(vnic->vn_mh)) != 0) {
    591 		(void) dls_devnet_create(vnic->vn_mh, vnic_id,
    592 		    crgetzoneid(credp));
    593 		rw_exit(&vnic_lock);
    594 		return (rc);
    595 	}
    596 
    597 	vnic->vn_enabled = B_FALSE;
    598 	(void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val);
    599 	ASSERT(vnic == (vnic_t *)val);
    600 	vnic_count--;
    601 	rw_exit(&vnic_lock);
    602 
    603 	/*
    604 	 * XXX-nicolas shouldn't have a void cast here, if it's
    605 	 * expected that the function will never fail, then we should
    606 	 * have an ASSERT().
    607 	 */
    608 	(void) mac_unregister(vnic->vn_mh);
    609 
    610 	if (vnic->vn_lower_mh != NULL) {
    611 		/*
    612 		 * Check if MAC address for the vnic was obtained from the
    613 		 * factory MAC addresses. If yes, release it.
    614 		 */
    615 		if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
    616 			(void) mac_addr_factory_release(vnic->vn_mch,
    617 			    vnic->vn_slot_id);
    618 		}
    619 		(void) mac_margin_remove(vnic->vn_lower_mh, vnic->vn_margin);
    620 		(void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
    621 		(void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
    622 		mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
    623 		mac_close(vnic->vn_lower_mh);
    624 	}
    625 
    626 	kmem_cache_free(vnic_cache, vnic);
    627 	return (0);
    628 }
    629 
    630 /* ARGSUSED */
    631 mblk_t *
    632 vnic_m_tx(void *arg, mblk_t *mp_chain)
    633 {
    634 	/*
    635 	 * This function could be invoked for an anchor VNIC when sending
    636 	 * broadcast and multicast packets, and unicast packets which did
    637 	 * not match any local known destination.
    638 	 */
    639 	freemsgchain(mp_chain);
    640 	return (NULL);
    641 }
    642 
    643 /*ARGSUSED*/
    644 static void
    645 vnic_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
    646 {
    647 	miocnak(q, mp, 0, ENOTSUP);
    648 }
    649 
    650 /*
    651  * This entry point cannot be passed-through, since it is invoked
    652  * for the per-VNIC kstats which must be exported independently
    653  * of the existence of VNIC MAC clients.
    654  */
    655 static int
    656 vnic_m_stat(void *arg, uint_t stat, uint64_t *val)
    657 {
    658 	vnic_t *vnic = arg;
    659 	int rval = 0;
    660 
    661 	if (vnic->vn_lower_mh == NULL) {
    662 		/*
    663 		 * It's an anchor VNIC, which does not have any
    664 		 * statistics in itself.
    665 		 */
    666 		return (ENOTSUP);
    667 	}
    668 
    669 	/*
    670 	 * ENOTSUP must be reported for unsupported stats, the VNIC
    671 	 * driver reports a subset of the stats that would
    672 	 * be returned by a real piece of hardware.
    673 	 */
    674 
    675 	switch (stat) {
    676 	case MAC_STAT_LINK_STATE:
    677 	case MAC_STAT_LINK_UP:
    678 	case MAC_STAT_PROMISC:
    679 	case MAC_STAT_IFSPEED:
    680 	case MAC_STAT_MULTIRCV:
    681 	case MAC_STAT_MULTIXMT:
    682 	case MAC_STAT_BRDCSTRCV:
    683 	case MAC_STAT_BRDCSTXMT:
    684 	case MAC_STAT_OPACKETS:
    685 	case MAC_STAT_OBYTES:
    686 	case MAC_STAT_IERRORS:
    687 	case MAC_STAT_OERRORS:
    688 	case MAC_STAT_RBYTES:
    689 	case MAC_STAT_IPACKETS:
    690 		*val = mac_client_stat_get(vnic->vn_mch, stat);
    691 		break;
    692 	default:
    693 		rval = ENOTSUP;
    694 	}
    695 
    696 	return (rval);
    697 }
    698 
    699 /*
    700  * Invoked by the upper MAC to retrieve the lower MAC client handle
    701  * corresponding to a VNIC. A pointer to this function is obtained
    702  * by the upper MAC via capability query.
    703  *
    704  * XXX-nicolas Note: this currently causes all VNIC MAC clients to
    705  * receive the same MAC client handle for the same VNIC. This is ok
    706  * as long as we have only one VNIC MAC client which sends and
    707  * receives data, but we don't currently enforce this at the MAC layer.
    708  */
    709 static void *
    710 vnic_mac_client_handle(void *vnic_arg)
    711 {
    712 	vnic_t *vnic = vnic_arg;
    713 
    714 	return (vnic->vn_mch);
    715 }
    716 
    717 
    718 /*
    719  * Return information about the specified capability.
    720  */
    721 /* ARGSUSED */
    722 static boolean_t
    723 vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
    724 {
    725 	vnic_t *vnic = arg;
    726 
    727 	switch (cap) {
    728 	case MAC_CAPAB_HCKSUM: {
    729 		uint32_t *hcksum_txflags = cap_data;
    730 
    731 		*hcksum_txflags = vnic->vn_hcksum_txflags &
    732 		    (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM |
    733 		    HCKSUM_INET_PARTIAL);
    734 		break;
    735 	}
    736 	case MAC_CAPAB_VNIC: {
    737 		mac_capab_vnic_t *vnic_capab = cap_data;
    738 
    739 		if (vnic->vn_lower_mh == NULL) {
    740 			/*
    741 			 * It's an anchor VNIC, we don't have an underlying
    742 			 * NIC and MAC client handle.
    743 			 */
    744 			return (B_FALSE);
    745 		}
    746 
    747 		if (vnic_capab != NULL) {
    748 			vnic_capab->mcv_arg = vnic;
    749 			vnic_capab->mcv_mac_client_handle =
    750 			    vnic_mac_client_handle;
    751 		}
    752 		break;
    753 	}
    754 	case MAC_CAPAB_ANCHOR_VNIC: {
    755 		/* since it's an anchor VNIC we don't have lower mac handle */
    756 		if (vnic->vn_lower_mh == NULL) {
    757 			ASSERT(vnic->vn_link_id == 0);
    758 			return (B_TRUE);
    759 		}
    760 		return (B_FALSE);
    761 	}
    762 	case MAC_CAPAB_NO_NATIVEVLAN:
    763 	case MAC_CAPAB_NO_ZCOPY:
    764 		return (B_TRUE);
    765 	case MAC_CAPAB_VRRP: {
    766 		mac_capab_vrrp_t *vrrp_capab = cap_data;
    767 
    768 		if (vnic->vn_vrid != 0) {
    769 			if (vrrp_capab != NULL)
    770 				vrrp_capab->mcv_af = vnic->vn_af;
    771 			return (B_TRUE);
    772 		}
    773 		return (B_FALSE);
    774 	}
    775 	default:
    776 		return (B_FALSE);
    777 	}
    778 	return (B_TRUE);
    779 }
    780 
    781 /* ARGSUSED */
    782 static int
    783 vnic_m_start(void *arg)
    784 {
    785 	return (0);
    786 }
    787 
    788 /* ARGSUSED */
    789 static void
    790 vnic_m_stop(void *arg)
    791 {
    792 }
    793 
    794 /* ARGSUSED */
    795 static int
    796 vnic_m_promisc(void *arg, boolean_t on)
    797 {
    798 	return (0);
    799 }
    800 
    801 /* ARGSUSED */
    802 static int
    803 vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
    804 {
    805 	return (0);
    806 }
    807 
    808 static int
    809 vnic_m_unicst(void *arg, const uint8_t *macaddr)
    810 {
    811 	vnic_t *vnic = arg;
    812 
    813 	return (mac_vnic_unicast_set(vnic->vn_mch, macaddr));
    814 }
    815 
    816 /*
    817  * Callback functions for set/get of properties
    818  */
    819 /*ARGSUSED*/
    820 static int
    821 vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
    822     uint_t pr_valsize, const void *pr_val)
    823 {
    824 	int 		err = ENOTSUP;
    825 	vnic_t		*vn = m_driver;
    826 
    827 	/* allow setting MTU only on an etherstub */
    828 	if (vn->vn_link_id != DATALINK_INVALID_LINKID)
    829 		return (err);
    830 
    831 	switch (pr_num) {
    832 	case MAC_PROP_MTU: {
    833 		uint32_t	mtu;
    834 
    835 		if (pr_valsize < sizeof (mtu)) {
    836 			err = EINVAL;
    837 			break;
    838 		}
    839 		bcopy(pr_val, &mtu, sizeof (mtu));
    840 		if (mtu < ANCHOR_VNIC_MIN_MTU || mtu > ANCHOR_VNIC_MAX_MTU) {
    841 			err = EINVAL;
    842 			break;
    843 		}
    844 		err = mac_maxsdu_update(vn->vn_mh, mtu);
    845 		break;
    846 	}
    847 	default:
    848 		break;
    849 	}
    850 	return (err);
    851 }
    852 
    853 /*ARGSUSED*/
    854 static int
    855 vnic_m_getprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
    856     uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm)
    857 {
    858 	mac_propval_range_t 	range;
    859 	vnic_t			*vn = m_driver;
    860 	int 			err = ENOTSUP;
    861 
    862 	/* MTU setting allowed only on an etherstub */
    863 	if (vn->vn_link_id != DATALINK_INVALID_LINKID)
    864 		return (err);
    865 
    866 	switch (pr_num) {
    867 	case MAC_PROP_MTU:
    868 		if (!(pr_flags & MAC_PROP_POSSIBLE))
    869 			return (ENOTSUP);
    870 		if (pr_valsize < sizeof (mac_propval_range_t))
    871 			return (EINVAL);
    872 		range.mpr_count = 1;
    873 		range.mpr_type = MAC_PROPVAL_UINT32;
    874 		range.range_uint32[0].mpur_min = ANCHOR_VNIC_MIN_MTU;
    875 		range.range_uint32[0].mpur_max = ANCHOR_VNIC_MAX_MTU;
    876 		bcopy(&range, pr_val, sizeof (range));
    877 		return (0);
    878 	default:
    879 		break;
    880 	}
    881 
    882 	return (err);
    883 }
    884 
    885 int
    886 vnic_info(vnic_info_t *info, cred_t *credp)
    887 {
    888 	vnic_t		*vnic;
    889 	int		err;
    890 
    891 	/* Make sure that the VNIC link is visible from the caller's zone. */
    892 	if (!dls_devnet_islinkvisible(info->vn_vnic_id, crgetzoneid(credp)))
    893 		return (ENOENT);
    894 
    895 	rw_enter(&vnic_lock, RW_WRITER);
    896 
    897 	err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(info->vn_vnic_id),
    898 	    (mod_hash_val_t *)&vnic);
    899 	if (err != 0) {
    900 		rw_exit(&vnic_lock);
    901 		return (ENOENT);
    902 	}
    903 
    904 	info->vn_link_id = vnic->vn_link_id;
    905 	info->vn_mac_addr_type = vnic->vn_addr_type;
    906 	info->vn_mac_len = vnic->vn_addr_len;
    907 	bcopy(vnic->vn_addr, info->vn_mac_addr, MAXMACADDRLEN);
    908 	info->vn_mac_slot = vnic->vn_slot_id;
    909 	info->vn_mac_prefix_len = 0;
    910 	info->vn_vid = vnic->vn_vid;
    911 	info->vn_force = vnic->vn_force;
    912 	info->vn_vrid = vnic->vn_vrid;
    913 	info->vn_af = vnic->vn_af;
    914 
    915 	bzero(&info->vn_resource_props, sizeof (mac_resource_props_t));
    916 	if (vnic->vn_mch != NULL)
    917 		mac_resource_ctl_get(vnic->vn_mch, &info->vn_resource_props);
    918 
    919 	rw_exit(&vnic_lock);
    920 	return (0);
    921 }
    922 
    923 static void
    924 vnic_notify_cb(void *arg, mac_notify_type_t type)
    925 {
    926 	vnic_t *vnic = arg;
    927 
    928 	/*
    929 	 * Do not deliver notifications if the vnic is not fully initialized
    930 	 * or is in process of being torn down.
    931 	 */
    932 	if (!vnic->vn_enabled)
    933 		return;
    934 
    935 	switch (type) {
    936 	case MAC_NOTE_UNICST:
    937 		/*
    938 		 * Only the VLAN VNIC needs to be notified with primary MAC
    939 		 * address change.
    940 		 */
    941 		if (vnic->vn_addr_type != VNIC_MAC_ADDR_TYPE_PRIMARY)
    942 			return;
    943 
    944 		/*  the unicast MAC address value */
    945 		mac_unicast_primary_get(vnic->vn_lower_mh, vnic->vn_addr);
    946 
    947 		/* notify its upper layer MAC about MAC address change */
    948 		mac_unicst_update(vnic->vn_mh, (const uint8_t *)vnic->vn_addr);
    949 		break;
    950 
    951 	case MAC_NOTE_LINK:
    952 		mac_link_update(vnic->vn_mh,
    953 		    mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE));
    954 		break;
    955 
    956 	default:
    957 		break;
    958 	}
    959 }
    960