Home | History | Annotate | Download | only in io
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/types.h>
     28 #include <sys/errno.h>
     29 #include <sys/param.h>
     30 #include <sys/stream.h>
     31 #include <sys/kmem.h>
     32 #include <sys/conf.h>
     33 #include <sys/devops.h>
     34 #include <sys/ksynch.h>
     35 #include <sys/stat.h>
     36 #include <sys/modctl.h>
     37 #include <sys/modhash.h>
     38 #include <sys/debug.h>
     39 #include <sys/ethernet.h>
     40 #include <sys/dlpi.h>
     41 #include <net/if.h>
     42 #include <sys/mac_provider.h>
     43 #include <sys/mac_client.h>
     44 #include <sys/mac_client_priv.h>
     45 #include <sys/mac_ether.h>
     46 #include <sys/ddi.h>
     47 #include <sys/sunddi.h>
     48 #include <sys/strsun.h>
     49 #include <sys/note.h>
     50 #include <sys/atomic.h>
     51 #include <sys/vnet.h>
     52 #include <sys/vlan.h>
     53 #include <sys/vnet_mailbox.h>
     54 #include <sys/vnet_common.h>
     55 #include <sys/dds.h>
     56 #include <sys/strsubr.h>
     57 #include <sys/taskq.h>
     58 
     59 /*
     60  * Function prototypes.
     61  */
     62 
     63 /* DDI entrypoints */
     64 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
     65 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
     66 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
     67 
     68 /* MAC entrypoints  */
     69 static int vnet_m_stat(void *, uint_t, uint64_t *);
     70 static int vnet_m_start(void *);
     71 static void vnet_m_stop(void *);
     72 static int vnet_m_promisc(void *, boolean_t);
     73 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
     74 static int vnet_m_unicst(void *, const uint8_t *);
     75 mblk_t *vnet_m_tx(void *, mblk_t *);
     76 static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
     77 #ifdef	VNET_IOC_DEBUG
     78 static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
     79 #endif
     80 static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data);
     81 static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
     82 	const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle);
     83 static void vnet_get_group(void *arg, mac_ring_type_t type, const int index,
     84 	mac_group_info_t *infop, mac_group_handle_t handle);
     85 static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
     86 static void vnet_rx_ring_stop(mac_ring_driver_t rdriver);
     87 static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
     88 static void vnet_tx_ring_stop(mac_ring_driver_t rdriver);
     89 static int vnet_ring_enable_intr(void *arg);
     90 static int vnet_ring_disable_intr(void *arg);
     91 static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup);
     92 static int vnet_addmac(void *arg, const uint8_t *mac_addr);
     93 static int vnet_remmac(void *arg, const uint8_t *mac_addr);
     94 
     95 /* vnet internal functions */
     96 static int vnet_unattach(vnet_t *vnetp);
     97 static void vnet_ring_grp_init(vnet_t *vnetp);
     98 static void vnet_ring_grp_uninit(vnet_t *vnetp);
     99 static int vnet_mac_register(vnet_t *);
    100 static int vnet_read_mac_address(vnet_t *vnetp);
    101 static int vnet_bind_vgenring(vnet_res_t *vresp);
    102 static void vnet_unbind_vgenring(vnet_res_t *vresp);
    103 static int vnet_bind_hwrings(vnet_t *vnetp);
    104 static void vnet_unbind_hwrings(vnet_t *vnetp);
    105 static int vnet_bind_rings(vnet_res_t *vresp);
    106 static void vnet_unbind_rings(vnet_res_t *vresp);
    107 static int vnet_hio_stat(void *, uint_t, uint64_t *);
    108 static int vnet_hio_start(void *);
    109 static void vnet_hio_stop(void *);
    110 static void vnet_hio_notify_cb(void *arg, mac_notify_type_t type);
    111 mblk_t *vnet_hio_tx(void *, mblk_t *);
    112 
    113 /* Forwarding database (FDB) routines */
    114 static void vnet_fdb_create(vnet_t *vnetp);
    115 static void vnet_fdb_destroy(vnet_t *vnetp);
    116 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
    117 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
    118 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
    119 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);
    120 
    121 static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
    122 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
    123 static void vnet_tx_update(vio_net_handle_t vrh);
    124 static void vnet_res_start_task(void *arg);
    125 static void vnet_start_resources(vnet_t *vnetp);
    126 static void vnet_stop_resources(vnet_t *vnetp);
    127 static void vnet_dispatch_res_task(vnet_t *vnetp);
    128 static void vnet_res_start_task(void *arg);
    129 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
    130 static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp);
    131 static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp);
    132 
    133 /* Exported to vnet_gen */
    134 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
    135 void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
    136 void vnet_dds_cleanup_hio(vnet_t *vnetp);
    137 
    138 static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
    139     vnet_res_t *vresp);
    140 static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
    141 static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
    142 static void vnet_hio_destroy_kstats(kstat_t *ksp);
    143 
/* Exported to vnet_dds */
    145 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
    146 int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
    147 void vnet_hio_mac_cleanup(vnet_t *vnetp);
    148 
    149 /* Externs that are imported from vnet_gen */
    150 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
    151     const uint8_t *macaddr, void **vgenhdl);
    152 extern int vgen_init_mdeg(void *arg);
    153 extern void vgen_uninit(void *arg);
    154 extern int vgen_dds_tx(void *arg, void *dmsg);
    155 extern void vgen_mod_init(void);
    156 extern int vgen_mod_cleanup(void);
    157 extern void vgen_mod_fini(void);
    158 extern int vgen_enable_intr(void *arg);
    159 extern int vgen_disable_intr(void *arg);
    160 extern mblk_t *vgen_poll(void *arg, int bytes_to_pickup);
    161 
    162 /* Externs that are imported from vnet_dds */
    163 extern void vdds_mod_init(void);
    164 extern void vdds_mod_fini(void);
    165 extern int vdds_init(vnet_t *vnetp);
    166 extern void vdds_cleanup(vnet_t *vnetp);
    167 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
    168 extern void vdds_cleanup_hybrid_res(void *arg);
    169 extern void vdds_cleanup_hio(vnet_t *vnetp);
    170 
    171 /* Externs imported from mac_impl */
    172 extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *);
    173 
    174 #define	DRV_NAME	"vnet"
    175 #define	VNET_FDBE_REFHOLD(p)						\
    176 {									\
    177 	atomic_inc_32(&(p)->refcnt);					\
    178 	ASSERT((p)->refcnt != 0);					\
    179 }
    180 
    181 #define	VNET_FDBE_REFRELE(p)						\
    182 {									\
    183 	ASSERT((p)->refcnt != 0);					\
    184 	atomic_dec_32(&(p)->refcnt);					\
    185 }
    186 
    187 #ifdef	VNET_IOC_DEBUG
    188 #define	VNET_M_CALLBACK_FLAGS	(MC_IOCTL | MC_GETCAPAB)
    189 #else
    190 #define	VNET_M_CALLBACK_FLAGS	(MC_GETCAPAB)
    191 #endif
    192 
    193 static mac_callbacks_t vnet_m_callbacks = {
    194 	VNET_M_CALLBACK_FLAGS,
    195 	vnet_m_stat,
    196 	vnet_m_start,
    197 	vnet_m_stop,
    198 	vnet_m_promisc,
    199 	vnet_m_multicst,
    200 	NULL,	/* m_unicst entry must be NULL while rx rings are exposed */
    201 	NULL,	/* m_tx entry must be NULL while tx rings are exposed */
    202 	vnet_m_ioctl,
    203 	vnet_m_capab,
    204 	NULL
    205 };
    206 
    207 static mac_callbacks_t vnet_hio_res_callbacks = {
    208 	0,
    209 	vnet_hio_stat,
    210 	vnet_hio_start,
    211 	vnet_hio_stop,
    212 	NULL,
    213 	NULL,
    214 	NULL,
    215 	vnet_hio_tx,
    216 	NULL,
    217 	NULL,
    218 	NULL
    219 };
    220 
    221 /*
    222  * Linked list of "vnet_t" structures - one per instance.
    223  */
    224 static vnet_t	*vnet_headp = NULL;
    225 static krwlock_t vnet_rw;
    226 
    227 /* Tunables */
    228 uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
    229 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
    230 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
    231 uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */
    232 
    233 /* Configure tx serialization in mac layer for the vnet device */
    234 boolean_t vnet_mac_tx_serialize = B_TRUE;
    235 
    236 /*
    237  * Set this to non-zero to enable additional internal receive buffer pools
    238  * based on the MTU of the device for better performance at the cost of more
    239  * memory consumption. This is turned off by default, to use allocb(9F) for
    240  * receive buffer allocations of sizes > 2K.
    241  */
    242 boolean_t vnet_jumbo_rxpools = B_FALSE;
    243 
    244 /* # of chains in fdb hash table */
    245 uint32_t	vnet_fdb_nchains = VNET_NFDB_HASH;
    246 
    247 /* Internal tunables */
    248 uint32_t	vnet_ethermtu = 1500;	/* mtu of the device */
    249 
    250 /*
    251  * Default vlan id. This is only used internally when the "default-vlan-id"
    252  * property is not present in the MD device node. Therefore, this should not be
    253  * used as a tunable; if this value is changed, the corresponding variable
    254  * should be updated to the same value in vsw and also other vnets connected to
    255  * the same vsw.
    256  */
    257 uint16_t	vnet_default_vlan_id = 1;
    258 
    259 /* delay in usec to wait for all references on a fdb entry to be dropped */
    260 uint32_t vnet_fdbe_refcnt_delay = 10;
    261 
    262 static struct ether_addr etherbroadcastaddr = {
    263 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
    264 };
    265 
    266 /* mac_open() retry delay in usec */
    267 uint32_t vnet_mac_open_delay = 100;	/* 0.1 ms */
    268 
    269 /* max # of mac_open() retries */
    270 uint32_t vnet_mac_open_retries = 100;
    271 
    272 /*
    273  * Property names
    274  */
    275 static char macaddr_propname[] = "local-mac-address";
    276 
    277 /*
    278  * This is the string displayed by modinfo(1m).
    279  */
    280 static char vnet_ident[] = "vnet driver";
    281 extern struct mod_ops mod_driverops;
    282 static struct cb_ops cb_vnetops = {
    283 	nulldev,		/* cb_open */
    284 	nulldev,		/* cb_close */
    285 	nodev,			/* cb_strategy */
    286 	nodev,			/* cb_print */
    287 	nodev,			/* cb_dump */
    288 	nodev,			/* cb_read */
    289 	nodev,			/* cb_write */
    290 	nodev,			/* cb_ioctl */
    291 	nodev,			/* cb_devmap */
    292 	nodev,			/* cb_mmap */
    293 	nodev,			/* cb_segmap */
    294 	nochpoll,		/* cb_chpoll */
    295 	ddi_prop_op,		/* cb_prop_op */
    296 	NULL,			/* cb_stream */
    297 	(int)(D_MP)		/* cb_flag */
    298 };
    299 
    300 static struct dev_ops vnetops = {
    301 	DEVO_REV,		/* devo_rev */
    302 	0,			/* devo_refcnt */
    303 	NULL,			/* devo_getinfo */
    304 	nulldev,		/* devo_identify */
    305 	nulldev,		/* devo_probe */
    306 	vnetattach,		/* devo_attach */
    307 	vnetdetach,		/* devo_detach */
    308 	nodev,			/* devo_reset */
    309 	&cb_vnetops,		/* devo_cb_ops */
    310 	(struct bus_ops *)NULL,	/* devo_bus_ops */
    311 	NULL,			/* devo_power */
    312 	ddi_quiesce_not_supported,	/* devo_quiesce */
    313 };
    314 
    315 static struct modldrv modldrv = {
    316 	&mod_driverops,		/* Type of module.  This one is a driver */
    317 	vnet_ident,		/* ID string */
    318 	&vnetops		/* driver specific ops */
    319 };
    320 
    321 static struct modlinkage modlinkage = {
    322 	MODREV_1, (void *)&modldrv, NULL
    323 };
    324 
    325 #ifdef DEBUG
    326 
    327 /*
    328  * Print debug messages - set to 0xf to enable all msgs
    329  */
    330 int vnet_dbglevel = 0x8;
    331 
    332 static void
    333 debug_printf(const char *fname, void *arg, const char *fmt, ...)
    334 {
    335 	char    buf[512];
    336 	va_list ap;
    337 	vnet_t *vnetp = (vnet_t *)arg;
    338 	char    *bufp = buf;
    339 
    340 	if (vnetp == NULL) {
    341 		(void) sprintf(bufp, "%s: ", fname);
    342 		bufp += strlen(bufp);
    343 	} else {
    344 		(void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
    345 		bufp += strlen(bufp);
    346 	}
    347 	va_start(ap, fmt);
    348 	(void) vsprintf(bufp, fmt, ap);
    349 	va_end(ap);
    350 	cmn_err(CE_CONT, "%s\n", buf);
    351 }
    352 
    353 #endif
    354 
    355 /* _init(9E): initialize the loadable module */
    356 int
    357 _init(void)
    358 {
    359 	int status;
    360 
    361 	DBG1(NULL, "enter\n");
    362 
    363 	mac_init_ops(&vnetops, "vnet");
    364 	status = mod_install(&modlinkage);
    365 	if (status != 0) {
    366 		mac_fini_ops(&vnetops);
    367 	}
    368 	vdds_mod_init();
    369 	vgen_mod_init();
    370 	DBG1(NULL, "exit(%d)\n", status);
    371 	return (status);
    372 }
    373 
    374 /* _fini(9E): prepare the module for unloading. */
    375 int
    376 _fini(void)
    377 {
    378 	int		status;
    379 
    380 	DBG1(NULL, "enter\n");
    381 
    382 	status = vgen_mod_cleanup();
    383 	if (status != 0)
    384 		return (status);
    385 
    386 	status = mod_remove(&modlinkage);
    387 	if (status != 0)
    388 		return (status);
    389 	mac_fini_ops(&vnetops);
    390 	vgen_mod_fini();
    391 	vdds_mod_fini();
    392 
    393 	DBG1(NULL, "exit(%d)\n", status);
    394 	return (status);
    395 }
    396 
    397 /* _info(9E): return information about the loadable module */
    398 int
    399 _info(struct modinfo *modinfop)
    400 {
    401 	return (mod_info(&modlinkage, modinfop));
    402 }
    403 
    404 /*
    405  * attach(9E): attach a device to the system.
    406  * called once for each instance of the device on the system.
    407  */
    408 static int
    409 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
    410 {
    411 	vnet_t			*vnetp;
    412 	int			status;
    413 	int			instance;
    414 	uint64_t		reg;
    415 	char			qname[TASKQ_NAMELEN];
    416 	vnet_attach_progress_t	attach_progress;
    417 
    418 	attach_progress = AST_init;
    419 
    420 	switch (cmd) {
    421 	case DDI_ATTACH:
    422 		break;
    423 	case DDI_RESUME:
    424 	case DDI_PM_RESUME:
    425 	default:
    426 		goto vnet_attach_fail;
    427 	}
    428 
    429 	instance = ddi_get_instance(dip);
    430 	DBG1(NULL, "instance(%d) enter\n", instance);
    431 
    432 	/* allocate vnet_t and mac_t structures */
    433 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
    434 	vnetp->dip = dip;
    435 	vnetp->instance = instance;
    436 	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
    437 	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
    438 	attach_progress |= AST_vnet_alloc;
    439 
    440 	vnet_ring_grp_init(vnetp);
    441 	attach_progress |= AST_ring_init;
    442 
    443 	status = vdds_init(vnetp);
    444 	if (status != 0) {
    445 		goto vnet_attach_fail;
    446 	}
    447 	attach_progress |= AST_vdds_init;
    448 
    449 	/* setup links to vnet_t from both devinfo and mac_t */
    450 	ddi_set_driver_private(dip, (caddr_t)vnetp);
    451 
    452 	/* read the mac address */
    453 	status = vnet_read_mac_address(vnetp);
    454 	if (status != DDI_SUCCESS) {
    455 		goto vnet_attach_fail;
    456 	}
    457 	attach_progress |= AST_read_macaddr;
    458 
    459 	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
    460 	    DDI_PROP_DONTPASS, "reg", -1);
    461 	if (reg == -1) {
    462 		goto vnet_attach_fail;
    463 	}
    464 	vnetp->reg = reg;
    465 
    466 	vnet_fdb_create(vnetp);
    467 	attach_progress |= AST_fdbh_alloc;
    468 
    469 	(void) snprintf(qname, TASKQ_NAMELEN, "vnet_taskq%d", instance);
    470 	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
    471 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
    472 		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
    473 		    instance);
    474 		goto vnet_attach_fail;
    475 	}
    476 	attach_progress |= AST_taskq_create;
    477 
    478 	/* add to the list of vnet devices */
    479 	WRITE_ENTER(&vnet_rw);
    480 	vnetp->nextp = vnet_headp;
    481 	vnet_headp = vnetp;
    482 	RW_EXIT(&vnet_rw);
    483 
    484 	attach_progress |= AST_vnet_list;
    485 
    486 	/*
    487 	 * Initialize the generic vnet plugin which provides communication via
    488 	 * sun4v LDC (logical domain channel) based resources. This involves 2
    489 	 * steps; first, vgen_init() is invoked to read the various properties
    490 	 * of the vnet device from its MD node (including its mtu which is
    491 	 * needed to mac_register()) and obtain a handle to the vgen layer.
    492 	 * After mac_register() is done and we have a mac handle, we then
    493 	 * invoke vgen_init_mdeg() which registers with the the MD event
    494 	 * generator (mdeg) framework to allow LDC resource notifications.
    495 	 * Note: this sequence also allows us to report the correct default #
    496 	 * of pseudo rings (2TX and 3RX) in vnet_m_capab() which gets invoked
    497 	 * in the context of mac_register(); and avoids conflicting with
    498 	 * dynamic pseudo rx rings which get added/removed as a result of mdeg
    499 	 * events in vgen.
    500 	 */
    501 	status = vgen_init(vnetp, reg, vnetp->dip,
    502 	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
    503 	if (status != DDI_SUCCESS) {
    504 		DERR(vnetp, "vgen_init() failed\n");
    505 		goto vnet_attach_fail;
    506 	}
    507 	attach_progress |= AST_vgen_init;
    508 
    509 	status = vnet_mac_register(vnetp);
    510 	if (status != DDI_SUCCESS) {
    511 		goto vnet_attach_fail;
    512 	}
    513 	vnetp->link_state = LINK_STATE_UNKNOWN;
    514 	attach_progress |= AST_macreg;
    515 
    516 	status = vgen_init_mdeg(vnetp->vgenhdl);
    517 	if (status != DDI_SUCCESS) {
    518 		goto vnet_attach_fail;
    519 	}
    520 	attach_progress |= AST_init_mdeg;
    521 
    522 	vnetp->attach_progress = attach_progress;
    523 
    524 	DBG1(NULL, "instance(%d) exit\n", instance);
    525 	return (DDI_SUCCESS);
    526 
    527 vnet_attach_fail:
    528 	vnetp->attach_progress = attach_progress;
    529 	status = vnet_unattach(vnetp);
    530 	ASSERT(status == 0);
    531 	return (DDI_FAILURE);
    532 }
    533 
    534 /*
    535  * detach(9E): detach a device from the system.
    536  */
    537 static int
    538 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
    539 {
    540 	vnet_t		*vnetp;
    541 	int		instance;
    542 
    543 	instance = ddi_get_instance(dip);
    544 	DBG1(NULL, "instance(%d) enter\n", instance);
    545 
    546 	vnetp = ddi_get_driver_private(dip);
    547 	if (vnetp == NULL) {
    548 		goto vnet_detach_fail;
    549 	}
    550 
    551 	switch (cmd) {
    552 	case DDI_DETACH:
    553 		break;
    554 	case DDI_SUSPEND:
    555 	case DDI_PM_SUSPEND:
    556 	default:
    557 		goto vnet_detach_fail;
    558 	}
    559 
    560 	if (vnet_unattach(vnetp) != 0) {
    561 		goto vnet_detach_fail;
    562 	}
    563 
    564 	return (DDI_SUCCESS);
    565 
    566 vnet_detach_fail:
    567 	return (DDI_FAILURE);
    568 }
    569 
    570 /*
    571  * Common routine to handle vnetattach() failure and vnetdetach(). Note that
    572  * the only reason this function could fail is if mac_unregister() fails.
    573  * Otherwise, this function must ensure that all resources are freed and return
    574  * success.
    575  */
    576 static int
    577 vnet_unattach(vnet_t *vnetp)
    578 {
    579 	vnet_attach_progress_t	attach_progress;
    580 
    581 	attach_progress = vnetp->attach_progress;
    582 
    583 	/*
    584 	 * Disable the mac device in the gldv3 subsystem. This can fail, in
    585 	 * particular if there are still any open references to this mac
    586 	 * device; in which case we just return failure without continuing to
    587 	 * detach further.
    588 	 * If it succeeds, we then invoke vgen_uninit() which should unregister
    589 	 * any pseudo rings registered with the mac layer. Note we keep the
    590 	 * AST_macreg flag on, so we can unregister with the mac layer at
    591 	 * the end of this routine.
    592 	 */
    593 	if (attach_progress & AST_macreg) {
    594 		if (mac_disable(vnetp->mh) != 0) {
    595 			return (1);
    596 		}
    597 	}
    598 
    599 	/*
    600 	 * Now that we have disabled the device, we must finish all other steps
    601 	 * and successfully return from this function; otherwise we will end up
    602 	 * leaving the device in a broken/unusable state.
    603 	 *
    604 	 * First, release any hybrid resources assigned to this vnet device.
    605 	 */
    606 	if (attach_progress & AST_vdds_init) {
    607 		vdds_cleanup(vnetp);
    608 		attach_progress &= ~AST_vdds_init;
    609 	}
    610 
    611 	/*
    612 	 * Uninit vgen. This stops further mdeg callbacks to this vnet
    613 	 * device and/or its ports; and detaches any existing ports.
    614 	 */
    615 	if (attach_progress & (AST_vgen_init|AST_init_mdeg)) {
    616 		vgen_uninit(vnetp->vgenhdl);
    617 		attach_progress &= ~AST_vgen_init;
    618 		attach_progress &= ~AST_init_mdeg;
    619 	}
    620 
    621 	/* Destroy the taskq. */
    622 	if (attach_progress & AST_taskq_create) {
    623 		ddi_taskq_destroy(vnetp->taskqp);
    624 		attach_progress &= ~AST_taskq_create;
    625 	}
    626 
    627 	/* Destroy fdb. */
    628 	if (attach_progress & AST_fdbh_alloc) {
    629 		vnet_fdb_destroy(vnetp);
    630 		attach_progress &= ~AST_fdbh_alloc;
    631 	}
    632 
    633 	/* Remove from the device list */
    634 	if (attach_progress & AST_vnet_list) {
    635 		vnet_t		**vnetpp;
    636 		/* unlink from instance(vnet_t) list */
    637 		WRITE_ENTER(&vnet_rw);
    638 		for (vnetpp = &vnet_headp; *vnetpp;
    639 		    vnetpp = &(*vnetpp)->nextp) {
    640 			if (*vnetpp == vnetp) {
    641 				*vnetpp = vnetp->nextp;
    642 				break;
    643 			}
    644 		}
    645 		RW_EXIT(&vnet_rw);
    646 		attach_progress &= ~AST_vnet_list;
    647 	}
    648 
    649 	if (attach_progress & AST_ring_init) {
    650 		vnet_ring_grp_uninit(vnetp);
    651 		attach_progress &= ~AST_ring_init;
    652 	}
    653 
    654 	if (attach_progress & AST_macreg) {
    655 		VERIFY(mac_unregister(vnetp->mh) == 0);
    656 		vnetp->mh = NULL;
    657 		attach_progress &= ~AST_macreg;
    658 	}
    659 
    660 	if (attach_progress & AST_vnet_alloc) {
    661 		rw_destroy(&vnetp->vrwlock);
    662 		rw_destroy(&vnetp->vsw_fp_rw);
    663 		attach_progress &= ~AST_vnet_list;
    664 		KMEM_FREE(vnetp);
    665 	}
    666 
    667 	return (0);
    668 }
    669 
    670 /* enable the device for transmit/receive */
    671 static int
    672 vnet_m_start(void *arg)
    673 {
    674 	vnet_t		*vnetp = arg;
    675 
    676 	DBG1(vnetp, "enter\n");
    677 
    678 	WRITE_ENTER(&vnetp->vrwlock);
    679 	vnetp->flags |= VNET_STARTED;
    680 	vnet_start_resources(vnetp);
    681 	RW_EXIT(&vnetp->vrwlock);
    682 
    683 	DBG1(vnetp, "exit\n");
    684 	return (VNET_SUCCESS);
    685 
    686 }
    687 
    688 /* stop transmit/receive for the device */
    689 static void
    690 vnet_m_stop(void *arg)
    691 {
    692 	vnet_t		*vnetp = arg;
    693 
    694 	DBG1(vnetp, "enter\n");
    695 
    696 	WRITE_ENTER(&vnetp->vrwlock);
    697 	if (vnetp->flags & VNET_STARTED) {
    698 		/*
    699 		 * Set the flags appropriately; this should prevent starting of
    700 		 * any new resources that are added(see vnet_res_start_task()),
    701 		 * while we release the vrwlock in vnet_stop_resources() before
    702 		 * stopping each resource.
    703 		 */
    704 		vnetp->flags &= ~VNET_STARTED;
    705 		vnetp->flags |= VNET_STOPPING;
    706 		vnet_stop_resources(vnetp);
    707 		vnetp->flags &= ~VNET_STOPPING;
    708 	}
    709 	RW_EXIT(&vnetp->vrwlock);
    710 
    711 	DBG1(vnetp, "exit\n");
    712 }
    713 
    714 /* set the unicast mac address of the device */
    715 static int
    716 vnet_m_unicst(void *arg, const uint8_t *macaddr)
    717 {
    718 	_NOTE(ARGUNUSED(macaddr))
    719 
    720 	vnet_t *vnetp = arg;
    721 
    722 	DBG1(vnetp, "enter\n");
    723 	/*
    724 	 * NOTE: setting mac address dynamically is not supported.
    725 	 */
    726 	DBG1(vnetp, "exit\n");
    727 
    728 	return (VNET_FAILURE);
    729 }
    730 
    731 /* enable/disable a multicast address */
    732 static int
    733 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
    734 {
    735 	_NOTE(ARGUNUSED(add, mca))
    736 
    737 	vnet_t		*vnetp = arg;
    738 	vnet_res_t	*vresp;
    739 	mac_register_t	*macp;
    740 	mac_callbacks_t	*cbp;
    741 	int		rv = VNET_SUCCESS;
    742 
    743 	DBG1(vnetp, "enter\n");
    744 
    745 	READ_ENTER(&vnetp->vsw_fp_rw);
    746 	if (vnetp->vsw_fp == NULL) {
    747 		RW_EXIT(&vnetp->vsw_fp_rw);
    748 		return (EAGAIN);
    749 	}
    750 	VNET_FDBE_REFHOLD(vnetp->vsw_fp);
    751 	RW_EXIT(&vnetp->vsw_fp_rw);
    752 
    753 	vresp = vnetp->vsw_fp;
    754 	macp = &vresp->macreg;
    755 	cbp = macp->m_callbacks;
    756 	rv = cbp->mc_multicst(macp->m_driver, add, mca);
    757 
    758 	VNET_FDBE_REFRELE(vnetp->vsw_fp);
    759 
    760 	DBG1(vnetp, "exit(%d)\n", rv);
    761 	return (rv);
    762 }
    763 
    764 /* set or clear promiscuous mode on the device */
    765 static int
    766 vnet_m_promisc(void *arg, boolean_t on)
    767 {
    768 	_NOTE(ARGUNUSED(on))
    769 
    770 	vnet_t *vnetp = arg;
    771 	DBG1(vnetp, "enter\n");
    772 	/*
    773 	 * NOTE: setting promiscuous mode is not supported, just return success.
    774 	 */
    775 	DBG1(vnetp, "exit\n");
    776 	return (VNET_SUCCESS);
    777 }
    778 
    779 /*
    780  * Transmit a chain of packets. This function provides switching functionality
    781  * based on the destination mac address to reach other guests (within ldoms) or
    782  * external hosts.
    783  */
    784 mblk_t *
    785 vnet_tx_ring_send(void *arg, mblk_t *mp)
    786 {
    787 	vnet_pseudo_tx_ring_t	*tx_ringp;
    788 	vnet_t			*vnetp;
    789 	vnet_res_t		*vresp;
    790 	mblk_t			*next;
    791 	mblk_t			*resid_mp;
    792 	mac_register_t		*macp;
    793 	struct ether_header	*ehp;
    794 	boolean_t		is_unicast;
    795 	boolean_t		is_pvid;	/* non-default pvid ? */
    796 	boolean_t		hres;		/* Hybrid resource ? */
    797 	void			*tx_arg;
    798 
    799 	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
    800 	vnetp = (vnet_t *)tx_ringp->vnetp;
    801 	DBG1(vnetp, "enter\n");
    802 	ASSERT(mp != NULL);
    803 
    804 	is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;
    805 
    806 	while (mp != NULL) {
    807 
    808 		next = mp->b_next;
    809 		mp->b_next = NULL;
    810 
    811 		/*
    812 		 * Find fdb entry for the destination
    813 		 * and hold a reference to it.
    814 		 */
    815 		ehp = (struct ether_header *)mp->b_rptr;
    816 		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
    817 		if (vresp != NULL) {
    818 
    819 			/*
    820 			 * Destination found in FDB.
    821 			 * The destination is a vnet device within ldoms
    822 			 * and directly reachable, invoke the tx function
    823 			 * in the fdb entry.
    824 			 */
    825 			macp = &vresp->macreg;
    826 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
    827 
    828 			/* tx done; now release ref on fdb entry */
    829 			VNET_FDBE_REFRELE(vresp);
    830 
    831 			if (resid_mp != NULL) {
    832 				/* m_tx failed */
    833 				mp->b_next = next;
    834 				break;
    835 			}
    836 		} else {
    837 			is_unicast = !(IS_BROADCAST(ehp) ||
    838 			    (IS_MULTICAST(ehp)));
    839 			/*
    840 			 * Destination is not in FDB.
    841 			 * If the destination is broadcast or multicast,
    842 			 * then forward the packet to vswitch.
    843 			 * If a Hybrid resource avilable, then send the
    844 			 * unicast packet via hybrid resource, otherwise
    845 			 * forward it to vswitch.
    846 			 */
    847 			READ_ENTER(&vnetp->vsw_fp_rw);
    848 
    849 			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
    850 				vresp = vnetp->hio_fp;
    851 				hres = B_TRUE;
    852 			} else {
    853 				vresp = vnetp->vsw_fp;
    854 				hres = B_FALSE;
    855 			}
    856 			if (vresp == NULL) {
    857 				/*
    858 				 * no fdb entry to vsw? drop the packet.
    859 				 */
    860 				RW_EXIT(&vnetp->vsw_fp_rw);
    861 				freemsg(mp);
    862 				mp = next;
    863 				continue;
    864 			}
    865 
    866 			/* ref hold the fdb entry to vsw */
    867 			VNET_FDBE_REFHOLD(vresp);
    868 
    869 			RW_EXIT(&vnetp->vsw_fp_rw);
    870 
    871 			/*
    872 			 * In the case of a hybrid resource we need to insert
    873 			 * the tag for the pvid case here; unlike packets that
    874 			 * are destined to a vnet/vsw in which case the vgen
    875 			 * layer does the tagging before sending it over ldc.
    876 			 */
    877 			if (hres == B_TRUE) {
    878 				/*
    879 				 * Determine if the frame being transmitted
    880 				 * over the hybrid resource is untagged. If so,
    881 				 * insert the tag before transmitting.
    882 				 */
    883 				if (is_pvid == B_TRUE &&
    884 				    ehp->ether_type != htons(ETHERTYPE_VLAN)) {
    885 
    886 					mp = vnet_vlan_insert_tag(mp,
    887 					    vnetp->pvid);
    888 					if (mp == NULL) {
    889 						VNET_FDBE_REFRELE(vresp);
    890 						mp = next;
    891 						continue;
    892 					}
    893 
    894 				}
    895 
    896 				macp = &vresp->macreg;
    897 				tx_arg = tx_ringp;
    898 			} else {
    899 				macp = &vresp->macreg;
    900 				tx_arg = macp->m_driver;
    901 			}
    902 			resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp);
    903 
    904 			/* tx done; now release ref on fdb entry */
    905 			VNET_FDBE_REFRELE(vresp);
    906 
    907 			if (resid_mp != NULL) {
    908 				/* m_tx failed */
    909 				mp->b_next = next;
    910 				break;
    911 			}
    912 		}
    913 
    914 		mp = next;
    915 	}
    916 
    917 	DBG1(vnetp, "exit\n");
    918 	return (mp);
    919 }
    920 
    921 /* get statistics from the device */
    922 int
    923 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
    924 {
    925 	vnet_t *vnetp = arg;
    926 	vnet_res_t	*vresp;
    927 	mac_register_t	*macp;
    928 	mac_callbacks_t	*cbp;
    929 	uint64_t val_total = 0;
    930 
    931 	DBG1(vnetp, "enter\n");
    932 
    933 	/*
    934 	 * get the specified statistic from each transport and return the
    935 	 * aggregate val.  This obviously only works for counters.
    936 	 */
    937 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
    938 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
    939 		return (ENOTSUP);
    940 	}
    941 
    942 	READ_ENTER(&vnetp->vrwlock);
    943 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
    944 		macp = &vresp->macreg;
    945 		cbp = macp->m_callbacks;
    946 		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
    947 			val_total += *val;
    948 	}
    949 	RW_EXIT(&vnetp->vrwlock);
    950 
    951 	*val = val_total;
    952 
    953 	DBG1(vnetp, "exit\n");
    954 	return (0);
    955 }
    956 
    957 static void
    958 vnet_ring_grp_init(vnet_t *vnetp)
    959 {
    960 	vnet_pseudo_rx_group_t	*rx_grp;
    961 	vnet_pseudo_rx_ring_t	*rx_ringp;
    962 	vnet_pseudo_tx_group_t	*tx_grp;
    963 	vnet_pseudo_tx_ring_t	*tx_ringp;
    964 	int			i;
    965 
    966 	tx_grp = &vnetp->tx_grp[0];
    967 	tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) *
    968 	    VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP);
    969 	for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) {
    970 		tx_ringp[i].state |= VNET_TXRING_SHARED;
    971 	}
    972 	tx_grp->rings = tx_ringp;
    973 	tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS;
    974 
    975 	rx_grp = &vnetp->rx_grp[0];
    976 	rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP;
    977 	rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL);
    978 	rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) *
    979 	    rx_grp->max_ring_cnt, KM_SLEEP);
    980 
    981 	/*
    982 	 * Setup the first 3 Pseudo RX Rings that are reserved;
    983 	 * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource.
    984 	 */
    985 	rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE;
    986 	rx_ringp[0].index = 0;
    987 	rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
    988 	rx_ringp[1].index = 1;
    989 	rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
    990 	rx_ringp[2].index = 2;
    991 
    992 	rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
    993 	rx_grp->rings = rx_ringp;
    994 
    995 	for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
    996 	    i < rx_grp->max_ring_cnt; i++) {
    997 		rx_ringp = &rx_grp->rings[i];
    998 		rx_ringp->state = VNET_RXRING_FREE;
    999 		rx_ringp->index = i;
   1000 	}
   1001 }
   1002 
   1003 static void
   1004 vnet_ring_grp_uninit(vnet_t *vnetp)
   1005 {
   1006 	vnet_pseudo_rx_group_t	*rx_grp;
   1007 	vnet_pseudo_tx_group_t	*tx_grp;
   1008 
   1009 	tx_grp = &vnetp->tx_grp[0];
   1010 	if (tx_grp->rings != NULL) {
   1011 		ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS);
   1012 		kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) *
   1013 		    tx_grp->ring_cnt);
   1014 		tx_grp->rings = NULL;
   1015 	}
   1016 
   1017 	rx_grp = &vnetp->rx_grp[0];
   1018 	if (rx_grp->rings != NULL) {
   1019 		ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP);
   1020 		ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
   1021 		kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) *
   1022 		    rx_grp->max_ring_cnt);
   1023 		rx_grp->rings = NULL;
   1024 	}
   1025 }
   1026 
   1027 static vnet_pseudo_rx_ring_t *
   1028 vnet_alloc_pseudo_rx_ring(vnet_t *vnetp)
   1029 {
   1030 	vnet_pseudo_rx_group_t  *rx_grp;
   1031 	vnet_pseudo_rx_ring_t	*rx_ringp;
   1032 	int			index;
   1033 
   1034 	rx_grp = &vnetp->rx_grp[0];
   1035 	WRITE_ENTER(&rx_grp->lock);
   1036 
   1037 	if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) {
   1038 		/* no rings available */
   1039 		RW_EXIT(&rx_grp->lock);
   1040 		return (NULL);
   1041 	}
   1042 
   1043 	for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
   1044 	    index < rx_grp->max_ring_cnt; index++) {
   1045 		rx_ringp = &rx_grp->rings[index];
   1046 		if (rx_ringp->state == VNET_RXRING_FREE) {
   1047 			rx_ringp->state |= VNET_RXRING_INUSE;
   1048 			rx_grp->ring_cnt++;
   1049 			break;
   1050 		}
   1051 	}
   1052 
   1053 	RW_EXIT(&rx_grp->lock);
   1054 	return (rx_ringp);
   1055 }
   1056 
   1057 static void
   1058 vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp)
   1059 {
   1060 	vnet_pseudo_rx_group_t  *rx_grp;
   1061 
   1062 	ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
   1063 	rx_grp = &vnetp->rx_grp[0];
   1064 	WRITE_ENTER(&rx_grp->lock);
   1065 
   1066 	if (ringp->state != VNET_RXRING_FREE) {
   1067 		ringp->state = VNET_RXRING_FREE;
   1068 		ringp->handle = NULL;
   1069 		rx_grp->ring_cnt--;
   1070 	}
   1071 
   1072 	RW_EXIT(&rx_grp->lock);
   1073 }
   1074 
   1075 /* wrapper function for mac_register() */
   1076 static int
   1077 vnet_mac_register(vnet_t *vnetp)
   1078 {
   1079 	mac_register_t	*macp;
   1080 	int		err;
   1081 
   1082 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
   1083 		return (DDI_FAILURE);
   1084 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
   1085 	macp->m_driver = vnetp;
   1086 	macp->m_dip = vnetp->dip;
   1087 	macp->m_src_addr = vnetp->curr_macaddr;
   1088 	macp->m_callbacks = &vnet_m_callbacks;
   1089 	macp->m_min_sdu = 0;
   1090 	macp->m_max_sdu = vnetp->mtu;
   1091 	macp->m_margin = VLAN_TAGSZ;
   1092 
   1093 	/*
   1094 	 * MAC_VIRT_SERIALIZE flag is needed while hybridIO is enabled to
   1095 	 * workaround tx lock contention issues in nxge.
   1096 	 */
   1097 	macp->m_v12n = MAC_VIRT_LEVEL1;
   1098 	if (vnet_mac_tx_serialize == B_TRUE) {
   1099 		macp->m_v12n |= MAC_VIRT_SERIALIZE;
   1100 	}
   1101 
   1102 	/*
   1103 	 * Finally, we're ready to register ourselves with the MAC layer
   1104 	 * interface; if this succeeds, we're all ready to start()
   1105 	 */
   1106 	err = mac_register(macp, &vnetp->mh);
   1107 	mac_free(macp);
   1108 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
   1109 }
   1110 
   1111 /* read the mac address of the device */
   1112 static int
   1113 vnet_read_mac_address(vnet_t *vnetp)
   1114 {
   1115 	uchar_t 	*macaddr;
   1116 	uint32_t 	size;
   1117 	int 		rv;
   1118 
   1119 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
   1120 	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
   1121 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
   1122 		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
   1123 		    macaddr_propname, rv);
   1124 		return (DDI_FAILURE);
   1125 	}
   1126 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
   1127 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
   1128 	ddi_prop_free(macaddr);
   1129 
   1130 	return (DDI_SUCCESS);
   1131 }
   1132 
   1133 static void
   1134 vnet_fdb_create(vnet_t *vnetp)
   1135 {
   1136 	char		hashname[MAXNAMELEN];
   1137 
   1138 	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
   1139 	    vnetp->instance);
   1140 	vnetp->fdb_nchains = vnet_fdb_nchains;
   1141 	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
   1142 	    mod_hash_null_valdtor, sizeof (void *));
   1143 }
   1144 
   1145 static void
   1146 vnet_fdb_destroy(vnet_t *vnetp)
   1147 {
   1148 	/* destroy fdb-hash-table */
   1149 	if (vnetp->fdb_hashp != NULL) {
   1150 		mod_hash_destroy_hash(vnetp->fdb_hashp);
   1151 		vnetp->fdb_hashp = NULL;
   1152 		vnetp->fdb_nchains = 0;
   1153 	}
   1154 }
   1155 
   1156 /*
   1157  * Add an entry into the fdb.
   1158  */
   1159 void
   1160 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
   1161 {
   1162 	uint64_t	addr = 0;
   1163 	int		rv;
   1164 
   1165 	KEY_HASH(addr, vresp->rem_macaddr);
   1166 
   1167 	/*
   1168 	 * If the entry being added corresponds to LDC_SERVICE resource,
   1169 	 * that is, vswitch connection, it is added to the hash and also
   1170 	 * the entry is cached, an additional reference count reflects
   1171 	 * this. The HYBRID resource is not added to the hash, but only
   1172 	 * cached, as it is only used for sending out packets for unknown
   1173 	 * unicast destinations.
   1174 	 */
   1175 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
   1176 	    (vresp->refcnt = 1) : (vresp->refcnt = 0);
   1177 
   1178 	/*
   1179 	 * Note: duplicate keys will be rejected by mod_hash.
   1180 	 */
   1181 	if (vresp->type != VIO_NET_RES_HYBRID) {
   1182 		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
   1183 		    (mod_hash_val_t)vresp);
   1184 		if (rv != 0) {
   1185 			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
   1186 			return;
   1187 		}
   1188 	}
   1189 
   1190 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
   1191 		/* Cache the fdb entry to vsw-port */
   1192 		WRITE_ENTER(&vnetp->vsw_fp_rw);
   1193 		if (vnetp->vsw_fp == NULL)
   1194 			vnetp->vsw_fp = vresp;
   1195 		RW_EXIT(&vnetp->vsw_fp_rw);
   1196 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
   1197 		/* Cache the fdb entry to hybrid resource */
   1198 		WRITE_ENTER(&vnetp->vsw_fp_rw);
   1199 		if (vnetp->hio_fp == NULL)
   1200 			vnetp->hio_fp = vresp;
   1201 		RW_EXIT(&vnetp->vsw_fp_rw);
   1202 	}
   1203 }
   1204 
   1205 /*
   1206  * Remove an entry from fdb.
   1207  */
   1208 static void
   1209 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
   1210 {
   1211 	uint64_t	addr = 0;
   1212 	int		rv;
   1213 	uint32_t	refcnt;
   1214 	vnet_res_t	*tmp;
   1215 
   1216 	KEY_HASH(addr, vresp->rem_macaddr);
   1217 
   1218 	/*
   1219 	 * Remove the entry from fdb hash table.
   1220 	 * This prevents further references to this fdb entry.
   1221 	 */
   1222 	if (vresp->type != VIO_NET_RES_HYBRID) {
   1223 		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
   1224 		    (mod_hash_val_t *)&tmp);
   1225 		if (rv != 0) {
   1226 			/*
   1227 			 * As the resources are added to the hash only
   1228 			 * after they are started, this can occur if
   1229 			 * a resource unregisters before it is ever started.
   1230 			 */
   1231 			return;
   1232 		}
   1233 	}
   1234 
   1235 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
   1236 		WRITE_ENTER(&vnetp->vsw_fp_rw);
   1237 
   1238 		ASSERT(tmp == vnetp->vsw_fp);
   1239 		vnetp->vsw_fp = NULL;
   1240 
   1241 		RW_EXIT(&vnetp->vsw_fp_rw);
   1242 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
   1243 		WRITE_ENTER(&vnetp->vsw_fp_rw);
   1244 
   1245 		vnetp->hio_fp = NULL;
   1246 
   1247 		RW_EXIT(&vnetp->vsw_fp_rw);
   1248 	}
   1249 
   1250 	/*
   1251 	 * If there are threads already ref holding before the entry was
   1252 	 * removed from hash table, then wait for ref count to drop to zero.
   1253 	 */
   1254 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
   1255 	    (refcnt = 1) : (refcnt = 0);
   1256 	while (vresp->refcnt > refcnt) {
   1257 		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
   1258 	}
   1259 }
   1260 
   1261 /*
   1262  * Search fdb for a given mac address. If an entry is found, hold
   1263  * a reference to it and return the entry; else returns NULL.
   1264  */
   1265 static vnet_res_t *
   1266 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
   1267 {
   1268 	uint64_t	key = 0;
   1269 	vnet_res_t	*vresp;
   1270 	int		rv;
   1271 
   1272 	KEY_HASH(key, addrp->ether_addr_octet);
   1273 
   1274 	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
   1275 	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);
   1276 
   1277 	if (rv != 0)
   1278 		return (NULL);
   1279 
   1280 	return (vresp);
   1281 }
   1282 
   1283 /*
   1284  * Callback function provided to mod_hash_find_cb(). After finding the fdb
   1285  * entry corresponding to the key (macaddr), this callback will be invoked by
   1286  * mod_hash_find_cb() to atomically increment the reference count on the fdb
   1287  * entry before returning the found entry.
   1288  */
   1289 static void
   1290 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
   1291 {
   1292 	_NOTE(ARGUNUSED(key))
   1293 	VNET_FDBE_REFHOLD((vnet_res_t *)val);
   1294 }
   1295 
   1296 /*
   1297  * Frames received that are tagged with the pvid of the vnet device must be
   1298  * untagged before sending up the stack. This function walks the chain of rx
   1299  * frames, untags any such frames and returns the updated chain.
   1300  *
   1301  * Arguments:
   1302  *    pvid:  pvid of the vnet device for which packets are being received
   1303  *    mp:    head of pkt chain to be validated and untagged
   1304  *
   1305  * Returns:
   1306  *    mp:    head of updated chain of packets
   1307  */
   1308 static void
   1309 vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
   1310 {
   1311 	struct ether_vlan_header	*evhp;
   1312 	mblk_t				*bp;
   1313 	mblk_t				*bpt;
   1314 	mblk_t				*bph;
   1315 	mblk_t				*bpn;
   1316 
   1317 	bpn = bph = bpt = NULL;
   1318 
   1319 	for (bp = *mp; bp != NULL; bp = bpn) {
   1320 
   1321 		bpn = bp->b_next;
   1322 		bp->b_next = bp->b_prev = NULL;
   1323 
   1324 		evhp = (struct ether_vlan_header *)bp->b_rptr;
   1325 
   1326 		if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
   1327 		    VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {
   1328 
   1329 			bp = vnet_vlan_remove_tag(bp);
   1330 			if (bp == NULL) {
   1331 				continue;
   1332 			}
   1333 
   1334 		}
   1335 
   1336 		/* build a chain of processed packets */
   1337 		if (bph == NULL) {
   1338 			bph = bpt = bp;
   1339 		} else {
   1340 			bpt->b_next = bp;
   1341 			bpt = bp;
   1342 		}
   1343 
   1344 	}
   1345 
   1346 	*mp = bph;
   1347 }
   1348 
   1349 static void
   1350 vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
   1351 {
   1352 	vnet_res_t		*vresp = (vnet_res_t *)vrh;
   1353 	vnet_t			*vnetp = vresp->vnetp;
   1354 	vnet_pseudo_rx_ring_t	*ringp;
   1355 
   1356 	if ((vnetp == NULL) || (vnetp->mh == 0)) {
   1357 		freemsgchain(mp);
   1358 		return;
   1359 	}
   1360 
   1361 	ringp = vresp->rx_ringp;
   1362 	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
   1363 }
   1364 
   1365 void
   1366 vnet_tx_update(vio_net_handle_t vrh)
   1367 {
   1368 	vnet_res_t		*vresp = (vnet_res_t *)vrh;
   1369 	vnet_t			*vnetp = vresp->vnetp;
   1370 	vnet_pseudo_tx_ring_t	*tx_ringp;
   1371 	vnet_pseudo_tx_group_t	*tx_grp;
   1372 	int			i;
   1373 
   1374 	if (vnetp == NULL || vnetp->mh == NULL) {
   1375 		return;
   1376 	}
   1377 
   1378 	/*
   1379 	 * Currently, the tx hwring API (used to access rings that belong to
   1380 	 * a Hybrid IO resource) does not provide us a per ring flow ctrl
   1381 	 * update; also the pseudo rings are shared by the ports/ldcs in the
   1382 	 * vgen layer. Thus we can't figure out which pseudo ring is being
   1383 	 * re-enabled for transmits. To work around this, when we get a tx
   1384 	 * restart notification from below, we simply propagate that to all
   1385 	 * the tx pseudo rings registered with the mac layer above.
   1386 	 *
   1387 	 * There are a couple of side effects with this approach, but they are
   1388 	 * not harmful, as outlined below:
   1389 	 *
   1390 	 * A) We might send an invalid ring_update() for a ring that is not
   1391 	 * really flow controlled. This will not have any effect in the mac
   1392 	 * layer and packets will continue to be transmitted on that ring.
   1393 	 *
   1394 	 * B) We might end up clearing the flow control in the mac layer for
   1395 	 * a ring that is still flow controlled in the underlying resource.
   1396 	 * This will result in the mac layer restarting	transmit, only to be
   1397 	 * flow controlled again on that ring.
   1398 	 */
   1399 	tx_grp = &vnetp->tx_grp[0];
   1400 	for (i = 0; i < tx_grp->ring_cnt; i++) {
   1401 		tx_ringp = &tx_grp->rings[i];
   1402 		mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
   1403 	}
   1404 }
   1405 
   1406 /*
   1407  * Update the new mtu of vnet into the mac layer. First check if the device has
   1408  * been plumbed and if so fail the mtu update. Returns 0 on success.
   1409  */
   1410 int
   1411 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
   1412 {
   1413 	int	rv;
   1414 
   1415 	if (vnetp == NULL || vnetp->mh == NULL) {
   1416 		return (EINVAL);
   1417 	}
   1418 
   1419 	WRITE_ENTER(&vnetp->vrwlock);
   1420 
   1421 	if (vnetp->flags & VNET_STARTED) {
   1422 		RW_EXIT(&vnetp->vrwlock);
   1423 		cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
   1424 		    "update as the device is plumbed\n",
   1425 		    vnetp->instance);
   1426 		return (EBUSY);
   1427 	}
   1428 
   1429 	/* update mtu in the mac layer */
   1430 	rv = mac_maxsdu_update(vnetp->mh, mtu);
   1431 	if (rv != 0) {
   1432 		RW_EXIT(&vnetp->vrwlock);
   1433 		cmn_err(CE_NOTE,
   1434 		    "!vnet%d: Unable to update mtu with mac layer\n",
   1435 		    vnetp->instance);
   1436 		return (EIO);
   1437 	}
   1438 
   1439 	vnetp->mtu = mtu;
   1440 
   1441 	RW_EXIT(&vnetp->vrwlock);
   1442 
   1443 	return (0);
   1444 }
   1445 
   1446 /*
   1447  * Update the link state of vnet to the mac layer.
   1448  */
   1449 void
   1450 vnet_link_update(vnet_t *vnetp, link_state_t link_state)
   1451 {
   1452 	if (vnetp == NULL || vnetp->mh == NULL) {
   1453 		return;
   1454 	}
   1455 
   1456 	WRITE_ENTER(&vnetp->vrwlock);
   1457 	if (vnetp->link_state == link_state) {
   1458 		RW_EXIT(&vnetp->vrwlock);
   1459 		return;
   1460 	}
   1461 	vnetp->link_state = link_state;
   1462 	RW_EXIT(&vnetp->vrwlock);
   1463 
   1464 	mac_link_update(vnetp->mh, link_state);
   1465 }
   1466 
   1467 /*
   1468  * vio_net_resource_reg -- An interface called to register a resource
   1469  *	with vnet.
   1470  *	macp -- a GLDv3 mac_register that has all the details of
   1471  *		a resource and its callbacks etc.
   1472  *	type -- resource type.
   1473  *	local_macaddr -- resource's MAC address. This is used to
   1474  *			 associate a resource with a corresponding vnet.
   1475  *	remote_macaddr -- remote side MAC address. This is ignored for
   1476  *			  the Hybrid resources.
   1477  *	vhp -- A handle returned to the caller.
   1478  *	vcb -- A set of callbacks provided to the callers.
   1479  */
   1480 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
   1481     ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
   1482     vio_net_callbacks_t *vcb)
   1483 {
   1484 	vnet_t		*vnetp;
   1485 	vnet_res_t	*vresp;
   1486 
   1487 	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
   1488 	ether_copy(local_macaddr, vresp->local_macaddr);
   1489 	ether_copy(rem_macaddr, vresp->rem_macaddr);
   1490 	vresp->type = type;
   1491 	bcopy(macp, &vresp->macreg, sizeof (mac_register_t));
   1492 
   1493 	DBG1(NULL, "Resource Registerig type=0%X\n", type);
   1494 
   1495 	READ_ENTER(&vnet_rw);
   1496 	vnetp = vnet_headp;
   1497 	while (vnetp != NULL) {
   1498 		if (VNET_MATCH_RES(vresp, vnetp)) {
   1499 			vresp->vnetp = vnetp;
   1500 
   1501 			/* Setup kstats for hio resource */
   1502 			if (vresp->type == VIO_NET_RES_HYBRID) {
   1503 				vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
   1504 				    "hio", vresp);
   1505 				if (vresp->ksp == NULL) {
   1506 					cmn_err(CE_NOTE, "!vnet%d: Cannot "
   1507 					    "create kstats for hio resource",
   1508 					    vnetp->instance);
   1509 				}
   1510 			}
   1511 			vnet_add_resource(vnetp, vresp);
   1512 			break;
   1513 		}
   1514 		vnetp = vnetp->nextp;
   1515 	}
   1516 	RW_EXIT(&vnet_rw);
   1517 	if (vresp->vnetp == NULL) {
   1518 		DWARN(NULL, "No vnet instance");
   1519 		kmem_free(vresp, sizeof (vnet_res_t));
   1520 		return (ENXIO);
   1521 	}
   1522 
   1523 	*vhp = vresp;
   1524 	vcb->vio_net_rx_cb = vnet_rx;
   1525 	vcb->vio_net_tx_update = vnet_tx_update;
   1526 	vcb->vio_net_report_err = vnet_handle_res_err;
   1527 
   1528 	/* Bind the resource to pseudo ring(s) */
   1529 	if (vnet_bind_rings(vresp) != 0) {
   1530 		(void) vnet_rem_resource(vnetp, vresp);
   1531 		vnet_hio_destroy_kstats(vresp->ksp);
   1532 		KMEM_FREE(vresp);
   1533 		return (1);
   1534 	}
   1535 
   1536 	/* Dispatch a task to start resources */
   1537 	vnet_dispatch_res_task(vnetp);
   1538 	return (0);
   1539 }
   1540 
   1541 /*
   1542  * vio_net_resource_unreg -- An interface to unregister a resource.
   1543  */
   1544 void
   1545 vio_net_resource_unreg(vio_net_handle_t vhp)
   1546 {
   1547 	vnet_res_t	*vresp = (vnet_res_t *)vhp;
   1548 	vnet_t		*vnetp = vresp->vnetp;
   1549 
   1550 	DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
   1551 
   1552 	ASSERT(vnetp != NULL);
   1553 	/*
   1554 	 * Remove the resource from fdb; this ensures
   1555 	 * there are no references to the resource.
   1556 	 */
   1557 	vnet_fdbe_del(vnetp, vresp);
   1558 
   1559 	vnet_unbind_rings(vresp);
   1560 
   1561 	/* Now remove the resource from the list */
   1562 	(void) vnet_rem_resource(vnetp, vresp);
   1563 
   1564 	vnet_hio_destroy_kstats(vresp->ksp);
   1565 	KMEM_FREE(vresp);
   1566 }
   1567 
   1568 static void
   1569 vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp)
   1570 {
   1571 	WRITE_ENTER(&vnetp->vrwlock);
   1572 	vresp->nextp = vnetp->vres_list;
   1573 	vnetp->vres_list = vresp;
   1574 	RW_EXIT(&vnetp->vrwlock);
   1575 }
   1576 
   1577 static vnet_res_t *
   1578 vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp)
   1579 {
   1580 	vnet_res_t	*vrp;
   1581 
   1582 	WRITE_ENTER(&vnetp->vrwlock);
   1583 	if (vresp == vnetp->vres_list) {
   1584 		vnetp->vres_list = vresp->nextp;
   1585 	} else {
   1586 		vrp = vnetp->vres_list;
   1587 		while (vrp->nextp != NULL) {
   1588 			if (vrp->nextp == vresp) {
   1589 				vrp->nextp = vresp->nextp;
   1590 				break;
   1591 			}
   1592 			vrp = vrp->nextp;
   1593 		}
   1594 	}
   1595 	vresp->vnetp = NULL;
   1596 	vresp->nextp = NULL;
   1597 
   1598 	RW_EXIT(&vnetp->vrwlock);
   1599 
   1600 	return (vresp);
   1601 }
   1602 
   1603 /*
   1604  * vnet_dds_rx -- an interface called by vgen to DDS messages.
   1605  */
   1606 void
   1607 vnet_dds_rx(void *arg, void *dmsg)
   1608 {
   1609 	vnet_t *vnetp = arg;
   1610 	vdds_process_dds_msg(vnetp, dmsg);
   1611 }
   1612 
   1613 /*
   1614  * vnet_send_dds_msg -- An interface provided to DDS to send
   1615  *	DDS messages. This simply sends meessages via vgen.
   1616  */
   1617 int
   1618 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
   1619 {
   1620 	int rv;
   1621 
   1622 	if (vnetp->vgenhdl != NULL) {
   1623 		rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
   1624 	}
   1625 	return (rv);
   1626 }
   1627 
/*
 * vnet_dds_cleanup_hio -- an interface called by vgen to cleanup hio
 *	resources. This is a thin wrapper that forwards the vnet instance
 *	to vdds_cleanup_hio().
 */
void
vnet_dds_cleanup_hio(vnet_t *vnetp)
{
	vdds_cleanup_hio(vnetp);
}
   1636 
   1637 /*
   1638  * vnet_handle_res_err -- A callback function called by a resource
   1639  *	to report an error. For example, vgen can call to report
   1640  *	an LDC down/reset event. This will trigger cleanup of associated
   1641  *	Hybrid resource.
   1642  */
   1643 /* ARGSUSED */
   1644 static void
   1645 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
   1646 {
   1647 	vnet_res_t *vresp = (vnet_res_t *)vrh;
   1648 	vnet_t *vnetp = vresp->vnetp;
   1649 
   1650 	if (vnetp == NULL) {
   1651 		return;
   1652 	}
   1653 	if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
   1654 	    (vresp->type != VIO_NET_RES_HYBRID)) {
   1655 		return;
   1656 	}
   1657 
   1658 	vdds_cleanup_hio(vnetp);
   1659 }
   1660 
   1661 /*
   1662  * vnet_dispatch_res_task -- A function to dispatch tasks start resources.
   1663  */
   1664 static void
   1665 vnet_dispatch_res_task(vnet_t *vnetp)
   1666 {
   1667 	int rv;
   1668 
   1669 	/*
   1670 	 * Dispatch the task. It could be the case that vnetp->flags does
   1671 	 * not have VNET_STARTED set. This is ok as vnet_rest_start_task()
   1672 	 * can abort the task when the task is started. See related comments
   1673 	 * in vnet_m_stop() and vnet_stop_resources().
   1674 	 */
   1675 	rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
   1676 	    vnetp, DDI_NOSLEEP);
   1677 	if (rv != DDI_SUCCESS) {
   1678 		cmn_err(CE_WARN,
   1679 		    "vnet%d:Can't dispatch start resource task",
   1680 		    vnetp->instance);
   1681 	}
   1682 }
   1683 
   1684 /*
   1685  * vnet_res_start_task -- A taskq callback function that starts a resource.
   1686  */
   1687 static void
   1688 vnet_res_start_task(void *arg)
   1689 {
   1690 	vnet_t *vnetp = arg;
   1691 
   1692 	WRITE_ENTER(&vnetp->vrwlock);
   1693 	if (vnetp->flags & VNET_STARTED) {
   1694 		vnet_start_resources(vnetp);
   1695 	}
   1696 	RW_EXIT(&vnetp->vrwlock);
   1697 }
   1698 
   1699 /*
   1700  * vnet_start_resources -- starts all resources associated with
   1701  *	a vnet.
   1702  */
   1703 static void
   1704 vnet_start_resources(vnet_t *vnetp)
   1705 {
   1706 	mac_register_t	*macp;
   1707 	mac_callbacks_t	*cbp;
   1708 	vnet_res_t	*vresp;
   1709 	int rv;
   1710 
   1711 	DBG1(vnetp, "enter\n");
   1712 
   1713 	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));
   1714 
   1715 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
   1716 		/* skip if it is already started */
   1717 		if (vresp->flags & VNET_STARTED) {
   1718 			continue;
   1719 		}
   1720 		macp = &vresp->macreg;
   1721 		cbp = macp->m_callbacks;
   1722 		rv = cbp->mc_start(macp->m_driver);
   1723 		if (rv == 0) {
   1724 			/*
   1725 			 * Successfully started the resource, so now
   1726 			 * add it to the fdb.
   1727 			 */
   1728 			vresp->flags |= VNET_STARTED;
   1729 			vnet_fdbe_add(vnetp, vresp);
   1730 		}
   1731 	}
   1732 
   1733 	DBG1(vnetp, "exit\n");
   1734 
   1735 }
   1736 
   1737 /*
   1738  * vnet_stop_resources -- stop all resources associated with a vnet.
   1739  */
   1740 static void
   1741 vnet_stop_resources(vnet_t *vnetp)
   1742 {
   1743 	vnet_res_t	*vresp;
   1744 	mac_register_t	*macp;
   1745 	mac_callbacks_t	*cbp;
   1746 
   1747 	DBG1(vnetp, "enter\n");
   1748 
   1749 	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));
   1750 
   1751 	for (vresp = vnetp->vres_list; vresp != NULL; ) {
   1752 		if (vresp->flags & VNET_STARTED) {
   1753 			/*
   1754 			 * Release the lock while invoking mc_stop() of the
   1755 			 * underlying resource. We hold a reference to this
   1756 			 * resource to prevent being removed from the list in
   1757 			 * vio_net_resource_unreg(). Note that new resources
   1758 			 * can be added to the head of the list while the lock
   1759 			 * is released, but they won't be started, as
   1760 			 * VNET_STARTED flag has been cleared for the vnet
   1761 			 * device in vnet_m_stop(). Also, while the lock is
   1762 			 * released a resource could be removed from the list
   1763 			 * in vio_net_resource_unreg(); but that is ok, as we
   1764 			 * re-acquire the lock and only then access the forward
   1765 			 * link (vresp->nextp) to continue with the next
   1766 			 * resource.
   1767 			 */
   1768 			vresp->flags &= ~VNET_STARTED;
   1769 			vresp->flags |= VNET_STOPPING;
   1770 			macp = &vresp->macreg;
   1771 			cbp = macp->m_callbacks;
   1772 			VNET_FDBE_REFHOLD(vresp);
   1773 			RW_EXIT(&vnetp->vrwlock);
   1774 
   1775 			cbp->mc_stop(macp->m_driver);
   1776 
   1777 			WRITE_ENTER(&vnetp->vrwlock);
   1778 			vresp->flags &= ~VNET_STOPPING;
   1779 			VNET_FDBE_REFRELE(vresp);
   1780 		}
   1781 		vresp = vresp->nextp;
   1782 	}
   1783 	DBG1(vnetp, "exit\n");
   1784 }
   1785 
   1786 /*
   1787  * Setup kstats for the HIO statistics.
   1788  * NOTE: the synchronization for the statistics is the
   1789  * responsibility of the caller.
   1790  */
   1791 kstat_t *
   1792 vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
   1793 {
   1794 	kstat_t *ksp;
   1795 	vnet_t *vnetp = vresp->vnetp;
   1796 	vnet_hio_kstats_t *hiokp;
   1797 	size_t size;
   1798 
   1799 	ASSERT(vnetp != NULL);
   1800 	size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
   1801 	ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
   1802 	    KSTAT_TYPE_NAMED, size, 0);
   1803 	if (ksp == NULL) {
   1804 		return (NULL);
   1805 	}
   1806 
   1807 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
   1808 	kstat_named_init(&hiokp->ipackets,		"ipackets",
   1809 	    KSTAT_DATA_ULONG);
   1810 	kstat_named_init(&hiokp->ierrors,		"ierrors",
   1811 	    KSTAT_DATA_ULONG);
   1812 	kstat_named_init(&hiokp->opackets,		"opackets",
   1813 	    KSTAT_DATA_ULONG);
   1814 	kstat_named_init(&hiokp->oerrors,		"oerrors",
   1815 	    KSTAT_DATA_ULONG);
   1816 
   1817 
   1818 	/* MIB II kstat variables */
   1819 	kstat_named_init(&hiokp->rbytes,		"rbytes",
   1820 	    KSTAT_DATA_ULONG);
   1821 	kstat_named_init(&hiokp->obytes,		"obytes",
   1822 	    KSTAT_DATA_ULONG);
   1823 	kstat_named_init(&hiokp->multircv,		"multircv",
   1824 	    KSTAT_DATA_ULONG);
   1825 	kstat_named_init(&hiokp->multixmt,		"multixmt",
   1826 	    KSTAT_DATA_ULONG);
   1827 	kstat_named_init(&hiokp->brdcstrcv,		"brdcstrcv",
   1828 	    KSTAT_DATA_ULONG);
   1829 	kstat_named_init(&hiokp->brdcstxmt,		"brdcstxmt",
   1830 	    KSTAT_DATA_ULONG);
   1831 	kstat_named_init(&hiokp->norcvbuf,		"norcvbuf",
   1832 	    KSTAT_DATA_ULONG);
   1833 	kstat_named_init(&hiokp->noxmtbuf,		"noxmtbuf",
   1834 	    KSTAT_DATA_ULONG);
   1835 
   1836 	ksp->ks_update = vnet_hio_update_kstats;
   1837 	ksp->ks_private = (void *)vresp;
   1838 	kstat_install(ksp);
   1839 	return (ksp);
   1840 }
   1841 
   1842 /*
   1843  * Destroy kstats.
   1844  */
   1845 static void
   1846 vnet_hio_destroy_kstats(kstat_t *ksp)
   1847 {
   1848 	if (ksp != NULL)
   1849 		kstat_delete(ksp);
   1850 }
   1851 
   1852 /*
   1853  * Update the kstats.
   1854  */
   1855 static int
   1856 vnet_hio_update_kstats(kstat_t *ksp, int rw)
   1857 {
   1858 	vnet_t *vnetp;
   1859 	vnet_res_t *vresp;
   1860 	vnet_hio_stats_t statsp;
   1861 	vnet_hio_kstats_t *hiokp;
   1862 
   1863 	vresp = (vnet_res_t *)ksp->ks_private;
   1864 	vnetp = vresp->vnetp;
   1865 
   1866 	bzero(&statsp, sizeof (vnet_hio_stats_t));
   1867 
   1868 	READ_ENTER(&vnetp->vsw_fp_rw);
   1869 	if (vnetp->hio_fp == NULL) {
   1870 		/* not using hio resources, just return */
   1871 		RW_EXIT(&vnetp->vsw_fp_rw);
   1872 		return (0);
   1873 	}
   1874 	VNET_FDBE_REFHOLD(vnetp->hio_fp);
   1875 	RW_EXIT(&vnetp->vsw_fp_rw);
   1876 	vnet_hio_get_stats(vnetp->hio_fp, &statsp);
   1877 	VNET_FDBE_REFRELE(vnetp->hio_fp);
   1878 
   1879 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
   1880 
   1881 	if (rw == KSTAT_READ) {
   1882 		/* Link Input/Output stats */
   1883 		hiokp->ipackets.value.ul	= (uint32_t)statsp.ipackets;
   1884 		hiokp->ipackets64.value.ull	= statsp.ipackets;
   1885 		hiokp->ierrors.value.ul		= statsp.ierrors;
   1886 		hiokp->opackets.value.ul	= (uint32_t)statsp.opackets;
   1887 		hiokp->opackets64.value.ull	= statsp.opackets;
   1888 		hiokp->oerrors.value.ul		= statsp.oerrors;
   1889 
   1890 		/* MIB II kstat variables */
   1891 		hiokp->rbytes.value.ul		= (uint32_t)statsp.rbytes;
   1892 		hiokp->rbytes64.value.ull	= statsp.rbytes;
   1893 		hiokp->obytes.value.ul		= (uint32_t)statsp.obytes;
   1894 		hiokp->obytes64.value.ull	= statsp.obytes;
   1895 		hiokp->multircv.value.ul	= statsp.multircv;
   1896 		hiokp->multixmt.value.ul	= statsp.multixmt;
   1897 		hiokp->brdcstrcv.value.ul	= statsp.brdcstrcv;
   1898 		hiokp->brdcstxmt.value.ul	= statsp.brdcstxmt;
   1899 		hiokp->norcvbuf.value.ul	= statsp.norcvbuf;
   1900 		hiokp->noxmtbuf.value.ul	= statsp.noxmtbuf;
   1901 	} else {
   1902 		return (EACCES);
   1903 	}
   1904 
   1905 	return (0);
   1906 }
   1907 
   1908 static void
   1909 vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
   1910 {
   1911 	mac_register_t		*macp;
   1912 	mac_callbacks_t		*cbp;
   1913 	uint64_t		val;
   1914 	int			stat;
   1915 
   1916 	/*
   1917 	 * get the specified statistics from the underlying nxge.
   1918 	 */
   1919 	macp = &vresp->macreg;
   1920 	cbp = macp->m_callbacks;
   1921 	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
   1922 		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
   1923 			switch (stat) {
   1924 			case MAC_STAT_IPACKETS:
   1925 				statsp->ipackets = val;
   1926 				break;
   1927 
   1928 			case MAC_STAT_IERRORS:
   1929 				statsp->ierrors = val;
   1930 				break;
   1931 
   1932 			case MAC_STAT_OPACKETS:
   1933 				statsp->opackets = val;
   1934 				break;
   1935 
   1936 			case MAC_STAT_OERRORS:
   1937 				statsp->oerrors = val;
   1938 				break;
   1939 
   1940 			case MAC_STAT_RBYTES:
   1941 				statsp->rbytes = val;
   1942 				break;
   1943 
   1944 			case MAC_STAT_OBYTES:
   1945 				statsp->obytes = val;
   1946 				break;
   1947 
   1948 			case MAC_STAT_MULTIRCV:
   1949 				statsp->multircv = val;
   1950 				break;
   1951 
   1952 			case MAC_STAT_MULTIXMT:
   1953 				statsp->multixmt = val;
   1954 				break;
   1955 
   1956 			case MAC_STAT_BRDCSTRCV:
   1957 				statsp->brdcstrcv = val;
   1958 				break;
   1959 
   1960 			case MAC_STAT_BRDCSTXMT:
   1961 				statsp->brdcstxmt = val;
   1962 				break;
   1963 
   1964 			case MAC_STAT_NOXMTBUF:
   1965 				statsp->noxmtbuf = val;
   1966 				break;
   1967 
   1968 			case MAC_STAT_NORCVBUF:
   1969 				statsp->norcvbuf = val;
   1970 				break;
   1971 
   1972 			default:
   1973 				/*
   1974 				 * parameters not interested.
   1975 				 */
   1976 				break;
   1977 			}
   1978 		}
   1979 	}
   1980 }
   1981 
   1982 static boolean_t
   1983 vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data)
   1984 {
   1985 	vnet_t	*vnetp = (vnet_t *)arg;
   1986 
   1987 	if (vnetp == NULL) {
   1988 		return (0);
   1989 	}
   1990 
   1991 	switch (cap) {
   1992 
   1993 	case MAC_CAPAB_RINGS: {
   1994 
   1995 		mac_capab_rings_t *cap_rings = cap_data;
   1996 		/*
   1997 		 * Rings Capability Notes:
   1998 		 * We advertise rings to make use of the rings framework in
   1999 		 * gldv3 mac layer, to improve the performance. This is
   2000 		 * specifically needed when a Hybrid resource (with multiple
   2001 		 * tx/rx hardware rings) is assigned to a vnet device. We also
   2002 		 * leverage this for the normal case when no Hybrid resource is
   2003 		 * assigned.
   2004 		 *
   2005 		 * Ring Allocation:
   2006 		 * - TX path:
   2007 		 * We expose a pseudo ring group with 2 pseudo tx rings (as
   2008 		 * currently HybridIO exports only 2 rings) In the normal case,
   2009 		 * transmit traffic that comes down to the driver through the
   2010 		 * mri_tx (vnet_tx_ring_send()) entry point goes through the
   2011 		 * distributed switching algorithm in vnet and gets transmitted
   2012 		 * over a port/LDC in the vgen layer to either the vswitch or a
   2013 		 * peer vnet. If and when a Hybrid resource is assigned to the
   2014 		 * vnet, we obtain the tx ring information of the Hybrid device
   2015 		 * (nxge) and map the pseudo rings 1:1 to the 2 hw tx rings.
   2016 		 * Traffic being sent over the Hybrid resource by the mac layer
   2017 		 * gets spread across both hw rings, as they are mapped to the
   2018 		 * 2 pseudo tx rings in vnet.
   2019 		 *
   2020 		 * - RX path:
   2021 		 * We expose a pseudo ring group with 3 pseudo rx rings (static
   2022 		 * rings) initially. The first (default) pseudo rx ring is
   2023 		 * reserved for the resource that connects to the vswitch
   2024 		 * service. The next 2 rings are reserved for a Hybrid resource
   2025 		 * that may be assigned to the vnet device. If and when a
   2026 		 * Hybrid resource is assigned to the vnet, we obtain the rx
   2027 		 * ring information of the Hybrid device (nxge) and map these
   2028 		 * pseudo rings 1:1 to the 2 hw rx rings. For each additional
   2029 		 * resource that connects to a peer vnet, we dynamically
   2030 		 * allocate a pseudo rx ring and map it to that resource, when
   2031 		 * the resource gets added; and the pseudo rx ring is
   2032 		 * dynamically registered with the upper mac layer. We do the
   2033 		 * reverse and unregister the ring with the mac layer when
   2034 		 * the resource gets removed.
   2035 		 *
   2036 		 * Synchronization notes:
   2037 		 * We don't need any lock to protect members of ring structure,
   2038 		 * specifically ringp->hw_rh, in either the TX or the RX ring,
   2039 		 * as explained below.
   2040 		 * - TX ring:
   2041 		 * ring->hw_rh is initialized only when a Hybrid resource is
   2042 		 * associated; and gets referenced only in vnet_hio_tx(). The
   2043 		 * Hybrid resource itself is available in fdb only after tx
   2044 		 * hwrings are found and mapped; i.e, in vio_net_resource_reg()
   2045 		 * we call vnet_bind_rings() first and then call
   2046 		 * vnet_start_resources() which adds an entry to fdb. For
   2047 		 * traffic going over LDC resources, we don't reference
   2048 		 * ring->hw_rh at all.
   2049 		 * - RX ring:
   2050 		 * For rings mapped to Hybrid resource ring->hw_rh is
   2051 		 * initialized and only then do we add the rx callback for
   2052 		 * the underlying Hybrid resource; we disable callbacks before
   2053 		 * we unmap ring->hw_rh. For rings mapped to LDC resources, we
   2054 		 * stop the rx callbacks (in vgen) before we remove ring->hw_rh
   2055 		 * (vio_net_resource_unreg()).
   2056 		 */
   2057 
   2058 		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
   2059 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
   2060 
   2061 			/*
   2062 			 * The ring_cnt for rx grp is initialized in
   2063 			 * vnet_ring_grp_init(). Later, the ring_cnt gets
   2064 			 * updated dynamically whenever LDC resources are added
   2065 			 * or removed.
   2066 			 */
   2067 			cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
   2068 			cap_rings->mr_rget = vnet_get_ring;
   2069 
   2070 			cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
   2071 			cap_rings->mr_gget = vnet_get_group;
   2072 			cap_rings->mr_gaddring = NULL;
   2073 			cap_rings->mr_gremring = NULL;
   2074 		} else {
   2075 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
   2076 
   2077 			/*
   2078 			 * The ring_cnt for tx grp is initialized in
   2079 			 * vnet_ring_grp_init() and remains constant, as we
   2080 			 * do not support dymanic tx rings for now.
   2081 			 */
   2082 			cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
   2083 			cap_rings->mr_rget = vnet_get_ring;
   2084 
   2085 			/*
   2086 			 * Transmit rings are not grouped; i.e, the number of
   2087 			 * transmit ring groups advertised should be set to 0.
   2088 			 */
   2089 			cap_rings->mr_gnum = 0;
   2090 
   2091 			cap_rings->mr_gget = vnet_get_group;
   2092 			cap_rings->mr_gaddring = NULL;
   2093 			cap_rings->mr_gremring = NULL;
   2094 		}
   2095 		return (B_TRUE);
   2096 
   2097 	}
   2098 
   2099 	default:
   2100 		break;
   2101 
   2102 	}
   2103 
   2104 	return (B_FALSE);
   2105 }
   2106 
   2107 /*
   2108  * Callback funtion for MAC layer to get ring information.
   2109  */
   2110 static void
   2111 vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
   2112     const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle)
   2113 {
   2114 	vnet_t	*vnetp = arg;
   2115 
   2116 	switch (rtype) {
   2117 
   2118 	case MAC_RING_TYPE_RX: {
   2119 
   2120 		vnet_pseudo_rx_group_t	*rx_grp;
   2121 		vnet_pseudo_rx_ring_t	*rx_ringp;
   2122 		mac_intr_t		*mintr;
   2123 
   2124 		/* We advertised only one RX group */
   2125 		ASSERT(g_index == 0);
   2126 		rx_grp = &vnetp->rx_grp[g_index];
   2127 
   2128 		/* Check the current # of rings in the rx group */
   2129 		ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt));
   2130 
   2131 		/* Get the ring based on the index */
   2132 		rx_ringp = &rx_grp->rings[r_index];
   2133 
   2134 		rx_ringp->handle = r_handle;
   2135 		/*
   2136 		 * Note: we don't need to save the incoming r_index in rx_ring,
   2137 		 * as vnet_ring_grp_init() would have initialized the index for
   2138 		 * each ring in the array.
   2139 		 */
   2140 		rx_ringp->grp = rx_grp;
   2141 		rx_ringp->vnetp = vnetp;
   2142 
   2143 		mintr = &infop->mri_intr;
   2144 		mintr->mi_handle = (mac_intr_handle_t)rx_ringp;
   2145 		mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr;
   2146 		mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr;
   2147 
   2148 		infop->mri_driver = (mac_ring_driver_t)rx_ringp;
   2149 		infop->mri_start = vnet_rx_ring_start;
   2150 		infop->mri_stop = vnet_rx_ring_stop;
   2151 
   2152 		/* Set the poll function, as this is an rx ring */
   2153 		infop->mri_poll = vnet_rx_poll;
   2154 
   2155 		break;
   2156 	}
   2157 
   2158 	case MAC_RING_TYPE_TX: {
   2159 		vnet_pseudo_tx_group_t	*tx_grp;
   2160 		vnet_pseudo_tx_ring_t	*tx_ringp;
   2161 
   2162 		/*
   2163 		 * No need to check grp index; mac layer passes -1 for it.
   2164 		 */
   2165 		tx_grp = &vnetp->tx_grp[0];
   2166 
   2167 		/* Check the # of rings in the tx group */
   2168 		ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt));
   2169 
   2170 		/* Get the ring based on the index */
   2171 		tx_ringp = &tx_grp->rings[r_index];
   2172 
   2173 		tx_ringp->handle = r_handle;
   2174 		tx_ringp->index = r_index;
   2175 		tx_ringp->grp = tx_grp;
   2176 		tx_ringp->vnetp = vnetp;
   2177 
   2178 		infop->mri_driver = (mac_ring_driver_t)tx_ringp;
   2179 		infop->mri_start = vnet_tx_ring_start;
   2180 		infop->mri_stop = vnet_tx_ring_stop;
   2181 
   2182 		/* Set the transmit function, as this is a tx ring */
   2183 		infop->mri_tx = vnet_tx_ring_send;
   2184 
   2185 		break;
   2186 	}
   2187 
   2188 	default:
   2189 		break;
   2190 	}
   2191 }
   2192 
   2193 /*
   2194  * Callback funtion for MAC layer to get group information.
   2195  */
   2196 static void
   2197 vnet_get_group(void *arg, mac_ring_type_t type, const int index,
   2198 	mac_group_info_t *infop, mac_group_handle_t handle)
   2199 {
   2200 	vnet_t	*vnetp = (vnet_t *)arg;
   2201 
   2202 	switch (type) {
   2203 
   2204 	case MAC_RING_TYPE_RX:
   2205 	{
   2206 		vnet_pseudo_rx_group_t	*rx_grp;
   2207 
   2208 		/* We advertised only one RX group */
   2209 		ASSERT(index == 0);
   2210 
   2211 		rx_grp = &vnetp->rx_grp[index];
   2212 		rx_grp->handle = handle;
   2213 		rx_grp->index = index;
   2214 		rx_grp->vnetp = vnetp;
   2215 
   2216 		infop->mgi_driver = (mac_group_driver_t)rx_grp;
   2217 		infop->mgi_start = NULL;
   2218 		infop->mgi_stop = NULL;
   2219 		infop->mgi_addmac = vnet_addmac;
   2220 		infop->mgi_remmac = vnet_remmac;
   2221 		infop->mgi_count = rx_grp->ring_cnt;
   2222 
   2223 		break;
   2224 	}
   2225 
   2226 	case MAC_RING_TYPE_TX:
   2227 	{
   2228 		vnet_pseudo_tx_group_t	*tx_grp;
   2229 
   2230 		/* We advertised only one TX group */
   2231 		ASSERT(index == 0);
   2232 
   2233 		tx_grp = &vnetp->tx_grp[index];
   2234 		tx_grp->handle = handle;
   2235 		tx_grp->index = index;
   2236 		tx_grp->vnetp = vnetp;
   2237 
   2238 		infop->mgi_driver = (mac_group_driver_t)tx_grp;
   2239 		infop->mgi_start = NULL;
   2240 		infop->mgi_stop = NULL;
   2241 		infop->mgi_addmac = NULL;
   2242 		infop->mgi_remmac = NULL;
   2243 		infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS;
   2244 
   2245 		break;
   2246 	}
   2247 
   2248 	default:
   2249 		break;
   2250 
   2251 	}
   2252 }
   2253 
   2254 static int
   2255 vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
   2256 {
   2257 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
   2258 	int			err;
   2259 
   2260 	/*
   2261 	 * If this ring is mapped to a LDC resource, simply mark the state to
   2262 	 * indicate the ring is started and return.
   2263 	 */
   2264 	if ((rx_ringp->state &
   2265 	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
   2266 		rx_ringp->gen_num = mr_gen_num;
   2267 		rx_ringp->state |= VNET_RXRING_STARTED;
   2268 		return (0);
   2269 	}
   2270 
   2271 	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
   2272 
   2273 	/*
   2274 	 * This must be a ring reserved for a hwring. If the hwring is not
   2275 	 * bound yet, simply mark the state to indicate the ring is started and
   2276 	 * return. If and when a hybrid resource is activated for this vnet
   2277 	 * device, we will bind the hwring and start it then. If a hwring is
   2278 	 * already bound, start it now.
   2279 	 */
   2280 	if (rx_ringp->hw_rh == NULL) {
   2281 		rx_ringp->gen_num = mr_gen_num;
   2282 		rx_ringp->state |= VNET_RXRING_STARTED;
   2283 		return (0);
   2284 	}
   2285 
   2286 	err = mac_hwring_start(rx_ringp->hw_rh);
   2287 	if (err == 0) {
   2288 		rx_ringp->gen_num = mr_gen_num;
   2289 		rx_ringp->state |= VNET_RXRING_STARTED;
   2290 	} else {
   2291 		err = ENXIO;
   2292 	}
   2293 
   2294 	return (err);
   2295 }
   2296 
   2297 static void
   2298 vnet_rx_ring_stop(mac_ring_driver_t arg)
   2299 {
   2300 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
   2301 
   2302 	/*
   2303 	 * If this ring is mapped to a LDC resource, simply mark the state to
   2304 	 * indicate the ring is now stopped and return.
   2305 	 */
   2306 	if ((rx_ringp->state &
   2307 	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
   2308 		rx_ringp->state &= ~VNET_RXRING_STARTED;
   2309 		return;
   2310 	}
   2311 
   2312 	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
   2313 
   2314 	/*
   2315 	 * This must be a ring reserved for a hwring. If the hwring is not
   2316 	 * bound yet, simply mark the state to indicate the ring is stopped and
   2317 	 * return. If a hwring is already bound, stop it now.
   2318 	 */
   2319 	if (rx_ringp->hw_rh == NULL) {
   2320 		rx_ringp->state &= ~VNET_RXRING_STARTED;
   2321 		return;
   2322 	}
   2323 
   2324 	mac_hwring_stop(rx_ringp->hw_rh);
   2325 	rx_ringp->state &= ~VNET_RXRING_STARTED;
   2326 }
   2327 
   2328 /* ARGSUSED */
   2329 static int
   2330 vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
   2331 {
   2332 	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
   2333 
   2334 	tx_ringp->state |= VNET_TXRING_STARTED;
   2335 	return (0);
   2336 }
   2337 
   2338 static void
   2339 vnet_tx_ring_stop(mac_ring_driver_t arg)
   2340 {
   2341 	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
   2342 
   2343 	tx_ringp->state &= ~VNET_TXRING_STARTED;
   2344 }
   2345 
   2346 /*
   2347  * Disable polling for a ring and enable its interrupt.
   2348  */
   2349 static int
   2350 vnet_ring_enable_intr(void *arg)
   2351 {
   2352 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
   2353 	vnet_res_t		*vresp;
   2354 
   2355 	if (rx_ringp->hw_rh == NULL) {
   2356 		/*
   2357 		 * Ring enable intr func is being invoked, but the ring is
   2358 		 * not bound to any underlying resource ? This must be a ring
   2359 		 * reserved for Hybrid resource and no such resource has been
   2360 		 * assigned to this vnet device yet. We simply return success.
   2361 		 */
   2362 		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
   2363 		return (0);
   2364 	}
   2365 
   2366 	/*
   2367 	 * The rx ring has been bound to either a LDC or a Hybrid resource.
   2368 	 * Call the appropriate function to enable interrupts for the ring.
   2369 	 */
   2370 	if (rx_ringp->state & VNET_RXRING_HYBRID) {
   2371 		return (mac_hwring_enable_intr(rx_ringp->hw_rh));
   2372 	} else {
   2373 		vresp = (vnet_res_t *)rx_ringp->hw_rh;
   2374 		return (vgen_enable_intr(vresp->macreg.m_driver));
   2375 	}
   2376 }
   2377 
   2378 /*
   2379  * Enable polling for a ring and disable its interrupt.
   2380  */
   2381 static int
   2382 vnet_ring_disable_intr(void *arg)
   2383 {
   2384 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
   2385 	vnet_res_t		*vresp;
   2386 
   2387 	if (rx_ringp->hw_rh == NULL) {
   2388 		/*
   2389 		 * Ring disable intr func is being invoked, but the ring is
   2390 		 * not bound to any underlying resource ? This must be a ring
   2391 		 * reserved for Hybrid resource and no such resource has been
   2392 		 * assigned to this vnet device yet. We simply return success.
   2393 		 */
   2394 		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
   2395 		return (0);
   2396 	}
   2397 
   2398 	/*
   2399 	 * The rx ring has been bound to either a LDC or a Hybrid resource.
   2400 	 * Call the appropriate function to disable interrupts for the ring.
   2401 	 */
   2402 	if (rx_ringp->state & VNET_RXRING_HYBRID) {
   2403 		return (mac_hwring_disable_intr(rx_ringp->hw_rh));
   2404 	} else {
   2405 		vresp = (vnet_res_t *)rx_ringp->hw_rh;
   2406 		return (vgen_disable_intr(vresp->macreg.m_driver));
   2407 	}
   2408 }
   2409 
   2410 /*
   2411  * Poll 'bytes_to_pickup' bytes of message from the rx ring.
   2412  */
   2413 static mblk_t *
   2414 vnet_rx_poll(void *arg, int bytes_to_pickup)
   2415 {
   2416 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
   2417 	mblk_t			*mp = NULL;
   2418 	vnet_res_t		*vresp;
   2419 	vnet_t			*vnetp = rx_ringp->vnetp;
   2420 
   2421 	if (rx_ringp->hw_rh == NULL) {
   2422 		return (NULL);
   2423 	}
   2424 
   2425 	if (rx_ringp->state & VNET_RXRING_HYBRID) {
   2426 		mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup);
   2427 		/*
   2428 		 * Packets received over a hybrid resource need additional
   2429 		 * processing to remove the tag, for the pvid case. The
   2430 		 * underlying resource is not aware of the vnet's pvid and thus
   2431 		 * packets are received with the vlan tag in the header; unlike
   2432 		 * packets that are received over a ldc channel in which case
   2433 		 * the peer vnet/vsw would have already removed the tag.
   2434 		 */
   2435 		if (vnetp->pvid != vnetp->default_vlan_id) {
   2436 			vnet_rx_frames_untag(vnetp->pvid, &mp);
   2437 		}
   2438 	} else {
   2439 		vresp = (vnet_res_t *)rx_ringp->hw_rh;
   2440 		mp = vgen_poll(vresp->macreg.m_driver, bytes_to_pickup);
   2441 	}
   2442 	return (mp);
   2443 }
   2444 
   2445 /* ARGSUSED */
   2446 void
   2447 vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
   2448 	boolean_t loopback)
   2449 {
   2450 	vnet_t			*vnetp = (vnet_t *)arg;
   2451 	vnet_pseudo_rx_ring_t	*ringp = (vnet_pseudo_rx_ring_t *)mrh;
   2452 
   2453 	/*
   2454 	 * Packets received over a hybrid resource need additional processing
   2455 	 * to remove the tag, for the pvid case. The underlying resource is
   2456 	 * not aware of the vnet's pvid and thus packets are received with the
   2457 	 * vlan tag in the header; unlike packets that are received over a ldc
   2458 	 * channel in which case the peer vnet/vsw would have already removed
   2459 	 * the tag.
   2460 	 */
   2461 	if (vnetp->pvid != vnetp->default_vlan_id) {
   2462 		vnet_rx_frames_untag(vnetp->pvid, &mp);
   2463 		if (mp == NULL) {
   2464 			return;
   2465 		}
   2466 	}
   2467 	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
   2468 }
   2469 
   2470 static int
   2471 vnet_addmac(void *arg, const uint8_t *mac_addr)
   2472 {
   2473 	vnet_pseudo_rx_group_t  *rx_grp = (vnet_pseudo_rx_group_t *)arg;
   2474 	vnet_t			*vnetp;
   2475 
   2476 	vnetp = rx_grp->vnetp;
   2477 
   2478 	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
   2479 		return (0);
   2480 	}
   2481 
   2482 	cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n",
   2483 	    vnetp->instance, __func__);
   2484 	return (EINVAL);
   2485 }
   2486 
   2487 static int
   2488 vnet_remmac(void *arg, const uint8_t *mac_addr)
   2489 {
   2490 	vnet_pseudo_rx_group_t  *rx_grp = (vnet_pseudo_rx_group_t *)arg;
   2491 	vnet_t			*vnetp;
   2492 
   2493 	vnetp = rx_grp->vnetp;
   2494 
   2495 	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
   2496 		return (0);
   2497 	}
   2498 
   2499 	cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n",
   2500 	    vnetp->instance, __func__, ether_sprintf((void *)mac_addr));
   2501 	return (EINVAL);
   2502 }
   2503 
   2504 int
   2505 vnet_hio_mac_init(vnet_t *vnetp, char *ifname)
   2506 {
   2507 	mac_handle_t		mh;
   2508 	mac_client_handle_t	mch = NULL;
   2509 	mac_unicast_handle_t	muh = NULL;
   2510 	mac_diag_t		diag;
   2511 	mac_register_t		*macp;
   2512 	char			client_name[MAXNAMELEN];
   2513 	int			rv;
   2514 	uint16_t		mac_flags = MAC_UNICAST_TAG_DISABLE |
   2515 	    MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY;
   2516 	vio_net_callbacks_t	vcb;
   2517 	ether_addr_t		rem_addr =
   2518 		{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
   2519 	uint32_t		retries = 0;
   2520 
   2521 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
   2522 		return (EAGAIN);
   2523 	}
   2524 
   2525 	do {
   2526 		rv = mac_open_by_linkname(ifname, &mh);
   2527 		if (rv == 0) {
   2528 			break;
   2529 		}
   2530 		if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) {
   2531 			mac_free(macp);
   2532 			return (rv);
   2533 		}
   2534 		drv_usecwait(vnet_mac_open_delay);
   2535 	} while (rv == ENOENT);
   2536 
   2537 	vnetp->hio_mh = mh;
   2538 
   2539 	(void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance,
   2540 	    ifname);
   2541 	rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE);
   2542 	if (rv != 0) {
   2543 		goto fail;
   2544 	}
   2545 	vnetp->hio_mch = mch;
   2546 
   2547 	rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0,
   2548 	    &diag);
   2549 	if (rv != 0) {
   2550 		goto fail;
   2551 	}
   2552 	vnetp->hio_muh = muh;
   2553 
   2554 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
   2555 	macp->m_driver = vnetp;
   2556 	macp->m_dip = NULL;
   2557 	macp->m_src_addr = NULL;
   2558 	macp->m_callbacks = &vnet_hio_res_callbacks;
   2559 	macp->m_min_sdu = 0;
   2560 	macp->m_max_sdu = ETHERMTU;
   2561 
   2562 	rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID,
   2563 	    vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb);
   2564 	if (rv != 0) {
   2565 		goto fail;
   2566 	}
   2567 	mac_free(macp);
   2568 
   2569 	/* add the recv callback */
   2570 	mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp);
   2571 
   2572 	/* add the notify callback - only tx updates for now */
   2573 	vnetp->hio_mnh = mac_notify_add(vnetp->hio_mh, vnet_hio_notify_cb,
   2574 	    vnetp);
   2575 
   2576 	return (0);
   2577 
   2578 fail:
   2579 	mac_free(macp);
   2580 	vnet_hio_mac_cleanup(vnetp);
   2581 	return (1);
   2582 }
   2583 
   2584 void
   2585 vnet_hio_mac_cleanup(vnet_t *vnetp)
   2586 {
   2587 	if (vnetp->hio_mnh != NULL) {
   2588 		(void) mac_notify_remove(vnetp->hio_mnh, B_TRUE);
   2589 		vnetp->hio_mnh = NULL;
   2590 	}
   2591 
   2592 	if (vnetp->hio_vhp != NULL) {
   2593 		vio_net_resource_unreg(vnetp->hio_vhp);
   2594 		vnetp->hio_vhp = NULL;
   2595 	}
   2596 
   2597 	if (vnetp->hio_muh != NULL) {
   2598 		(void) mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh);
   2599 		vnetp->hio_muh = NULL;
   2600 	}
   2601 
   2602 	if (vnetp->hio_mch != NULL) {
   2603 		mac_client_close(vnetp->hio_mch, 0);
   2604 		vnetp->hio_mch = NULL;
   2605 	}
   2606 
   2607 	if (vnetp->hio_mh != NULL) {
   2608 		mac_close(vnetp->hio_mh);
   2609 		vnetp->hio_mh = NULL;
   2610 	}
   2611 }
   2612 
   2613 /* Bind pseudo rings to hwrings */
   2614 static int
   2615 vnet_bind_hwrings(vnet_t *vnetp)
   2616 {
   2617 	mac_ring_handle_t	hw_rh[VNET_NUM_HYBRID_RINGS];
   2618 	mac_perim_handle_t	mph1;
   2619 	vnet_pseudo_rx_group_t	*rx_grp;
   2620 	vnet_pseudo_rx_ring_t	*rx_ringp;
   2621 	vnet_pseudo_tx_group_t	*tx_grp;
   2622 	vnet_pseudo_tx_ring_t	*tx_ringp;
   2623 	int			hw_ring_cnt;
   2624 	int			i;
   2625 	int			rv;
   2626 
   2627 	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);
   2628 
   2629 	/* Get the list of the underlying RX rings. */
   2630 	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh,
   2631 	    MAC_RING_TYPE_RX);
   2632 
   2633 	/* We expect the the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */
   2634 	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
   2635 		cmn_err(CE_WARN,
   2636 		    "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n",
   2637 		    vnetp->instance, hw_ring_cnt);
   2638 		goto fail;
   2639 	}
   2640 
   2641 	if (vnetp->rx_hwgh != NULL) {
   2642 		/*
   2643 		 * Quiesce the HW ring and the mac srs on the ring. Note
   2644 		 * that the HW ring will be restarted when the pseudo ring
   2645 		 * is started. At that time all the packets will be
   2646 		 * directly passed up to the pseudo RX ring and handled
   2647 		 * by mac srs created over the pseudo RX ring.
   2648 		 */
   2649 		mac_rx_client_quiesce(vnetp->hio_mch);
   2650 		mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE);
   2651 	}
   2652 
   2653 	/*
   2654 	 * Bind the pseudo rings to the hwrings and start the hwrings.
   2655 	 * Note we don't need to register these with the upper mac, as we have
   2656 	 * statically exported these pseudo rxrings which are reserved for
   2657 	 * rxrings of Hybrid resource.
   2658 	 */
   2659 	rx_grp = &vnetp->rx_grp[0];
   2660 	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
   2661 		/* Pick the rxrings reserved for Hybrid resource */
   2662 		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
   2663 
   2664 		/* Store the hw ring handle */
   2665 		rx_ringp->hw_rh = hw_rh[i];
   2666 
   2667 		/* Bind the pseudo ring to the underlying hwring */
   2668 		mac_hwring_setup(rx_ringp->hw_rh,
   2669 		    (mac_resource_handle_t)rx_ringp);
   2670 
   2671 		/* Start the hwring if needed */
   2672 		if (rx_ringp->state & VNET_RXRING_STARTED) {
   2673 			rv = mac_hwring_start(rx_ringp->hw_rh);
   2674 			if (rv != 0) {
   2675 				mac_hwring_teardown(rx_ringp->hw_rh);
   2676 				rx_ringp->hw_rh = NULL;
   2677 				goto fail;
   2678 			}
   2679 		}
   2680 	}
   2681 
   2682 	/* Get the list of the underlying TX rings. */
   2683 	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh,
   2684 	    MAC_RING_TYPE_TX);
   2685 
   2686 	/* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */
   2687 	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
   2688 		cmn_err(CE_WARN,
   2689 		    "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n",
   2690 		    vnetp->instance, hw_ring_cnt);
   2691 		goto fail;
   2692 	}
   2693 
   2694 	/*
   2695 	 * Now map the pseudo txrings to the hw txrings. Note we don't need
   2696 	 * to register these with the upper mac, as we have statically exported
   2697 	 * these rings. Note that these rings will continue to be used for LDC
   2698 	 * resources to peer vnets and vswitch (shared ring).
   2699 	 */
   2700 	tx_grp = &vnetp->tx_grp[0];
   2701 	for (i = 0; i < tx_grp->ring_cnt; i++) {
   2702 		tx_ringp = &tx_grp->rings[i];
   2703 		tx_ringp->hw_rh = hw_rh[i];
   2704 		tx_ringp->state |= VNET_TXRING_HYBRID;
   2705 	}
   2706 
   2707 	mac_perim_exit(mph1);
   2708 	return (0);
   2709 
   2710 fail:
   2711 	mac_perim_exit(mph1);
   2712 	vnet_unbind_hwrings(vnetp);
   2713 	return (1);
   2714 }
   2715 
   2716 /* Unbind pseudo rings from hwrings */
   2717 static void
   2718 vnet_unbind_hwrings(vnet_t *vnetp)
   2719 {
   2720 	mac_perim_handle_t	mph1;
   2721 	vnet_pseudo_rx_ring_t	*rx_ringp;
   2722 	vnet_pseudo_rx_group_t	*rx_grp;
   2723 	vnet_pseudo_tx_group_t	*tx_grp;
   2724 	vnet_pseudo_tx_ring_t	*tx_ringp;
   2725 	int			i;
   2726 
   2727 	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);
   2728 
   2729 	tx_grp = &vnetp->tx_grp[0];
   2730 	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
   2731 		tx_ringp = &tx_grp->rings[i];
   2732 		if (tx_ringp->state & VNET_TXRING_HYBRID) {
   2733 			tx_ringp->state &= ~VNET_TXRING_HYBRID;
   2734 			tx_ringp->hw_rh = NULL;
   2735 		}
   2736 	}
   2737 
   2738 	rx_grp = &vnetp->rx_grp[0];
   2739 	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
   2740 		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
   2741 		if (rx_ringp->hw_rh != NULL) {
   2742 			/* Stop the hwring */
   2743 			mac_hwring_stop(rx_ringp->hw_rh);
   2744 
   2745 			/* Teardown the hwring */
   2746 			mac_hwring_teardown(rx_ringp->hw_rh);
   2747 			rx_ringp->hw_rh = NULL;
   2748 		}
   2749 	}
   2750 
   2751 	if (vnetp->rx_hwgh != NULL) {
   2752 		vnetp->rx_hwgh = NULL;
   2753 		/*
   2754 		 * First clear the permanent-quiesced flag of the RX srs then
   2755 		 * restart the HW ring and the mac srs on the ring.
   2756 		 */
   2757 		mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE);
   2758 		mac_rx_client_restart(vnetp->hio_mch);
   2759 	}
   2760 
   2761 	mac_perim_exit(mph1);
   2762 }
   2763 
   2764 /* Bind pseudo ring to a LDC resource */
   2765 static int
   2766 vnet_bind_vgenring(vnet_res_t *vresp)
   2767 {
   2768 	vnet_t			*vnetp;
   2769 	vnet_pseudo_rx_group_t	*rx_grp;
   2770 	vnet_pseudo_rx_ring_t	*rx_ringp;
   2771 	mac_perim_handle_t	mph1;
   2772 	int			rv;
   2773 	int			type;
   2774 
   2775 	vnetp = vresp->vnetp;
   2776 	type = vresp->type;
   2777 	rx_grp = &vnetp->rx_grp[0];
   2778 
   2779 	if (type == VIO_NET_RES_LDC_SERVICE) {
   2780 		/*
   2781 		 * Ring Index 0 is the default ring in the group and is
   2782 		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
   2783 		 * is allocated statically and is reported to the mac layer
   2784 		 * in vnet_m_capab(). So, all we need to do here, is save a
   2785 		 * reference to the associated vresp.
   2786 		 */
   2787 		rx_ringp = &rx_grp->rings[0];
   2788 		rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
   2789 		vresp->rx_ringp = (void *)rx_ringp;
   2790 		return (0);
   2791 	}
   2792 	ASSERT(type == VIO_NET_RES_LDC_GUEST);
   2793 
   2794 	mac_perim_enter_by_mh(vnetp->mh, &mph1);
   2795 
   2796 	rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp);
   2797 	if (rx_ringp == NULL) {
   2798 		cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring",
   2799 		    vnetp->instance);
   2800 		goto fail;
   2801 	}
   2802 
   2803 	/* Store the LDC resource itself as the ring handle */
   2804 	rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
   2805 
   2806 	/*
   2807 	 * Save a reference to the ring in the resource for lookup during
   2808 	 * unbind. Note this is only done for LDC resources. We don't need this
   2809 	 * in the case of a Hybrid resource (see vnet_bind_hwrings()), as its
   2810 	 * rx rings are mapped to reserved pseudo rx rings (index 1 and 2).
   2811 	 */
   2812 	vresp->rx_ringp = (void *)rx_ringp;
   2813 	rx_ringp->state |= VNET_RXRING_LDC_GUEST;
   2814 
   2815 	/* Register the pseudo ring with upper-mac */
   2816 	rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index);
   2817 	if (rv != 0) {
   2818 		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
   2819 		rx_ringp->hw_rh = NULL;
   2820 		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
   2821 		goto fail;
   2822 	}
   2823 
   2824 	mac_perim_exit(mph1);
   2825 	return (0);
   2826 fail:
   2827 	mac_perim_exit(mph1);
   2828 	return (1);
   2829 }
   2830 
   2831 /* Unbind pseudo ring from a LDC resource */
   2832 static void
   2833 vnet_unbind_vgenring(vnet_res_t *vresp)
   2834 {
   2835 	vnet_t			*vnetp;
   2836 	vnet_pseudo_rx_group_t	*rx_grp;
   2837 	vnet_pseudo_rx_ring_t	*rx_ringp;
   2838 	mac_perim_handle_t	mph1;
   2839 	int			type;
   2840 
   2841 	vnetp = vresp->vnetp;
   2842 	type = vresp->type;
   2843 	rx_grp = &vnetp->rx_grp[0];
   2844 
   2845 	if (vresp->rx_ringp == NULL) {
   2846 		return;
   2847 	}
   2848 
   2849 	if (type == VIO_NET_RES_LDC_SERVICE) {
   2850 		/*
   2851 		 * Ring Index 0 is the default ring in the group and is
   2852 		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
   2853 		 * is allocated statically and is reported to the mac layer
   2854 		 * in vnet_m_capab(). So, all we need to do here, is remove its
   2855 		 * reference to the associated vresp.
   2856 		 */
   2857 		rx_ringp = &rx_grp->rings[0];
   2858 		rx_ringp->hw_rh = NULL;
   2859 		vresp->rx_ringp = NULL;
   2860 		return;
   2861 	}
   2862 	ASSERT(type == VIO_NET_RES_LDC_GUEST);
   2863 
   2864 	mac_perim_enter_by_mh(vnetp->mh, &mph1);
   2865 
   2866 	rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp;
   2867 	vresp->rx_ringp = NULL;
   2868 
   2869 	if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) {
   2870 		/* Unregister the pseudo ring with upper-mac */
   2871 		mac_group_rem_ring(rx_grp->handle, rx_ringp->handle);
   2872 
   2873 		rx_ringp->hw_rh = NULL;
   2874 		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
   2875 
   2876 		/* Free the pseudo rx ring */
   2877 		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
   2878 	}
   2879 
   2880 	mac_perim_exit(mph1);
   2881 }
   2882 
   2883 static void
   2884 vnet_unbind_rings(vnet_res_t *vresp)
   2885 {
   2886 	switch (vresp->type) {
   2887 
   2888 	case VIO_NET_RES_LDC_SERVICE:
   2889 	case VIO_NET_RES_LDC_GUEST:
   2890 		vnet_unbind_vgenring(vresp);
   2891 		break;
   2892 
   2893 	case VIO_NET_RES_HYBRID:
   2894 		vnet_unbind_hwrings(vresp->vnetp);
   2895 		break;
   2896 
   2897 	default:
   2898 		break;
   2899 
   2900 	}
   2901 }
   2902 
   2903 static int
   2904 vnet_bind_rings(vnet_res_t *vresp)
   2905 {
   2906 	int	rv;
   2907 
   2908 	switch (vresp->type) {
   2909 
   2910 	case VIO_NET_RES_LDC_SERVICE:
   2911 	case VIO_NET_RES_LDC_GUEST:
   2912 		rv = vnet_bind_vgenring(vresp);
   2913 		break;
   2914 
   2915 	case VIO_NET_RES_HYBRID:
   2916 		rv = vnet_bind_hwrings(vresp->vnetp);
   2917 		break;
   2918 
   2919 	default:
   2920 		rv = 1;
   2921 		break;
   2922 
   2923 	}
   2924 
   2925 	return (rv);
   2926 }
   2927 
   2928 /* ARGSUSED */
   2929 int
   2930 vnet_hio_stat(void *arg, uint_t stat, uint64_t *val)
   2931 {
   2932 	vnet_t	*vnetp = (vnet_t *)arg;
   2933 
   2934 	*val = mac_stat_get(vnetp->hio_mh, stat);
   2935 	return (0);
   2936 }
   2937 
   2938 /*
   2939  * The start() and stop() routines for the Hybrid resource below, are just
   2940  * dummy functions. This is provided to avoid resource type specific code in
   2941  * vnet_start_resources() and vnet_stop_resources(). The starting and stopping
   2942  * of the Hybrid resource happens in the context of the mac_client interfaces
   2943  * that are invoked in vnet_hio_mac_init() and vnet_hio_mac_cleanup().
   2944  */
   2945 /* ARGSUSED */
   2946 static int
   2947 vnet_hio_start(void *arg)
   2948 {
   2949 	return (0);
   2950 }
   2951 
   2952 /* ARGSUSED */
   2953 static void
   2954 vnet_hio_stop(void *arg)
   2955 {
   2956 }
   2957 
   2958 mblk_t *
   2959 vnet_hio_tx(void *arg, mblk_t *mp)
   2960 {
   2961 	vnet_pseudo_tx_ring_t	*tx_ringp;
   2962 	mblk_t			*nextp;
   2963 	mblk_t			*ret_mp;
   2964 
   2965 	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
   2966 	for (;;) {
   2967 		nextp = mp->b_next;
   2968 		mp->b_next = NULL;
   2969 
   2970 		ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp);
   2971 		if (ret_mp != NULL) {
   2972 			ret_mp->b_next = nextp;
   2973 			mp = ret_mp;
   2974 			break;
   2975 		}
   2976 
   2977 		if ((mp = nextp) == NULL)
   2978 			break;
   2979 	}
   2980 	return (mp);
   2981 }
   2982 
   2983 static void
   2984 vnet_hio_notify_cb(void *arg, mac_notify_type_t type)
   2985 {
   2986 	vnet_t			*vnetp = (vnet_t *)arg;
   2987 	mac_perim_handle_t	mph;
   2988 
   2989 	mac_perim_enter_by_mh(vnetp->hio_mh, &mph);
   2990 	switch (type) {
   2991 	case MAC_NOTE_TX:
   2992 		vnet_tx_update(vnetp->hio_vhp);
   2993 		break;
   2994 
   2995 	default:
   2996 		break;
   2997 	}
   2998 	mac_perim_exit(mph);
   2999 }
   3000 
   3001 #ifdef	VNET_IOC_DEBUG
   3002 
   3003 /*
   3004  * The ioctl entry point is used only for debugging for now. The ioctl commands
   3005  * can be used to force the link state of the channel connected to vsw.
   3006  */
   3007 static void
   3008 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
   3009 {
   3010 	struct iocblk	*iocp;
   3011 	vnet_t		*vnetp;
   3012 
   3013 	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
   3014 	iocp->ioc_error = 0;
   3015 	vnetp = (vnet_t *)arg;
   3016 
   3017 	if (vnetp == NULL) {
   3018 		miocnak(q, mp, 0, EINVAL);
   3019 		return;
   3020 	}
   3021 
   3022 	switch (iocp->ioc_cmd) {
   3023 
   3024 	case VNET_FORCE_LINK_DOWN:
   3025 	case VNET_FORCE_LINK_UP:
   3026 		vnet_force_link_state(vnetp, q, mp);
   3027 		break;
   3028 
   3029 	default:
   3030 		iocp->ioc_error = EINVAL;
   3031 		miocnak(q, mp, 0, iocp->ioc_error);
   3032 		break;
   3033 
   3034 	}
   3035 }
   3036 
   3037 static void
   3038 vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp)
   3039 {
   3040 	mac_register_t	*macp;
   3041 	mac_callbacks_t	*cbp;
   3042 	vnet_res_t	*vresp;
   3043 
   3044 	READ_ENTER(&vnetp->vsw_fp_rw);
   3045 
   3046 	vresp = vnetp->vsw_fp;
   3047 	if (vresp == NULL) {
   3048 		RW_EXIT(&vnetp->vsw_fp_rw);
   3049 		return;
   3050 	}
   3051 
   3052 	macp = &vresp->macreg;
   3053 	cbp = macp->m_callbacks;
   3054 	cbp->mc_ioctl(macp->m_driver, q, mp);
   3055 
   3056 	RW_EXIT(&vnetp->vsw_fp_rw);
   3057 }
   3058 
   3059 #else
   3060 
   3061 static void
   3062 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
   3063 {
   3064 	vnet_t		*vnetp;
   3065 
   3066 	vnetp = (vnet_t *)arg;
   3067 
   3068 	if (vnetp == NULL) {
   3069 		miocnak(q, mp, 0, EINVAL);
   3070 		return;
   3071 	}
   3072 
   3073 	/* ioctl support only for debugging */
   3074 	miocnak(q, mp, 0, ENOTSUP);
   3075 }
   3076 
   3077 #endif
   3078