Home | History | Annotate | Download | only in io
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/types.h>
     28 #include <sys/errno.h>
     29 #include <sys/param.h>
     30 #include <sys/stream.h>
     31 #include <sys/kmem.h>
     32 #include <sys/conf.h>
     33 #include <sys/devops.h>
     34 #include <sys/ksynch.h>
     35 #include <sys/stat.h>
     36 #include <sys/modctl.h>
     37 #include <sys/modhash.h>
     38 #include <sys/debug.h>
     39 #include <sys/ethernet.h>
     40 #include <sys/dlpi.h>
     41 #include <net/if.h>
     42 #include <sys/mac_provider.h>
     43 #include <sys/mac_client.h>
     44 #include <sys/mac_client_priv.h>
     45 #include <sys/mac_ether.h>
     46 #include <sys/ddi.h>
     47 #include <sys/sunddi.h>
     48 #include <sys/strsun.h>
     49 #include <sys/note.h>
     50 #include <sys/atomic.h>
     51 #include <sys/vnet.h>
     52 #include <sys/vlan.h>
     53 #include <sys/vnet_mailbox.h>
     54 #include <sys/vnet_common.h>
     55 #include <sys/dds.h>
     56 #include <sys/strsubr.h>
     57 #include <sys/taskq.h>
     58 
     59 /*
     60  * Function prototypes.
     61  */
     62 
     63 /* DDI entrypoints */
     64 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
     65 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
     66 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
     67 
     68 /* MAC entrypoints  */
     69 static int vnet_m_stat(void *, uint_t, uint64_t *);
     70 static int vnet_m_start(void *);
     71 static void vnet_m_stop(void *);
     72 static int vnet_m_promisc(void *, boolean_t);
     73 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
     74 static int vnet_m_unicst(void *, const uint8_t *);
     75 mblk_t *vnet_m_tx(void *, mblk_t *);
     76 static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
     77 #ifdef	VNET_IOC_DEBUG
     78 static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
     79 #endif
     80 static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data);
     81 static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
     82 	const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle);
     83 static void vnet_get_group(void *arg, mac_ring_type_t type, const int index,
     84 	mac_group_info_t *infop, mac_group_handle_t handle);
     85 static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
     86 static void vnet_rx_ring_stop(mac_ring_driver_t rdriver);
     87 static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
     88 static void vnet_tx_ring_stop(mac_ring_driver_t rdriver);
     89 static int vnet_ring_enable_intr(void *arg);
     90 static int vnet_ring_disable_intr(void *arg);
     91 static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup);
     92 static int vnet_addmac(void *arg, const uint8_t *mac_addr);
     93 static int vnet_remmac(void *arg, const uint8_t *mac_addr);
     94 
     95 /* vnet internal functions */
     96 static int vnet_unattach(vnet_t *vnetp);
     97 static void vnet_ring_grp_init(vnet_t *vnetp);
     98 static void vnet_ring_grp_uninit(vnet_t *vnetp);
     99 static int vnet_mac_register(vnet_t *);
    100 static int vnet_read_mac_address(vnet_t *vnetp);
    101 static int vnet_bind_vgenring(vnet_res_t *vresp);
    102 static void vnet_unbind_vgenring(vnet_res_t *vresp);
    103 static int vnet_bind_hwrings(vnet_t *vnetp);
    104 static void vnet_unbind_hwrings(vnet_t *vnetp);
    105 static int vnet_bind_rings(vnet_res_t *vresp);
    106 static void vnet_unbind_rings(vnet_res_t *vresp);
    107 static int vnet_hio_stat(void *, uint_t, uint64_t *);
    108 static int vnet_hio_start(void *);
    109 static void vnet_hio_stop(void *);
    110 static void vnet_hio_notify_cb(void *arg, mac_notify_type_t type);
    111 mblk_t *vnet_hio_tx(void *, mblk_t *);
    112 
    113 /* Forwarding database (FDB) routines */
    114 static void vnet_fdb_create(vnet_t *vnetp);
    115 static void vnet_fdb_destroy(vnet_t *vnetp);
    116 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
    117 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
    118 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
    119 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);
    120 
    121 static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
    122 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
    123 static void vnet_tx_update(vio_net_handle_t vrh);
    124 static void vnet_res_start_task(void *arg);
    125 static void vnet_start_resources(vnet_t *vnetp);
    126 static void vnet_stop_resources(vnet_t *vnetp);
    127 static void vnet_dispatch_res_task(vnet_t *vnetp);
    128 static void vnet_res_start_task(void *arg);
    129 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
    130 static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp);
    131 static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp);
    132 
    133 /* Exported to vnet_gen */
    134 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
    135 void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
    136 void vnet_dds_cleanup_hio(vnet_t *vnetp);
    137 
    138 static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
    139     vnet_res_t *vresp);
    140 static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
    141 static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
    142 static void vnet_hio_destroy_kstats(kstat_t *ksp);
    143 
    144 /* Exported to vnet_dds */
    145 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
    146 int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
    147 void vnet_hio_mac_cleanup(vnet_t *vnetp);
    148 
    149 /* Externs that are imported from vnet_gen */
    150 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
    151     const uint8_t *macaddr, void **vgenhdl);
    152 extern int vgen_init_mdeg(void *arg);
    153 extern void vgen_uninit(void *arg);
    154 extern int vgen_dds_tx(void *arg, void *dmsg);
    155 extern void vgen_mod_init(void);
    156 extern int vgen_mod_cleanup(void);
    157 extern void vgen_mod_fini(void);
    158 extern int vgen_enable_intr(void *arg);
    159 extern int vgen_disable_intr(void *arg);
    160 extern mblk_t *vgen_poll(void *arg, int bytes_to_pickup);
    161 
    162 /* Externs that are imported from vnet_dds */
    163 extern void vdds_mod_init(void);
    164 extern void vdds_mod_fini(void);
    165 extern int vdds_init(vnet_t *vnetp);
    166 extern void vdds_cleanup(vnet_t *vnetp);
    167 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
    168 extern void vdds_cleanup_hybrid_res(void *arg);
    169 extern void vdds_cleanup_hio(vnet_t *vnetp);
    170 
    171 /* Externs imported from mac_impl */
    172 extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *);
    173 
    174 #define	DRV_NAME	"vnet"
    175 #define	VNET_FDBE_REFHOLD(p)						\
    176 {									\
    177 	atomic_inc_32(&(p)->refcnt);					\
    178 	ASSERT((p)->refcnt != 0);					\
    179 }
    180 
    181 #define	VNET_FDBE_REFRELE(p)						\
    182 {									\
    183 	ASSERT((p)->refcnt != 0);					\
    184 	atomic_dec_32(&(p)->refcnt);					\
    185 }
    186 
    187 #ifdef	VNET_IOC_DEBUG
    188 #define	VNET_M_CALLBACK_FLAGS	(MC_IOCTL | MC_GETCAPAB)
    189 #else
    190 #define	VNET_M_CALLBACK_FLAGS	(MC_GETCAPAB)
    191 #endif
    192 
    193 static mac_callbacks_t vnet_m_callbacks = {
    194 	VNET_M_CALLBACK_FLAGS,
    195 	vnet_m_stat,
    196 	vnet_m_start,
    197 	vnet_m_stop,
    198 	vnet_m_promisc,
    199 	vnet_m_multicst,
    200 	NULL,	/* m_unicst entry must be NULL while rx rings are exposed */
    201 	NULL,	/* m_tx entry must be NULL while tx rings are exposed */
    202 	vnet_m_ioctl,
    203 	vnet_m_capab,
    204 	NULL
    205 };
    206 
    207 static mac_callbacks_t vnet_hio_res_callbacks = {
    208 	0,
    209 	vnet_hio_stat,
    210 	vnet_hio_start,
    211 	vnet_hio_stop,
    212 	NULL,
    213 	NULL,
    214 	NULL,
    215 	vnet_hio_tx,
    216 	NULL,
    217 	NULL,
    218 	NULL
    219 };
    220 
    221 /*
    222  * Linked list of "vnet_t" structures - one per instance.
    223  */
    224 static vnet_t	*vnet_headp = NULL;
    225 static krwlock_t vnet_rw;
    226 
    227 /* Tunables */
    228 uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
    229 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
    230 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
    231 uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */
    232 
    233 /* Configure tx serialization in mac layer for the vnet device */
    234 boolean_t vnet_mac_tx_serialize = B_TRUE;
    235 
    236 /*
    237  * Set this to non-zero to enable additional internal receive buffer pools
    238  * based on the MTU of the device for better performance at the cost of more
    239  * memory consumption. This is turned off by default, to use allocb(9F) for
    240  * receive buffer allocations of sizes > 2K.
    241  */
    242 boolean_t vnet_jumbo_rxpools = B_FALSE;
    243 
    244 /* # of chains in fdb hash table */
    245 uint32_t	vnet_fdb_nchains = VNET_NFDB_HASH;
    246 
    247 /* Internal tunables */
    248 uint32_t	vnet_ethermtu = 1500;	/* mtu of the device */
    249 
    250 /*
    251  * Default vlan id. This is only used internally when the "default-vlan-id"
    252  * property is not present in the MD device node. Therefore, this should not be
    253  * used as a tunable; if this value is changed, the corresponding variable
    254  * should be updated to the same value in vsw and also other vnets connected to
    255  * the same vsw.
    256  */
    257 uint16_t	vnet_default_vlan_id = 1;
    258 
    259 /* delay in usec to wait for all references on a fdb entry to be dropped */
    260 uint32_t vnet_fdbe_refcnt_delay = 10;
    261 
    262 static struct ether_addr etherbroadcastaddr = {
    263 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
    264 };
    265 
    266 /* mac_open() retry delay in usec */
    267 uint32_t vnet_mac_open_delay = 100;	/* 0.1 ms */
    268 
    269 /* max # of mac_open() retries */
    270 uint32_t vnet_mac_open_retries = 100;
    271 
    272 /*
    273  * Property names
    274  */
    275 static char macaddr_propname[] = "local-mac-address";
    276 
    277 /*
    278  * This is the string displayed by modinfo(1m).
    279  */
    280 static char vnet_ident[] = "vnet driver";
    281 extern struct mod_ops mod_driverops;
    282 static struct cb_ops cb_vnetops = {
    283 	nulldev,		/* cb_open */
    284 	nulldev,		/* cb_close */
    285 	nodev,			/* cb_strategy */
    286 	nodev,			/* cb_print */
    287 	nodev,			/* cb_dump */
    288 	nodev,			/* cb_read */
    289 	nodev,			/* cb_write */
    290 	nodev,			/* cb_ioctl */
    291 	nodev,			/* cb_devmap */
    292 	nodev,			/* cb_mmap */
    293 	nodev,			/* cb_segmap */
    294 	nochpoll,		/* cb_chpoll */
    295 	ddi_prop_op,		/* cb_prop_op */
    296 	NULL,			/* cb_stream */
    297 	(int)(D_MP)		/* cb_flag */
    298 };
    299 
    300 static struct dev_ops vnetops = {
    301 	DEVO_REV,		/* devo_rev */
    302 	0,			/* devo_refcnt */
    303 	NULL,			/* devo_getinfo */
    304 	nulldev,		/* devo_identify */
    305 	nulldev,		/* devo_probe */
    306 	vnetattach,		/* devo_attach */
    307 	vnetdetach,		/* devo_detach */
    308 	nodev,			/* devo_reset */
    309 	&cb_vnetops,		/* devo_cb_ops */
    310 	(struct bus_ops *)NULL,	/* devo_bus_ops */
    311 	NULL,			/* devo_power */
    312 	ddi_quiesce_not_supported,	/* devo_quiesce */
    313 };
    314 
    315 static struct modldrv modldrv = {
    316 	&mod_driverops,		/* Type of module.  This one is a driver */
    317 	vnet_ident,		/* ID string */
    318 	&vnetops		/* driver specific ops */
    319 };
    320 
    321 static struct modlinkage modlinkage = {
    322 	MODREV_1, (void *)&modldrv, NULL
    323 };
    324 
    325 #ifdef DEBUG
    326 
    327 /*
    328  * Print debug messages - set to 0xf to enable all msgs
    329  */
    330 int vnet_dbglevel = 0x8;
    331 
    332 static void
    333 debug_printf(const char *fname, void *arg, const char *fmt, ...)
    334 {
    335 	char    buf[512];
    336 	va_list ap;
    337 	vnet_t *vnetp = (vnet_t *)arg;
    338 	char    *bufp = buf;
    339 
    340 	if (vnetp == NULL) {
    341 		(void) sprintf(bufp, "%s: ", fname);
    342 		bufp += strlen(bufp);
    343 	} else {
    344 		(void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
    345 		bufp += strlen(bufp);
    346 	}
    347 	va_start(ap, fmt);
    348 	(void) vsprintf(bufp, fmt, ap);
    349 	va_end(ap);
    350 	cmn_err(CE_CONT, "%s\n", buf);
    351 }
    352 
    353 #endif
    354 
    355 /* _init(9E): initialize the loadable module */
    356 int
    357 _init(void)
    358 {
    359 	int status;
    360 
    361 	DBG1(NULL, "enter\n");
    362 
    363 	mac_init_ops(&vnetops, "vnet");
    364 	status = mod_install(&modlinkage);
    365 	if (status != 0) {
    366 		mac_fini_ops(&vnetops);
    367 	}
    368 	vdds_mod_init();
    369 	vgen_mod_init();
    370 	DBG1(NULL, "exit(%d)\n", status);
    371 	return (status);
    372 }
    373 
    374 /* _fini(9E): prepare the module for unloading. */
    375 int
    376 _fini(void)
    377 {
    378 	int		status;
    379 
    380 	DBG1(NULL, "enter\n");
    381 
    382 	status = vgen_mod_cleanup();
    383 	if (status != 0)
    384 		return (status);
    385 
    386 	status = mod_remove(&modlinkage);
    387 	if (status != 0)
    388 		return (status);
    389 	mac_fini_ops(&vnetops);
    390 	vgen_mod_fini();
    391 	vdds_mod_fini();
    392 
    393 	DBG1(NULL, "exit(%d)\n", status);
    394 	return (status);
    395 }
    396 
    397 /* _info(9E): return information about the loadable module */
    398 int
    399 _info(struct modinfo *modinfop)
    400 {
    401 	return (mod_info(&modlinkage, modinfop));
    402 }
    403 
    404 /*
    405  * attach(9E): attach a device to the system.
    406  * called once for each instance of the device on the system.
    407  */
    408 static int
    409 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
    410 {
    411 	vnet_t			*vnetp;
    412 	int			status;
    413 	int			instance;
    414 	uint64_t		reg;
    415 	char			qname[TASKQ_NAMELEN];
    416 	vnet_attach_progress_t	attach_progress;
    417 
    418 	attach_progress = AST_init;
    419 
    420 	switch (cmd) {
    421 	case DDI_ATTACH:
    422 		break;
    423 	case DDI_RESUME:
    424 	case DDI_PM_RESUME:
    425 	default:
    426 		goto vnet_attach_fail;
    427 	}
    428 
    429 	instance = ddi_get_instance(dip);
    430 	DBG1(NULL, "instance(%d) enter\n", instance);
    431 
    432 	/* allocate vnet_t and mac_t structures */
    433 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
    434 	vnetp->dip = dip;
    435 	vnetp->instance = instance;
    436 	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
    437 	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
    438 	attach_progress |= AST_vnet_alloc;
    439 
    440 	vnet_ring_grp_init(vnetp);
    441 	attach_progress |= AST_ring_init;
    442 
    443 	status = vdds_init(vnetp);
    444 	if (status != 0) {
    445 		goto vnet_attach_fail;
    446 	}
    447 	attach_progress |= AST_vdds_init;
    448 
    449 	/* setup links to vnet_t from both devinfo and mac_t */
    450 	ddi_set_driver_private(dip, (caddr_t)vnetp);
    451 
    452 	/* read the mac address */
    453 	status = vnet_read_mac_address(vnetp);
    454 	if (status != DDI_SUCCESS) {
    455 		goto vnet_attach_fail;
    456 	}
    457 	attach_progress |= AST_read_macaddr;
    458 
    459 	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
    460 	    DDI_PROP_DONTPASS, "reg", -1);
    461 	if (reg == -1) {
    462 		goto vnet_attach_fail;
    463 	}
    464 	vnetp->reg = reg;
    465 
    466 	vnet_fdb_create(vnetp);
    467 	attach_progress |= AST_fdbh_alloc;
    468 
    469 	(void) snprintf(qname, TASKQ_NAMELEN, "vnet_taskq%d", instance);
    470 	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
    471 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
    472 		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
    473 		    instance);
    474 		goto vnet_attach_fail;
    475 	}
    476 	attach_progress |= AST_taskq_create;
    477 
    478 	/* add to the list of vnet devices */
    479 	WRITE_ENTER(&vnet_rw);
    480 	vnetp->nextp = vnet_headp;
    481 	vnet_headp = vnetp;
    482 	RW_EXIT(&vnet_rw);
    483 
    484 	attach_progress |= AST_vnet_list;
    485 
    486 	/*
    487 	 * Initialize the generic vnet plugin which provides communication via
    488 	 * sun4v LDC (logical domain channel) based resources. This involves 2
    489 	 * steps; first, vgen_init() is invoked to read the various properties
    490 	 * of the vnet device from its MD node (including its mtu which is
    491 	 * needed to mac_register()) and obtain a handle to the vgen layer.
    492 	 * After mac_register() is done and we have a mac handle, we then
    493 	 * invoke vgen_init_mdeg() which registers with the the MD event
    494 	 * generator (mdeg) framework to allow LDC resource notifications.
    495 	 * Note: this sequence also allows us to report the correct default #
    496 	 * of pseudo rings (2TX and 3RX) in vnet_m_capab() which gets invoked
    497 	 * in the context of mac_register(); and avoids conflicting with
    498 	 * dynamic pseudo rx rings which get added/removed as a result of mdeg
    499 	 * events in vgen.
    500 	 */
    501 	status = vgen_init(vnetp, reg, vnetp->dip,
    502 	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
    503 	if (status != DDI_SUCCESS) {
    504 		DERR(vnetp, "vgen_init() failed\n");
    505 		goto vnet_attach_fail;
    506 	}
    507 	attach_progress |= AST_vgen_init;
    508 
    509 	status = vnet_mac_register(vnetp);
    510 	if (status != DDI_SUCCESS) {
    511 		goto vnet_attach_fail;
    512 	}
    513 	vnetp->link_state = LINK_STATE_UNKNOWN;
    514 	attach_progress |= AST_macreg;
    515 
    516 	status = vgen_init_mdeg(vnetp->vgenhdl);
    517 	if (status != DDI_SUCCESS) {
    518 		goto vnet_attach_fail;
    519 	}
    520 	attach_progress |= AST_init_mdeg;
    521 
    522 	vnetp->attach_progress = attach_progress;
    523 
    524 	DBG1(NULL, "instance(%d) exit\n", instance);
    525 	return (DDI_SUCCESS);
    526 
    527 vnet_attach_fail:
    528 	vnetp->attach_progress = attach_progress;
    529 	status = vnet_unattach(vnetp);
    530 	ASSERT(status == 0);
    531 	return (DDI_FAILURE);
    532 }
    533 
    534 /*
    535  * detach(9E): detach a device from the system.
    536  */
    537 static int
    538 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
    539 {
    540 	vnet_t		*vnetp;
    541 	int		instance;
    542 
    543 	instance = ddi_get_instance(dip);
    544 	DBG1(NULL, "instance(%d) enter\n", instance);
    545 
    546 	vnetp = ddi_get_driver_private(dip);
    547 	if (vnetp == NULL) {
    548 		goto vnet_detach_fail;
    549 	}
    550 
    551 	switch (cmd) {
    552 	case DDI_DETACH:
    553 		break;
    554 	case DDI_SUSPEND:
    555 	case DDI_PM_SUSPEND:
    556 	default:
    557 		goto vnet_detach_fail;
    558 	}
    559 
    560 	if (vnet_unattach(vnetp) != 0) {
    561 		goto vnet_detach_fail;
    562 	}
    563 
    564 	return (DDI_SUCCESS);
    565 
    566 vnet_detach_fail:
    567 	return (DDI_FAILURE);
    568 }
    569 
    570 /*
    571  * Common routine to handle vnetattach() failure and vnetdetach(). Note that
    572  * the only reason this function could fail is if mac_unregister() fails.
    573  * Otherwise, this function must ensure that all resources are freed and return
    574  * success.
    575  */
    576 static int
    577 vnet_unattach(vnet_t *vnetp)
    578 {
    579 	vnet_attach_progress_t	attach_progress;
    580 
    581 	attach_progress = vnetp->attach_progress;
    582 
    583 	/*
    584 	 * Disable the mac device in the gldv3 subsystem. This can fail, in
    585 	 * particular if there are still any open references to this mac
    586 	 * device; in which case we just return failure without continuing to
    587 	 * detach further.
    588 	 * If it succeeds, we then invoke vgen_uninit() which should unregister
    589 	 * any pseudo rings registered with the mac layer. Note we keep the
    590 	 * AST_macreg flag on, so we can unregister with the mac layer at
    591 	 * the end of this routine.
    592 	 */
    593 	if (attach_progress & AST_macreg) {
    594 		if (mac_disable(vnetp->mh) != 0) {
    595 			return (1);
    596 		}
    597 	}
    598 
    599 	/*
    600 	 * Now that we have disabled the device, we must finish all other steps
    601 	 * and successfully return from this function; otherwise we will end up
    602 	 * leaving the device in a broken/unusable state.
    603 	 *
    604 	 * First, release any hybrid resources assigned to this vnet device.
    605 	 */
    606 	if (attach_progress & AST_vdds_init) {
    607 		vdds_cleanup(vnetp);
    608 		attach_progress &= ~AST_vdds_init;
    609 	}
    610 
    611 	/*
    612 	 * Uninit vgen. This stops further mdeg callbacks to this vnet
    613 	 * device and/or its ports; and detaches any existing ports.
    614 	 */
    615 	if (attach_progress & (AST_vgen_init|AST_init_mdeg)) {
    616 		vgen_uninit(vnetp->vgenhdl);
    617 		attach_progress &= ~AST_vgen_init;
    618 		attach_progress &= ~AST_init_mdeg;
    619 	}
    620 
    621 	/* Destroy the taskq. */
    622 	if (attach_progress & AST_taskq_create) {
    623 		ddi_taskq_destroy(vnetp->taskqp);
    624 		attach_progress &= ~AST_taskq_create;
    625 	}
    626 
    627 	/* Destroy fdb. */
    628 	if (attach_progress & AST_fdbh_alloc) {
    629 		vnet_fdb_destroy(vnetp);
    630 		attach_progress &= ~AST_fdbh_alloc;
    631 	}
    632 
    633 	/* Remove from the device list */
    634 	if (attach_progress & AST_vnet_list) {
    635 		vnet_t		**vnetpp;
    636 		/* unlink from instance(vnet_t) list */
    637 		WRITE_ENTER(&vnet_rw);
    638 		for (vnetpp = &vnet_headp; *vnetpp;
    639 		    vnetpp = &(*vnetpp)->nextp) {
    640 			if (*vnetpp == vnetp) {
    641 				*vnetpp = vnetp->nextp;
    642 				break;
    643 			}
    644 		}
    645 		RW_EXIT(&vnet_rw);
    646 		attach_progress &= ~AST_vnet_list;
    647 	}
    648 
    649 	if (attach_progress & AST_ring_init) {
    650 		vnet_ring_grp_uninit(vnetp);
    651 		attach_progress &= ~AST_ring_init;
    652 	}
    653 
    654 	if (attach_progress & AST_macreg) {
    655 		VERIFY(mac_unregister(vnetp->mh) == 0);
    656 		vnetp->mh = NULL;
    657 		attach_progress &= ~AST_macreg;
    658 	}
    659 
    660 	if (attach_progress & AST_vnet_alloc) {
    661 		rw_destroy(&vnetp->vrwlock);
    662 		rw_destroy(&vnetp->vsw_fp_rw);
    663 		attach_progress &= ~AST_vnet_list;
    664 		KMEM_FREE(vnetp);
    665 	}
    666 
    667 	return (0);
    668 }
    669 
    670 /* enable the device for transmit/receive */
    671 static int
    672 vnet_m_start(void *arg)
    673 {
    674 	vnet_t		*vnetp = arg;
    675 
    676 	DBG1(vnetp, "enter\n");
    677 
    678 	WRITE_ENTER(&vnetp->vrwlock);
    679 	vnetp->flags |= VNET_STARTED;
    680 	vnet_start_resources(vnetp);
    681 	RW_EXIT(&vnetp->vrwlock);
    682 
    683 	DBG1(vnetp, "exit\n");
    684 	return (VNET_SUCCESS);
    685 
    686 }
    687 
    688 /* stop transmit/receive for the device */
    689 static void
    690 vnet_m_stop(void *arg)
    691 {
    692 	vnet_t		*vnetp = arg;
    693 
    694 	DBG1(vnetp, "enter\n");
    695 
    696 	WRITE_ENTER(&vnetp->vrwlock);
    697 	if (vnetp->flags & VNET_STARTED) {
    698 		/*
    699 		 * Set the flags appropriately; this should prevent starting of
    700 		 * any new resources that are added(see vnet_res_start_task()),
    701 		 * while we release the vrwlock in vnet_stop_resources() before
    702 		 * stopping each resource.
    703 		 */
    704 		vnetp->flags &= ~VNET_STARTED;
    705 		vnetp->flags |= VNET_STOPPING;
    706 		vnet_stop_resources(vnetp);
    707 		vnetp->flags &= ~VNET_STOPPING;
    708 	}
    709 	RW_EXIT(&vnetp->vrwlock);
    710 
    711 	DBG1(vnetp, "exit\n");
    712 }
    713 
    714 /* set the unicast mac address of the device */
    715 static int
    716 vnet_m_unicst(void *arg, const uint8_t *macaddr)
    717 {
    718 	_NOTE(ARGUNUSED(macaddr))
    719 
    720 	vnet_t *vnetp = arg;
    721 
    722 	DBG1(vnetp, "enter\n");
    723 	/*
    724 	 * NOTE: setting mac address dynamically is not supported.
    725 	 */
    726 	DBG1(vnetp, "exit\n");
    727 
    728 	return (VNET_FAILURE);
    729 }
    730 
    731 /* enable/disable a multicast address */
    732 static int
    733 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
    734 {
    735 	_NOTE(ARGUNUSED(add, mca))
    736 
    737 	vnet_t *vnetp = arg;
    738 	vnet_res_t	*vresp;
    739 	mac_register_t	*macp;
    740 	mac_callbacks_t	*cbp;
    741 	int rv = VNET_SUCCESS;
    742 
    743 	DBG1(vnetp, "enter\n");
    744 
    745 	READ_ENTER(&vnetp->vrwlock);
    746 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
    747 		if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
    748 			macp = &vresp->macreg;
    749 			cbp = macp->m_callbacks;
    750 			rv = cbp->mc_multicst(macp->m_driver, add, mca);
    751 		}
    752 	}
    753 	RW_EXIT(&vnetp->vrwlock);
    754 
    755 	DBG1(vnetp, "exit(%d)\n", rv);
    756 	return (rv);
    757 }
    758 
    759 /* set or clear promiscuous mode on the device */
    760 static int
    761 vnet_m_promisc(void *arg, boolean_t on)
    762 {
    763 	_NOTE(ARGUNUSED(on))
    764 
    765 	vnet_t *vnetp = arg;
    766 	DBG1(vnetp, "enter\n");
    767 	/*
    768 	 * NOTE: setting promiscuous mode is not supported, just return success.
    769 	 */
    770 	DBG1(vnetp, "exit\n");
    771 	return (VNET_SUCCESS);
    772 }
    773 
    774 /*
    775  * Transmit a chain of packets. This function provides switching functionality
    776  * based on the destination mac address to reach other guests (within ldoms) or
    777  * external hosts.
    778  */
    779 mblk_t *
    780 vnet_tx_ring_send(void *arg, mblk_t *mp)
    781 {
    782 	vnet_pseudo_tx_ring_t	*tx_ringp;
    783 	vnet_t			*vnetp;
    784 	vnet_res_t		*vresp;
    785 	mblk_t			*next;
    786 	mblk_t			*resid_mp;
    787 	mac_register_t		*macp;
    788 	struct ether_header	*ehp;
    789 	boolean_t		is_unicast;
    790 	boolean_t		is_pvid;	/* non-default pvid ? */
    791 	boolean_t		hres;		/* Hybrid resource ? */
    792 	void			*tx_arg;
    793 
    794 	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
    795 	vnetp = (vnet_t *)tx_ringp->vnetp;
    796 	DBG1(vnetp, "enter\n");
    797 	ASSERT(mp != NULL);
    798 
    799 	is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;
    800 
    801 	while (mp != NULL) {
    802 
    803 		next = mp->b_next;
    804 		mp->b_next = NULL;
    805 
    806 		/*
    807 		 * Find fdb entry for the destination
    808 		 * and hold a reference to it.
    809 		 */
    810 		ehp = (struct ether_header *)mp->b_rptr;
    811 		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
    812 		if (vresp != NULL) {
    813 
    814 			/*
    815 			 * Destination found in FDB.
    816 			 * The destination is a vnet device within ldoms
    817 			 * and directly reachable, invoke the tx function
    818 			 * in the fdb entry.
    819 			 */
    820 			macp = &vresp->macreg;
    821 			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
    822 
    823 			/* tx done; now release ref on fdb entry */
    824 			VNET_FDBE_REFRELE(vresp);
    825 
    826 			if (resid_mp != NULL) {
    827 				/* m_tx failed */
    828 				mp->b_next = next;
    829 				break;
    830 			}
    831 		} else {
    832 			is_unicast = !(IS_BROADCAST(ehp) ||
    833 			    (IS_MULTICAST(ehp)));
    834 			/*
    835 			 * Destination is not in FDB.
    836 			 * If the destination is broadcast or multicast,
    837 			 * then forward the packet to vswitch.
    838 			 * If a Hybrid resource avilable, then send the
    839 			 * unicast packet via hybrid resource, otherwise
    840 			 * forward it to vswitch.
    841 			 */
    842 			READ_ENTER(&vnetp->vsw_fp_rw);
    843 
    844 			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
    845 				vresp = vnetp->hio_fp;
    846 				hres = B_TRUE;
    847 			} else {
    848 				vresp = vnetp->vsw_fp;
    849 				hres = B_FALSE;
    850 			}
    851 			if (vresp == NULL) {
    852 				/*
    853 				 * no fdb entry to vsw? drop the packet.
    854 				 */
    855 				RW_EXIT(&vnetp->vsw_fp_rw);
    856 				freemsg(mp);
    857 				mp = next;
    858 				continue;
    859 			}
    860 
    861 			/* ref hold the fdb entry to vsw */
    862 			VNET_FDBE_REFHOLD(vresp);
    863 
    864 			RW_EXIT(&vnetp->vsw_fp_rw);
    865 
    866 			/*
    867 			 * In the case of a hybrid resource we need to insert
    868 			 * the tag for the pvid case here; unlike packets that
    869 			 * are destined to a vnet/vsw in which case the vgen
    870 			 * layer does the tagging before sending it over ldc.
    871 			 */
    872 			if (hres == B_TRUE) {
    873 				/*
    874 				 * Determine if the frame being transmitted
    875 				 * over the hybrid resource is untagged. If so,
    876 				 * insert the tag before transmitting.
    877 				 */
    878 				if (is_pvid == B_TRUE &&
    879 				    ehp->ether_type != htons(ETHERTYPE_VLAN)) {
    880 
    881 					mp = vnet_vlan_insert_tag(mp,
    882 					    vnetp->pvid);
    883 					if (mp == NULL) {
    884 						VNET_FDBE_REFRELE(vresp);
    885 						mp = next;
    886 						continue;
    887 					}
    888 
    889 				}
    890 
    891 				macp = &vresp->macreg;
    892 				tx_arg = tx_ringp;
    893 			} else {
    894 				macp = &vresp->macreg;
    895 				tx_arg = macp->m_driver;
    896 			}
    897 			resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp);
    898 
    899 			/* tx done; now release ref on fdb entry */
    900 			VNET_FDBE_REFRELE(vresp);
    901 
    902 			if (resid_mp != NULL) {
    903 				/* m_tx failed */
    904 				mp->b_next = next;
    905 				break;
    906 			}
    907 		}
    908 
    909 		mp = next;
    910 	}
    911 
    912 	DBG1(vnetp, "exit\n");
    913 	return (mp);
    914 }
    915 
    916 /* get statistics from the device */
    917 int
    918 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
    919 {
    920 	vnet_t *vnetp = arg;
    921 	vnet_res_t	*vresp;
    922 	mac_register_t	*macp;
    923 	mac_callbacks_t	*cbp;
    924 	uint64_t val_total = 0;
    925 
    926 	DBG1(vnetp, "enter\n");
    927 
    928 	/*
    929 	 * get the specified statistic from each transport and return the
    930 	 * aggregate val.  This obviously only works for counters.
    931 	 */
    932 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
    933 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
    934 		return (ENOTSUP);
    935 	}
    936 
    937 	READ_ENTER(&vnetp->vrwlock);
    938 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
    939 		macp = &vresp->macreg;
    940 		cbp = macp->m_callbacks;
    941 		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
    942 			val_total += *val;
    943 	}
    944 	RW_EXIT(&vnetp->vrwlock);
    945 
    946 	*val = val_total;
    947 
    948 	DBG1(vnetp, "exit\n");
    949 	return (0);
    950 }
    951 
    952 static void
    953 vnet_ring_grp_init(vnet_t *vnetp)
    954 {
    955 	vnet_pseudo_rx_group_t	*rx_grp;
    956 	vnet_pseudo_rx_ring_t	*rx_ringp;
    957 	vnet_pseudo_tx_group_t	*tx_grp;
    958 	vnet_pseudo_tx_ring_t	*tx_ringp;
    959 	int			i;
    960 
    961 	tx_grp = &vnetp->tx_grp[0];
    962 	tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) *
    963 	    VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP);
    964 	for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) {
    965 		tx_ringp[i].state |= VNET_TXRING_SHARED;
    966 	}
    967 	tx_grp->rings = tx_ringp;
    968 	tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS;
    969 
    970 	rx_grp = &vnetp->rx_grp[0];
    971 	rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP;
    972 	rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL);
    973 	rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) *
    974 	    rx_grp->max_ring_cnt, KM_SLEEP);
    975 
    976 	/*
    977 	 * Setup the first 3 Pseudo RX Rings that are reserved;
    978 	 * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource.
    979 	 */
    980 	rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE;
    981 	rx_ringp[0].index = 0;
    982 	rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
    983 	rx_ringp[1].index = 1;
    984 	rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
    985 	rx_ringp[2].index = 2;
    986 
    987 	rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
    988 	rx_grp->rings = rx_ringp;
    989 
    990 	for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
    991 	    i < rx_grp->max_ring_cnt; i++) {
    992 		rx_ringp = &rx_grp->rings[i];
    993 		rx_ringp->state = VNET_RXRING_FREE;
    994 		rx_ringp->index = i;
    995 	}
    996 }
    997 
    998 static void
    999 vnet_ring_grp_uninit(vnet_t *vnetp)
   1000 {
   1001 	vnet_pseudo_rx_group_t	*rx_grp;
   1002 	vnet_pseudo_tx_group_t	*tx_grp;
   1003 
   1004 	tx_grp = &vnetp->tx_grp[0];
   1005 	if (tx_grp->rings != NULL) {
   1006 		ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS);
   1007 		kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) *
   1008 		    tx_grp->ring_cnt);
   1009 		tx_grp->rings = NULL;
   1010 	}
   1011 
   1012 	rx_grp = &vnetp->rx_grp[0];
   1013 	if (rx_grp->rings != NULL) {
   1014 		ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP);
   1015 		ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
   1016 		kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) *
   1017 		    rx_grp->max_ring_cnt);
   1018 		rx_grp->rings = NULL;
   1019 	}
   1020 }
   1021 
   1022 static vnet_pseudo_rx_ring_t *
   1023 vnet_alloc_pseudo_rx_ring(vnet_t *vnetp)
   1024 {
   1025 	vnet_pseudo_rx_group_t  *rx_grp;
   1026 	vnet_pseudo_rx_ring_t	*rx_ringp;
   1027 	int			index;
   1028 
   1029 	rx_grp = &vnetp->rx_grp[0];
   1030 	WRITE_ENTER(&rx_grp->lock);
   1031 
   1032 	if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) {
   1033 		/* no rings available */
   1034 		RW_EXIT(&rx_grp->lock);
   1035 		return (NULL);
   1036 	}
   1037 
   1038 	for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
   1039 	    index < rx_grp->max_ring_cnt; index++) {
   1040 		rx_ringp = &rx_grp->rings[index];
   1041 		if (rx_ringp->state == VNET_RXRING_FREE) {
   1042 			rx_ringp->state |= VNET_RXRING_INUSE;
   1043 			rx_grp->ring_cnt++;
   1044 			break;
   1045 		}
   1046 	}
   1047 
   1048 	RW_EXIT(&rx_grp->lock);
   1049 	return (rx_ringp);
   1050 }
   1051 
   1052 static void
   1053 vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp)
   1054 {
   1055 	vnet_pseudo_rx_group_t  *rx_grp;
   1056 
   1057 	ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
   1058 	rx_grp = &vnetp->rx_grp[0];
   1059 	WRITE_ENTER(&rx_grp->lock);
   1060 
   1061 	if (ringp->state != VNET_RXRING_FREE) {
   1062 		ringp->state = VNET_RXRING_FREE;
   1063 		ringp->handle = NULL;
   1064 		rx_grp->ring_cnt--;
   1065 	}
   1066 
   1067 	RW_EXIT(&rx_grp->lock);
   1068 }
   1069 
   1070 /* wrapper function for mac_register() */
   1071 static int
   1072 vnet_mac_register(vnet_t *vnetp)
   1073 {
   1074 	mac_register_t	*macp;
   1075 	int		err;
   1076 
   1077 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
   1078 		return (DDI_FAILURE);
   1079 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
   1080 	macp->m_driver = vnetp;
   1081 	macp->m_dip = vnetp->dip;
   1082 	macp->m_src_addr = vnetp->curr_macaddr;
   1083 	macp->m_callbacks = &vnet_m_callbacks;
   1084 	macp->m_min_sdu = 0;
   1085 	macp->m_max_sdu = vnetp->mtu;
   1086 	macp->m_margin = VLAN_TAGSZ;
   1087 
   1088 	/*
   1089 	 * MAC_VIRT_SERIALIZE flag is needed while hybridIO is enabled to
   1090 	 * workaround tx lock contention issues in nxge.
   1091 	 */
   1092 	macp->m_v12n = MAC_VIRT_LEVEL1;
   1093 	if (vnet_mac_tx_serialize == B_TRUE) {
   1094 		macp->m_v12n |= MAC_VIRT_SERIALIZE;
   1095 	}
   1096 
   1097 	/*
   1098 	 * Finally, we're ready to register ourselves with the MAC layer
   1099 	 * interface; if this succeeds, we're all ready to start()
   1100 	 */
   1101 	err = mac_register(macp, &vnetp->mh);
   1102 	mac_free(macp);
   1103 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
   1104 }
   1105 
   1106 /* read the mac address of the device */
   1107 static int
   1108 vnet_read_mac_address(vnet_t *vnetp)
   1109 {
   1110 	uchar_t 	*macaddr;
   1111 	uint32_t 	size;
   1112 	int 		rv;
   1113 
   1114 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
   1115 	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
   1116 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
   1117 		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
   1118 		    macaddr_propname, rv);
   1119 		return (DDI_FAILURE);
   1120 	}
   1121 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
   1122 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
   1123 	ddi_prop_free(macaddr);
   1124 
   1125 	return (DDI_SUCCESS);
   1126 }
   1127 
   1128 static void
   1129 vnet_fdb_create(vnet_t *vnetp)
   1130 {
   1131 	char		hashname[MAXNAMELEN];
   1132 
   1133 	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
   1134 	    vnetp->instance);
   1135 	vnetp->fdb_nchains = vnet_fdb_nchains;
   1136 	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
   1137 	    mod_hash_null_valdtor, sizeof (void *));
   1138 }
   1139 
   1140 static void
   1141 vnet_fdb_destroy(vnet_t *vnetp)
   1142 {
   1143 	/* destroy fdb-hash-table */
   1144 	if (vnetp->fdb_hashp != NULL) {
   1145 		mod_hash_destroy_hash(vnetp->fdb_hashp);
   1146 		vnetp->fdb_hashp = NULL;
   1147 		vnetp->fdb_nchains = 0;
   1148 	}
   1149 }
   1150 
   1151 /*
   1152  * Add an entry into the fdb.
   1153  */
   1154 void
   1155 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
   1156 {
   1157 	uint64_t	addr = 0;
   1158 	int		rv;
   1159 
   1160 	KEY_HASH(addr, vresp->rem_macaddr);
   1161 
   1162 	/*
   1163 	 * If the entry being added corresponds to LDC_SERVICE resource,
   1164 	 * that is, vswitch connection, it is added to the hash and also
   1165 	 * the entry is cached, an additional reference count reflects
   1166 	 * this. The HYBRID resource is not added to the hash, but only
   1167 	 * cached, as it is only used for sending out packets for unknown
   1168 	 * unicast destinations.
   1169 	 */
   1170 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
   1171 	    (vresp->refcnt = 1) : (vresp->refcnt = 0);
   1172 
   1173 	/*
   1174 	 * Note: duplicate keys will be rejected by mod_hash.
   1175 	 */
   1176 	if (vresp->type != VIO_NET_RES_HYBRID) {
   1177 		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
   1178 		    (mod_hash_val_t)vresp);
   1179 		if (rv != 0) {
   1180 			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
   1181 			return;
   1182 		}
   1183 	}
   1184 
   1185 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
   1186 		/* Cache the fdb entry to vsw-port */
   1187 		WRITE_ENTER(&vnetp->vsw_fp_rw);
   1188 		if (vnetp->vsw_fp == NULL)
   1189 			vnetp->vsw_fp = vresp;
   1190 		RW_EXIT(&vnetp->vsw_fp_rw);
   1191 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
   1192 		/* Cache the fdb entry to hybrid resource */
   1193 		WRITE_ENTER(&vnetp->vsw_fp_rw);
   1194 		if (vnetp->hio_fp == NULL)
   1195 			vnetp->hio_fp = vresp;
   1196 		RW_EXIT(&vnetp->vsw_fp_rw);
   1197 	}
   1198 }
   1199 
   1200 /*
   1201  * Remove an entry from fdb.
   1202  */
   1203 static void
   1204 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
   1205 {
   1206 	uint64_t	addr = 0;
   1207 	int		rv;
   1208 	uint32_t	refcnt;
   1209 	vnet_res_t	*tmp;
   1210 
   1211 	KEY_HASH(addr, vresp->rem_macaddr);
   1212 
   1213 	/*
   1214 	 * Remove the entry from fdb hash table.
   1215 	 * This prevents further references to this fdb entry.
   1216 	 */
   1217 	if (vresp->type != VIO_NET_RES_HYBRID) {
   1218 		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
   1219 		    (mod_hash_val_t *)&tmp);
   1220 		if (rv != 0) {
   1221 			/*
   1222 			 * As the resources are added to the hash only
   1223 			 * after they are started, this can occur if
   1224 			 * a resource unregisters before it is ever started.
   1225 			 */
   1226 			return;
   1227 		}
   1228 	}
   1229 
   1230 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
   1231 		WRITE_ENTER(&vnetp->vsw_fp_rw);
   1232 
   1233 		ASSERT(tmp == vnetp->vsw_fp);
   1234 		vnetp->vsw_fp = NULL;
   1235 
   1236 		RW_EXIT(&vnetp->vsw_fp_rw);
   1237 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
   1238 		WRITE_ENTER(&vnetp->vsw_fp_rw);
   1239 
   1240 		vnetp->hio_fp = NULL;
   1241 
   1242 		RW_EXIT(&vnetp->vsw_fp_rw);
   1243 	}
   1244 
   1245 	/*
   1246 	 * If there are threads already ref holding before the entry was
   1247 	 * removed from hash table, then wait for ref count to drop to zero.
   1248 	 */
   1249 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
   1250 	    (refcnt = 1) : (refcnt = 0);
   1251 	while (vresp->refcnt > refcnt) {
   1252 		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
   1253 	}
   1254 }
   1255 
   1256 /*
   1257  * Search fdb for a given mac address. If an entry is found, hold
   1258  * a reference to it and return the entry; else returns NULL.
   1259  */
   1260 static vnet_res_t *
   1261 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
   1262 {
   1263 	uint64_t	key = 0;
   1264 	vnet_res_t	*vresp;
   1265 	int		rv;
   1266 
   1267 	KEY_HASH(key, addrp->ether_addr_octet);
   1268 
   1269 	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
   1270 	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);
   1271 
   1272 	if (rv != 0)
   1273 		return (NULL);
   1274 
   1275 	return (vresp);
   1276 }
   1277 
   1278 /*
   1279  * Callback function provided to mod_hash_find_cb(). After finding the fdb
   1280  * entry corresponding to the key (macaddr), this callback will be invoked by
   1281  * mod_hash_find_cb() to atomically increment the reference count on the fdb
   1282  * entry before returning the found entry.
   1283  */
   1284 static void
   1285 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
   1286 {
   1287 	_NOTE(ARGUNUSED(key))
   1288 	VNET_FDBE_REFHOLD((vnet_res_t *)val);
   1289 }
   1290 
   1291 /*
   1292  * Frames received that are tagged with the pvid of the vnet device must be
   1293  * untagged before sending up the stack. This function walks the chain of rx
   1294  * frames, untags any such frames and returns the updated chain.
   1295  *
   1296  * Arguments:
   1297  *    pvid:  pvid of the vnet device for which packets are being received
   1298  *    mp:    head of pkt chain to be validated and untagged
   1299  *
   1300  * Returns:
   1301  *    mp:    head of updated chain of packets
   1302  */
   1303 static void
   1304 vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
   1305 {
   1306 	struct ether_vlan_header	*evhp;
   1307 	mblk_t				*bp;
   1308 	mblk_t				*bpt;
   1309 	mblk_t				*bph;
   1310 	mblk_t				*bpn;
   1311 
   1312 	bpn = bph = bpt = NULL;
   1313 
   1314 	for (bp = *mp; bp != NULL; bp = bpn) {
   1315 
   1316 		bpn = bp->b_next;
   1317 		bp->b_next = bp->b_prev = NULL;
   1318 
   1319 		evhp = (struct ether_vlan_header *)bp->b_rptr;
   1320 
   1321 		if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
   1322 		    VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {
   1323 
   1324 			bp = vnet_vlan_remove_tag(bp);
   1325 			if (bp == NULL) {
   1326 				continue;
   1327 			}
   1328 
   1329 		}
   1330 
   1331 		/* build a chain of processed packets */
   1332 		if (bph == NULL) {
   1333 			bph = bpt = bp;
   1334 		} else {
   1335 			bpt->b_next = bp;
   1336 			bpt = bp;
   1337 		}
   1338 
   1339 	}
   1340 
   1341 	*mp = bph;
   1342 }
   1343 
   1344 static void
   1345 vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
   1346 {
   1347 	vnet_res_t		*vresp = (vnet_res_t *)vrh;
   1348 	vnet_t			*vnetp = vresp->vnetp;
   1349 	vnet_pseudo_rx_ring_t	*ringp;
   1350 
   1351 	if ((vnetp == NULL) || (vnetp->mh == 0)) {
   1352 		freemsgchain(mp);
   1353 		return;
   1354 	}
   1355 
   1356 	ringp = vresp->rx_ringp;
   1357 	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
   1358 }
   1359 
   1360 void
   1361 vnet_tx_update(vio_net_handle_t vrh)
   1362 {
   1363 	vnet_res_t		*vresp = (vnet_res_t *)vrh;
   1364 	vnet_t			*vnetp = vresp->vnetp;
   1365 	vnet_pseudo_tx_ring_t	*tx_ringp;
   1366 	vnet_pseudo_tx_group_t	*tx_grp;
   1367 	int			i;
   1368 
   1369 	if (vnetp == NULL || vnetp->mh == NULL) {
   1370 		return;
   1371 	}
   1372 
   1373 	/*
   1374 	 * Currently, the tx hwring API (used to access rings that belong to
   1375 	 * a Hybrid IO resource) does not provide us a per ring flow ctrl
   1376 	 * update; also the pseudo rings are shared by the ports/ldcs in the
   1377 	 * vgen layer. Thus we can't figure out which pseudo ring is being
   1378 	 * re-enabled for transmits. To work around this, when we get a tx
   1379 	 * restart notification from below, we simply propagate that to all
   1380 	 * the tx pseudo rings registered with the mac layer above.
   1381 	 *
   1382 	 * There are a couple of side effects with this approach, but they are
   1383 	 * not harmful, as outlined below:
   1384 	 *
   1385 	 * A) We might send an invalid ring_update() for a ring that is not
   1386 	 * really flow controlled. This will not have any effect in the mac
   1387 	 * layer and packets will continue to be transmitted on that ring.
   1388 	 *
   1389 	 * B) We might end up clearing the flow control in the mac layer for
   1390 	 * a ring that is still flow controlled in the underlying resource.
   1391 	 * This will result in the mac layer restarting	transmit, only to be
   1392 	 * flow controlled again on that ring.
   1393 	 */
   1394 	tx_grp = &vnetp->tx_grp[0];
   1395 	for (i = 0; i < tx_grp->ring_cnt; i++) {
   1396 		tx_ringp = &tx_grp->rings[i];
   1397 		mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
   1398 	}
   1399 }
   1400 
   1401 /*
   1402  * Update the new mtu of vnet into the mac layer. First check if the device has
   1403  * been plumbed and if so fail the mtu update. Returns 0 on success.
   1404  */
   1405 int
   1406 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
   1407 {
   1408 	int	rv;
   1409 
   1410 	if (vnetp == NULL || vnetp->mh == NULL) {
   1411 		return (EINVAL);
   1412 	}
   1413 
   1414 	WRITE_ENTER(&vnetp->vrwlock);
   1415 
   1416 	if (vnetp->flags & VNET_STARTED) {
   1417 		RW_EXIT(&vnetp->vrwlock);
   1418 		cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
   1419 		    "update as the device is plumbed\n",
   1420 		    vnetp->instance);
   1421 		return (EBUSY);
   1422 	}
   1423 
   1424 	/* update mtu in the mac layer */
   1425 	rv = mac_maxsdu_update(vnetp->mh, mtu);
   1426 	if (rv != 0) {
   1427 		RW_EXIT(&vnetp->vrwlock);
   1428 		cmn_err(CE_NOTE,
   1429 		    "!vnet%d: Unable to update mtu with mac layer\n",
   1430 		    vnetp->instance);
   1431 		return (EIO);
   1432 	}
   1433 
   1434 	vnetp->mtu = mtu;
   1435 
   1436 	RW_EXIT(&vnetp->vrwlock);
   1437 
   1438 	return (0);
   1439 }
   1440 
   1441 /*
   1442  * Update the link state of vnet to the mac layer.
   1443  */
   1444 void
   1445 vnet_link_update(vnet_t *vnetp, link_state_t link_state)
   1446 {
   1447 	if (vnetp == NULL || vnetp->mh == NULL) {
   1448 		return;
   1449 	}
   1450 
   1451 	WRITE_ENTER(&vnetp->vrwlock);
   1452 	if (vnetp->link_state == link_state) {
   1453 		RW_EXIT(&vnetp->vrwlock);
   1454 		return;
   1455 	}
   1456 	vnetp->link_state = link_state;
   1457 	RW_EXIT(&vnetp->vrwlock);
   1458 
   1459 	mac_link_update(vnetp->mh, link_state);
   1460 }
   1461 
   1462 /*
   1463  * vio_net_resource_reg -- An interface called to register a resource
   1464  *	with vnet.
   1465  *	macp -- a GLDv3 mac_register that has all the details of
   1466  *		a resource and its callbacks etc.
   1467  *	type -- resource type.
   1468  *	local_macaddr -- resource's MAC address. This is used to
   1469  *			 associate a resource with a corresponding vnet.
   1470  *	remote_macaddr -- remote side MAC address. This is ignored for
   1471  *			  the Hybrid resources.
   1472  *	vhp -- A handle returned to the caller.
   1473  *	vcb -- A set of callbacks provided to the callers.
   1474  */
   1475 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
   1476     ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
   1477     vio_net_callbacks_t *vcb)
   1478 {
   1479 	vnet_t		*vnetp;
   1480 	vnet_res_t	*vresp;
   1481 
   1482 	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
   1483 	ether_copy(local_macaddr, vresp->local_macaddr);
   1484 	ether_copy(rem_macaddr, vresp->rem_macaddr);
   1485 	vresp->type = type;
   1486 	bcopy(macp, &vresp->macreg, sizeof (mac_register_t));
   1487 
   1488 	DBG1(NULL, "Resource Registerig type=0%X\n", type);
   1489 
   1490 	READ_ENTER(&vnet_rw);
   1491 	vnetp = vnet_headp;
   1492 	while (vnetp != NULL) {
   1493 		if (VNET_MATCH_RES(vresp, vnetp)) {
   1494 			vresp->vnetp = vnetp;
   1495 
   1496 			/* Setup kstats for hio resource */
   1497 			if (vresp->type == VIO_NET_RES_HYBRID) {
   1498 				vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
   1499 				    "hio", vresp);
   1500 				if (vresp->ksp == NULL) {
   1501 					cmn_err(CE_NOTE, "!vnet%d: Cannot "
   1502 					    "create kstats for hio resource",
   1503 					    vnetp->instance);
   1504 				}
   1505 			}
   1506 			vnet_add_resource(vnetp, vresp);
   1507 			break;
   1508 		}
   1509 		vnetp = vnetp->nextp;
   1510 	}
   1511 	RW_EXIT(&vnet_rw);
   1512 	if (vresp->vnetp == NULL) {
   1513 		DWARN(NULL, "No vnet instance");
   1514 		kmem_free(vresp, sizeof (vnet_res_t));
   1515 		return (ENXIO);
   1516 	}
   1517 
   1518 	*vhp = vresp;
   1519 	vcb->vio_net_rx_cb = vnet_rx;
   1520 	vcb->vio_net_tx_update = vnet_tx_update;
   1521 	vcb->vio_net_report_err = vnet_handle_res_err;
   1522 
   1523 	/* Bind the resource to pseudo ring(s) */
   1524 	if (vnet_bind_rings(vresp) != 0) {
   1525 		(void) vnet_rem_resource(vnetp, vresp);
   1526 		vnet_hio_destroy_kstats(vresp->ksp);
   1527 		KMEM_FREE(vresp);
   1528 		return (1);
   1529 	}
   1530 
   1531 	/* Dispatch a task to start resources */
   1532 	vnet_dispatch_res_task(vnetp);
   1533 	return (0);
   1534 }
   1535 
   1536 /*
   1537  * vio_net_resource_unreg -- An interface to unregister a resource.
   1538  */
   1539 void
   1540 vio_net_resource_unreg(vio_net_handle_t vhp)
   1541 {
   1542 	vnet_res_t	*vresp = (vnet_res_t *)vhp;
   1543 	vnet_t		*vnetp = vresp->vnetp;
   1544 
   1545 	DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
   1546 
   1547 	ASSERT(vnetp != NULL);
   1548 	/*
   1549 	 * Remove the resource from fdb; this ensures
   1550 	 * there are no references to the resource.
   1551 	 */
   1552 	vnet_fdbe_del(vnetp, vresp);
   1553 
   1554 	vnet_unbind_rings(vresp);
   1555 
   1556 	/* Now remove the resource from the list */
   1557 	(void) vnet_rem_resource(vnetp, vresp);
   1558 
   1559 	vnet_hio_destroy_kstats(vresp->ksp);
   1560 	KMEM_FREE(vresp);
   1561 }
   1562 
   1563 static void
   1564 vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp)
   1565 {
   1566 	WRITE_ENTER(&vnetp->vrwlock);
   1567 	vresp->nextp = vnetp->vres_list;
   1568 	vnetp->vres_list = vresp;
   1569 	RW_EXIT(&vnetp->vrwlock);
   1570 }
   1571 
   1572 static vnet_res_t *
   1573 vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp)
   1574 {
   1575 	vnet_res_t	*vrp;
   1576 
   1577 	WRITE_ENTER(&vnetp->vrwlock);
   1578 	if (vresp == vnetp->vres_list) {
   1579 		vnetp->vres_list = vresp->nextp;
   1580 	} else {
   1581 		vrp = vnetp->vres_list;
   1582 		while (vrp->nextp != NULL) {
   1583 			if (vrp->nextp == vresp) {
   1584 				vrp->nextp = vresp->nextp;
   1585 				break;
   1586 			}
   1587 			vrp = vrp->nextp;
   1588 		}
   1589 	}
   1590 	vresp->vnetp = NULL;
   1591 	vresp->nextp = NULL;
   1592 
   1593 	RW_EXIT(&vnetp->vrwlock);
   1594 
   1595 	return (vresp);
   1596 }
   1597 
   1598 /*
   1599  * vnet_dds_rx -- an interface called by vgen to DDS messages.
   1600  */
   1601 void
   1602 vnet_dds_rx(void *arg, void *dmsg)
   1603 {
   1604 	vnet_t *vnetp = arg;
   1605 	vdds_process_dds_msg(vnetp, dmsg);
   1606 }
   1607 
   1608 /*
   1609  * vnet_send_dds_msg -- An interface provided to DDS to send
   1610  *	DDS messages. This simply sends meessages via vgen.
   1611  */
   1612 int
   1613 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
   1614 {
   1615 	int rv;
   1616 
   1617 	if (vnetp->vgenhdl != NULL) {
   1618 		rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
   1619 	}
   1620 	return (rv);
   1621 }
   1622 
   1623 /*
   1624  * vnet_cleanup_hio -- an interface called by vgen to cleanup hio resources.
   1625  */
   1626 void
   1627 vnet_dds_cleanup_hio(vnet_t *vnetp)
   1628 {
   1629 	vdds_cleanup_hio(vnetp);
   1630 }
   1631 
   1632 /*
   1633  * vnet_handle_res_err -- A callback function called by a resource
   1634  *	to report an error. For example, vgen can call to report
   1635  *	an LDC down/reset event. This will trigger cleanup of associated
   1636  *	Hybrid resource.
   1637  */
   1638 /* ARGSUSED */
   1639 static void
   1640 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
   1641 {
   1642 	vnet_res_t *vresp = (vnet_res_t *)vrh;
   1643 	vnet_t *vnetp = vresp->vnetp;
   1644 
   1645 	if (vnetp == NULL) {
   1646 		return;
   1647 	}
   1648 	if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
   1649 	    (vresp->type != VIO_NET_RES_HYBRID)) {
   1650 		return;
   1651 	}
   1652 
   1653 	vdds_cleanup_hio(vnetp);
   1654 }
   1655 
   1656 /*
   1657  * vnet_dispatch_res_task -- A function to dispatch tasks start resources.
   1658  */
   1659 static void
   1660 vnet_dispatch_res_task(vnet_t *vnetp)
   1661 {
   1662 	int rv;
   1663 
   1664 	/*
   1665 	 * Dispatch the task. It could be the case that vnetp->flags does
   1666 	 * not have VNET_STARTED set. This is ok as vnet_rest_start_task()
   1667 	 * can abort the task when the task is started. See related comments
   1668 	 * in vnet_m_stop() and vnet_stop_resources().
   1669 	 */
   1670 	rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
   1671 	    vnetp, DDI_NOSLEEP);
   1672 	if (rv != DDI_SUCCESS) {
   1673 		cmn_err(CE_WARN,
   1674 		    "vnet%d:Can't dispatch start resource task",
   1675 		    vnetp->instance);
   1676 	}
   1677 }
   1678 
   1679 /*
   1680  * vnet_res_start_task -- A taskq callback function that starts a resource.
   1681  */
   1682 static void
   1683 vnet_res_start_task(void *arg)
   1684 {
   1685 	vnet_t *vnetp = arg;
   1686 
   1687 	WRITE_ENTER(&vnetp->vrwlock);
   1688 	if (vnetp->flags & VNET_STARTED) {
   1689 		vnet_start_resources(vnetp);
   1690 	}
   1691 	RW_EXIT(&vnetp->vrwlock);
   1692 }
   1693 
   1694 /*
   1695  * vnet_start_resources -- starts all resources associated with
   1696  *	a vnet.
   1697  */
   1698 static void
   1699 vnet_start_resources(vnet_t *vnetp)
   1700 {
   1701 	mac_register_t	*macp;
   1702 	mac_callbacks_t	*cbp;
   1703 	vnet_res_t	*vresp;
   1704 	int rv;
   1705 
   1706 	DBG1(vnetp, "enter\n");
   1707 
   1708 	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));
   1709 
   1710 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
   1711 		/* skip if it is already started */
   1712 		if (vresp->flags & VNET_STARTED) {
   1713 			continue;
   1714 		}
   1715 		macp = &vresp->macreg;
   1716 		cbp = macp->m_callbacks;
   1717 		rv = cbp->mc_start(macp->m_driver);
   1718 		if (rv == 0) {
   1719 			/*
   1720 			 * Successfully started the resource, so now
   1721 			 * add it to the fdb.
   1722 			 */
   1723 			vresp->flags |= VNET_STARTED;
   1724 			vnet_fdbe_add(vnetp, vresp);
   1725 		}
   1726 	}
   1727 
   1728 	DBG1(vnetp, "exit\n");
   1729 
   1730 }
   1731 
   1732 /*
   1733  * vnet_stop_resources -- stop all resources associated with a vnet.
   1734  */
   1735 static void
   1736 vnet_stop_resources(vnet_t *vnetp)
   1737 {
   1738 	vnet_res_t	*vresp;
   1739 	mac_register_t	*macp;
   1740 	mac_callbacks_t	*cbp;
   1741 
   1742 	DBG1(vnetp, "enter\n");
   1743 
   1744 	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));
   1745 
   1746 	for (vresp = vnetp->vres_list; vresp != NULL; ) {
   1747 		if (vresp->flags & VNET_STARTED) {
   1748 			/*
   1749 			 * Release the lock while invoking mc_stop() of the
   1750 			 * underlying resource. We hold a reference to this
   1751 			 * resource to prevent being removed from the list in
   1752 			 * vio_net_resource_unreg(). Note that new resources
   1753 			 * can be added to the head of the list while the lock
   1754 			 * is released, but they won't be started, as
   1755 			 * VNET_STARTED flag has been cleared for the vnet
   1756 			 * device in vnet_m_stop(). Also, while the lock is
   1757 			 * released a resource could be removed from the list
   1758 			 * in vio_net_resource_unreg(); but that is ok, as we
   1759 			 * re-acquire the lock and only then access the forward
   1760 			 * link (vresp->nextp) to continue with the next
   1761 			 * resource.
   1762 			 */
   1763 			vresp->flags &= ~VNET_STARTED;
   1764 			vresp->flags |= VNET_STOPPING;
   1765 			macp = &vresp->macreg;
   1766 			cbp = macp->m_callbacks;
   1767 			VNET_FDBE_REFHOLD(vresp);
   1768 			RW_EXIT(&vnetp->vrwlock);
   1769 
   1770 			cbp->mc_stop(macp->m_driver);
   1771 
   1772 			WRITE_ENTER(&vnetp->vrwlock);
   1773 			vresp->flags &= ~VNET_STOPPING;
   1774 			VNET_FDBE_REFRELE(vresp);
   1775 		}
   1776 		vresp = vresp->nextp;
   1777 	}
   1778 	DBG1(vnetp, "exit\n");
   1779 }
   1780 
   1781 /*
   1782  * Setup kstats for the HIO statistics.
   1783  * NOTE: the synchronization for the statistics is the
   1784  * responsibility of the caller.
   1785  */
   1786 kstat_t *
   1787 vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
   1788 {
   1789 	kstat_t *ksp;
   1790 	vnet_t *vnetp = vresp->vnetp;
   1791 	vnet_hio_kstats_t *hiokp;
   1792 	size_t size;
   1793 
   1794 	ASSERT(vnetp != NULL);
   1795 	size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
   1796 	ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
   1797 	    KSTAT_TYPE_NAMED, size, 0);
   1798 	if (ksp == NULL) {
   1799 		return (NULL);
   1800 	}
   1801 
   1802 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
   1803 	kstat_named_init(&hiokp->ipackets,		"ipackets",
   1804 	    KSTAT_DATA_ULONG);
   1805 	kstat_named_init(&hiokp->ierrors,		"ierrors",
   1806 	    KSTAT_DATA_ULONG);
   1807 	kstat_named_init(&hiokp->opackets,		"opackets",
   1808 	    KSTAT_DATA_ULONG);
   1809 	kstat_named_init(&hiokp->oerrors,		"oerrors",
   1810 	    KSTAT_DATA_ULONG);
   1811 
   1812 
   1813 	/* MIB II kstat variables */
   1814 	kstat_named_init(&hiokp->rbytes,		"rbytes",
   1815 	    KSTAT_DATA_ULONG);
   1816 	kstat_named_init(&hiokp->obytes,		"obytes",
   1817 	    KSTAT_DATA_ULONG);
   1818 	kstat_named_init(&hiokp->multircv,		"multircv",
   1819 	    KSTAT_DATA_ULONG);
   1820 	kstat_named_init(&hiokp->multixmt,		"multixmt",
   1821 	    KSTAT_DATA_ULONG);
   1822 	kstat_named_init(&hiokp->brdcstrcv,		"brdcstrcv",
   1823 	    KSTAT_DATA_ULONG);
   1824 	kstat_named_init(&hiokp->brdcstxmt,		"brdcstxmt",
   1825 	    KSTAT_DATA_ULONG);
   1826 	kstat_named_init(&hiokp->norcvbuf,		"norcvbuf",
   1827 	    KSTAT_DATA_ULONG);
   1828 	kstat_named_init(&hiokp->noxmtbuf,		"noxmtbuf",
   1829 	    KSTAT_DATA_ULONG);
   1830 
   1831 	ksp->ks_update = vnet_hio_update_kstats;
   1832 	ksp->ks_private = (void *)vresp;
   1833 	kstat_install(ksp);
   1834 	return (ksp);
   1835 }
   1836 
   1837 /*
   1838  * Destroy kstats.
   1839  */
   1840 static void
   1841 vnet_hio_destroy_kstats(kstat_t *ksp)
   1842 {
   1843 	if (ksp != NULL)
   1844 		kstat_delete(ksp);
   1845 }
   1846 
   1847 /*
   1848  * Update the kstats.
   1849  */
   1850 static int
   1851 vnet_hio_update_kstats(kstat_t *ksp, int rw)
   1852 {
   1853 	vnet_t *vnetp;
   1854 	vnet_res_t *vresp;
   1855 	vnet_hio_stats_t statsp;
   1856 	vnet_hio_kstats_t *hiokp;
   1857 
   1858 	vresp = (vnet_res_t *)ksp->ks_private;
   1859 	vnetp = vresp->vnetp;
   1860 
   1861 	bzero(&statsp, sizeof (vnet_hio_stats_t));
   1862 
   1863 	READ_ENTER(&vnetp->vsw_fp_rw);
   1864 	if (vnetp->hio_fp == NULL) {
   1865 		/* not using hio resources, just return */
   1866 		RW_EXIT(&vnetp->vsw_fp_rw);
   1867 		return (0);
   1868 	}
   1869 	VNET_FDBE_REFHOLD(vnetp->hio_fp);
   1870 	RW_EXIT(&vnetp->vsw_fp_rw);
   1871 	vnet_hio_get_stats(vnetp->hio_fp, &statsp);
   1872 	VNET_FDBE_REFRELE(vnetp->hio_fp);
   1873 
   1874 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
   1875 
   1876 	if (rw == KSTAT_READ) {
   1877 		/* Link Input/Output stats */
   1878 		hiokp->ipackets.value.ul	= (uint32_t)statsp.ipackets;
   1879 		hiokp->ipackets64.value.ull	= statsp.ipackets;
   1880 		hiokp->ierrors.value.ul		= statsp.ierrors;
   1881 		hiokp->opackets.value.ul	= (uint32_t)statsp.opackets;
   1882 		hiokp->opackets64.value.ull	= statsp.opackets;
   1883 		hiokp->oerrors.value.ul		= statsp.oerrors;
   1884 
   1885 		/* MIB II kstat variables */
   1886 		hiokp->rbytes.value.ul		= (uint32_t)statsp.rbytes;
   1887 		hiokp->rbytes64.value.ull	= statsp.rbytes;
   1888 		hiokp->obytes.value.ul		= (uint32_t)statsp.obytes;
   1889 		hiokp->obytes64.value.ull	= statsp.obytes;
   1890 		hiokp->multircv.value.ul	= statsp.multircv;
   1891 		hiokp->multixmt.value.ul	= statsp.multixmt;
   1892 		hiokp->brdcstrcv.value.ul	= statsp.brdcstrcv;
   1893 		hiokp->brdcstxmt.value.ul	= statsp.brdcstxmt;
   1894 		hiokp->norcvbuf.value.ul	= statsp.norcvbuf;
   1895 		hiokp->noxmtbuf.value.ul	= statsp.noxmtbuf;
   1896 	} else {
   1897 		return (EACCES);
   1898 	}
   1899 
   1900 	return (0);
   1901 }
   1902 
   1903 static void
   1904 vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
   1905 {
   1906 	mac_register_t		*macp;
   1907 	mac_callbacks_t		*cbp;
   1908 	uint64_t		val;
   1909 	int			stat;
   1910 
   1911 	/*
   1912 	 * get the specified statistics from the underlying nxge.
   1913 	 */
   1914 	macp = &vresp->macreg;
   1915 	cbp = macp->m_callbacks;
   1916 	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
   1917 		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
   1918 			switch (stat) {
   1919 			case MAC_STAT_IPACKETS:
   1920 				statsp->ipackets = val;
   1921 				break;
   1922 
   1923 			case MAC_STAT_IERRORS:
   1924 				statsp->ierrors = val;
   1925 				break;
   1926 
   1927 			case MAC_STAT_OPACKETS:
   1928 				statsp->opackets = val;
   1929 				break;
   1930 
   1931 			case MAC_STAT_OERRORS:
   1932 				statsp->oerrors = val;
   1933 				break;
   1934 
   1935 			case MAC_STAT_RBYTES:
   1936 				statsp->rbytes = val;
   1937 				break;
   1938 
   1939 			case MAC_STAT_OBYTES:
   1940 				statsp->obytes = val;
   1941 				break;
   1942 
   1943 			case MAC_STAT_MULTIRCV:
   1944 				statsp->multircv = val;
   1945 				break;
   1946 
   1947 			case MAC_STAT_MULTIXMT:
   1948 				statsp->multixmt = val;
   1949 				break;
   1950 
   1951 			case MAC_STAT_BRDCSTRCV:
   1952 				statsp->brdcstrcv = val;
   1953 				break;
   1954 
   1955 			case MAC_STAT_BRDCSTXMT:
   1956 				statsp->brdcstxmt = val;
   1957 				break;
   1958 
   1959 			case MAC_STAT_NOXMTBUF:
   1960 				statsp->noxmtbuf = val;
   1961 				break;
   1962 
   1963 			case MAC_STAT_NORCVBUF:
   1964 				statsp->norcvbuf = val;
   1965 				break;
   1966 
   1967 			default:
   1968 				/*
   1969 				 * parameters not interested.
   1970 				 */
   1971 				break;
   1972 			}
   1973 		}
   1974 	}
   1975 }
   1976 
   1977 static boolean_t
   1978 vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data)
   1979 {
   1980 	vnet_t	*vnetp = (vnet_t *)arg;
   1981 
   1982 	if (vnetp == NULL) {
   1983 		return (0);
   1984 	}
   1985 
   1986 	switch (cap) {
   1987 
   1988 	case MAC_CAPAB_RINGS: {
   1989 
   1990 		mac_capab_rings_t *cap_rings = cap_data;
   1991 		/*
   1992 		 * Rings Capability Notes:
   1993 		 * We advertise rings to make use of the rings framework in
   1994 		 * gldv3 mac layer, to improve the performance. This is
   1995 		 * specifically needed when a Hybrid resource (with multiple
   1996 		 * tx/rx hardware rings) is assigned to a vnet device. We also
   1997 		 * leverage this for the normal case when no Hybrid resource is
   1998 		 * assigned.
   1999 		 *
   2000 		 * Ring Allocation:
   2001 		 * - TX path:
   2002 		 * We expose a pseudo ring group with 2 pseudo tx rings (as
   2003 		 * currently HybridIO exports only 2 rings) In the normal case,
   2004 		 * transmit traffic that comes down to the driver through the
   2005 		 * mri_tx (vnet_tx_ring_send()) entry point goes through the
   2006 		 * distributed switching algorithm in vnet and gets transmitted
   2007 		 * over a port/LDC in the vgen layer to either the vswitch or a
   2008 		 * peer vnet. If and when a Hybrid resource is assigned to the
   2009 		 * vnet, we obtain the tx ring information of the Hybrid device
   2010 		 * (nxge) and map the pseudo rings 1:1 to the 2 hw tx rings.
   2011 		 * Traffic being sent over the Hybrid resource by the mac layer
   2012 		 * gets spread across both hw rings, as they are mapped to the
   2013 		 * 2 pseudo tx rings in vnet.
   2014 		 *
   2015 		 * - RX path:
   2016 		 * We expose a pseudo ring group with 3 pseudo rx rings (static
   2017 		 * rings) initially. The first (default) pseudo rx ring is
   2018 		 * reserved for the resource that connects to the vswitch
   2019 		 * service. The next 2 rings are reserved for a Hybrid resource
   2020 		 * that may be assigned to the vnet device. If and when a
   2021 		 * Hybrid resource is assigned to the vnet, we obtain the rx
   2022 		 * ring information of the Hybrid device (nxge) and map these
   2023 		 * pseudo rings 1:1 to the 2 hw rx rings. For each additional
   2024 		 * resource that connects to a peer vnet, we dynamically
   2025 		 * allocate a pseudo rx ring and map it to that resource, when
   2026 		 * the resource gets added; and the pseudo rx ring is
   2027 		 * dynamically registered with the upper mac layer. We do the
   2028 		 * reverse and unregister the ring with the mac layer when
   2029 		 * the resource gets removed.
   2030 		 *
   2031 		 * Synchronization notes:
   2032 		 * We don't need any lock to protect members of ring structure,
   2033 		 * specifically ringp->hw_rh, in either the TX or the RX ring,
   2034 		 * as explained below.
   2035 		 * - TX ring:
   2036 		 * ring->hw_rh is initialized only when a Hybrid resource is
   2037 		 * associated; and gets referenced only in vnet_hio_tx(). The
   2038 		 * Hybrid resource itself is available in fdb only after tx
   2039 		 * hwrings are found and mapped; i.e, in vio_net_resource_reg()
   2040 		 * we call vnet_bind_rings() first and then call
   2041 		 * vnet_start_resources() which adds an entry to fdb. For
   2042 		 * traffic going over LDC resources, we don't reference
   2043 		 * ring->hw_rh at all.
   2044 		 * - RX ring:
   2045 		 * For rings mapped to Hybrid resource ring->hw_rh is
   2046 		 * initialized and only then do we add the rx callback for
   2047 		 * the underlying Hybrid resource; we disable callbacks before
   2048 		 * we unmap ring->hw_rh. For rings mapped to LDC resources, we
   2049 		 * stop the rx callbacks (in vgen) before we remove ring->hw_rh
   2050 		 * (vio_net_resource_unreg()).
   2051 		 */
   2052 
   2053 		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
   2054 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
   2055 
   2056 			/*
   2057 			 * The ring_cnt for rx grp is initialized in
   2058 			 * vnet_ring_grp_init(). Later, the ring_cnt gets
   2059 			 * updated dynamically whenever LDC resources are added
   2060 			 * or removed.
   2061 			 */
   2062 			cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
   2063 			cap_rings->mr_rget = vnet_get_ring;
   2064 
   2065 			cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
   2066 			cap_rings->mr_gget = vnet_get_group;
   2067 			cap_rings->mr_gaddring = NULL;
   2068 			cap_rings->mr_gremring = NULL;
   2069 		} else {
   2070 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
   2071 
   2072 			/*
   2073 			 * The ring_cnt for tx grp is initialized in
   2074 			 * vnet_ring_grp_init() and remains constant, as we
   2075 			 * do not support dymanic tx rings for now.
   2076 			 */
   2077 			cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
   2078 			cap_rings->mr_rget = vnet_get_ring;
   2079 
   2080 			/*
   2081 			 * Transmit rings are not grouped; i.e, the number of
   2082 			 * transmit ring groups advertised should be set to 0.
   2083 			 */
   2084 			cap_rings->mr_gnum = 0;
   2085 
   2086 			cap_rings->mr_gget = vnet_get_group;
   2087 			cap_rings->mr_gaddring = NULL;
   2088 			cap_rings->mr_gremring = NULL;
   2089 		}
   2090 		return (B_TRUE);
   2091 
   2092 	}
   2093 
   2094 	default:
   2095 		break;
   2096 
   2097 	}
   2098 
   2099 	return (B_FALSE);
   2100 }
   2101 
   2102 /*
   2103  * Callback funtion for MAC layer to get ring information.
   2104  */
   2105 static void
   2106 vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
   2107     const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle)
   2108 {
   2109 	vnet_t	*vnetp = arg;
   2110 
   2111 	switch (rtype) {
   2112 
   2113 	case MAC_RING_TYPE_RX: {
   2114 
   2115 		vnet_pseudo_rx_group_t	*rx_grp;
   2116 		vnet_pseudo_rx_ring_t	*rx_ringp;
   2117 		mac_intr_t		*mintr;
   2118 
   2119 		/* We advertised only one RX group */
   2120 		ASSERT(g_index == 0);
   2121 		rx_grp = &vnetp->rx_grp[g_index];
   2122 
   2123 		/* Check the current # of rings in the rx group */
   2124 		ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt));
   2125 
   2126 		/* Get the ring based on the index */
   2127 		rx_ringp = &rx_grp->rings[r_index];
   2128 
   2129 		rx_ringp->handle = r_handle;
   2130 		/*
   2131 		 * Note: we don't need to save the incoming r_index in rx_ring,
   2132 		 * as vnet_ring_grp_init() would have initialized the index for
   2133 		 * each ring in the array.
   2134 		 */
   2135 		rx_ringp->grp = rx_grp;
   2136 		rx_ringp->vnetp = vnetp;
   2137 
   2138 		mintr = &infop->mri_intr;
   2139 		mintr->mi_handle = (mac_intr_handle_t)rx_ringp;
   2140 		mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr;
   2141 		mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr;
   2142 
   2143 		infop->mri_driver = (mac_ring_driver_t)rx_ringp;
   2144 		infop->mri_start = vnet_rx_ring_start;
   2145 		infop->mri_stop = vnet_rx_ring_stop;
   2146 
   2147 		/* Set the poll function, as this is an rx ring */
   2148 		infop->mri_poll = vnet_rx_poll;
   2149 
   2150 		break;
   2151 	}
   2152 
   2153 	case MAC_RING_TYPE_TX: {
   2154 		vnet_pseudo_tx_group_t	*tx_grp;
   2155 		vnet_pseudo_tx_ring_t	*tx_ringp;
   2156 
   2157 		/*
   2158 		 * No need to check grp index; mac layer passes -1 for it.
   2159 		 */
   2160 		tx_grp = &vnetp->tx_grp[0];
   2161 
   2162 		/* Check the # of rings in the tx group */
   2163 		ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt));
   2164 
   2165 		/* Get the ring based on the index */
   2166 		tx_ringp = &tx_grp->rings[r_index];
   2167 
   2168 		tx_ringp->handle = r_handle;
   2169 		tx_ringp->index = r_index;
   2170 		tx_ringp->grp = tx_grp;
   2171 		tx_ringp->vnetp = vnetp;
   2172 
   2173 		infop->mri_driver = (mac_ring_driver_t)tx_ringp;
   2174 		infop->mri_start = vnet_tx_ring_start;
   2175 		infop->mri_stop = vnet_tx_ring_stop;
   2176 
   2177 		/* Set the transmit function, as this is a tx ring */
   2178 		infop->mri_tx = vnet_tx_ring_send;
   2179 
   2180 		break;
   2181 	}
   2182 
   2183 	default:
   2184 		break;
   2185 	}
   2186 }
   2187 
   2188 /*
   2189  * Callback funtion for MAC layer to get group information.
   2190  */
   2191 static void
   2192 vnet_get_group(void *arg, mac_ring_type_t type, const int index,
   2193 	mac_group_info_t *infop, mac_group_handle_t handle)
   2194 {
   2195 	vnet_t	*vnetp = (vnet_t *)arg;
   2196 
   2197 	switch (type) {
   2198 
   2199 	case MAC_RING_TYPE_RX:
   2200 	{
   2201 		vnet_pseudo_rx_group_t	*rx_grp;
   2202 
   2203 		/* We advertised only one RX group */
   2204 		ASSERT(index == 0);
   2205 
   2206 		rx_grp = &vnetp->rx_grp[index];
   2207 		rx_grp->handle = handle;
   2208 		rx_grp->index = index;
   2209 		rx_grp->vnetp = vnetp;
   2210 
   2211 		infop->mgi_driver = (mac_group_driver_t)rx_grp;
   2212 		infop->mgi_start = NULL;
   2213 		infop->mgi_stop = NULL;
   2214 		infop->mgi_addmac = vnet_addmac;
   2215 		infop->mgi_remmac = vnet_remmac;
   2216 		infop->mgi_count = rx_grp->ring_cnt;
   2217 
   2218 		break;
   2219 	}
   2220 
   2221 	case MAC_RING_TYPE_TX:
   2222 	{
   2223 		vnet_pseudo_tx_group_t	*tx_grp;
   2224 
   2225 		/* We advertised only one TX group */
   2226 		ASSERT(index == 0);
   2227 
   2228 		tx_grp = &vnetp->tx_grp[index];
   2229 		tx_grp->handle = handle;
   2230 		tx_grp->index = index;
   2231 		tx_grp->vnetp = vnetp;
   2232 
   2233 		infop->mgi_driver = (mac_group_driver_t)tx_grp;
   2234 		infop->mgi_start = NULL;
   2235 		infop->mgi_stop = NULL;
   2236 		infop->mgi_addmac = NULL;
   2237 		infop->mgi_remmac = NULL;
   2238 		infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS;
   2239 
   2240 		break;
   2241 	}
   2242 
   2243 	default:
   2244 		break;
   2245 
   2246 	}
   2247 }
   2248 
   2249 static int
   2250 vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
   2251 {
   2252 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
   2253 	int			err;
   2254 
   2255 	/*
   2256 	 * If this ring is mapped to a LDC resource, simply mark the state to
   2257 	 * indicate the ring is started and return.
   2258 	 */
   2259 	if ((rx_ringp->state &
   2260 	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
   2261 		rx_ringp->gen_num = mr_gen_num;
   2262 		rx_ringp->state |= VNET_RXRING_STARTED;
   2263 		return (0);
   2264 	}
   2265 
   2266 	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
   2267 
   2268 	/*
   2269 	 * This must be a ring reserved for a hwring. If the hwring is not
   2270 	 * bound yet, simply mark the state to indicate the ring is started and
   2271 	 * return. If and when a hybrid resource is activated for this vnet
   2272 	 * device, we will bind the hwring and start it then. If a hwring is
   2273 	 * already bound, start it now.
   2274 	 */
   2275 	if (rx_ringp->hw_rh == NULL) {
   2276 		rx_ringp->gen_num = mr_gen_num;
   2277 		rx_ringp->state |= VNET_RXRING_STARTED;
   2278 		return (0);
   2279 	}
   2280 
   2281 	err = mac_hwring_start(rx_ringp->hw_rh);
   2282 	if (err == 0) {
   2283 		rx_ringp->gen_num = mr_gen_num;
   2284 		rx_ringp->state |= VNET_RXRING_STARTED;
   2285 	} else {
   2286 		err = ENXIO;
   2287 	}
   2288 
   2289 	return (err);
   2290 }
   2291 
   2292 static void
   2293 vnet_rx_ring_stop(mac_ring_driver_t arg)
   2294 {
   2295 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
   2296 
   2297 	/*
   2298 	 * If this ring is mapped to a LDC resource, simply mark the state to
   2299 	 * indicate the ring is now stopped and return.
   2300 	 */
   2301 	if ((rx_ringp->state &
   2302 	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
   2303 		rx_ringp->state &= ~VNET_RXRING_STARTED;
   2304 		return;
   2305 	}
   2306 
   2307 	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
   2308 
   2309 	/*
   2310 	 * This must be a ring reserved for a hwring. If the hwring is not
   2311 	 * bound yet, simply mark the state to indicate the ring is stopped and
   2312 	 * return. If a hwring is already bound, stop it now.
   2313 	 */
   2314 	if (rx_ringp->hw_rh == NULL) {
   2315 		rx_ringp->state &= ~VNET_RXRING_STARTED;
   2316 		return;
   2317 	}
   2318 
   2319 	mac_hwring_stop(rx_ringp->hw_rh);
   2320 	rx_ringp->state &= ~VNET_RXRING_STARTED;
   2321 }
   2322 
   2323 /* ARGSUSED */
   2324 static int
   2325 vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
   2326 {
   2327 	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
   2328 
   2329 	tx_ringp->state |= VNET_TXRING_STARTED;
   2330 	return (0);
   2331 }
   2332 
   2333 static void
   2334 vnet_tx_ring_stop(mac_ring_driver_t arg)
   2335 {
   2336 	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
   2337 
   2338 	tx_ringp->state &= ~VNET_TXRING_STARTED;
   2339 }
   2340 
   2341 /*
   2342  * Disable polling for a ring and enable its interrupt.
   2343  */
   2344 static int
   2345 vnet_ring_enable_intr(void *arg)
   2346 {
   2347 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
   2348 	vnet_res_t		*vresp;
   2349 
   2350 	if (rx_ringp->hw_rh == NULL) {
   2351 		/*
   2352 		 * Ring enable intr func is being invoked, but the ring is
   2353 		 * not bound to any underlying resource ? This must be a ring
   2354 		 * reserved for Hybrid resource and no such resource has been
   2355 		 * assigned to this vnet device yet. We simply return success.
   2356 		 */
   2357 		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
   2358 		return (0);
   2359 	}
   2360 
   2361 	/*
   2362 	 * The rx ring has been bound to either a LDC or a Hybrid resource.
   2363 	 * Call the appropriate function to enable interrupts for the ring.
   2364 	 */
   2365 	if (rx_ringp->state & VNET_RXRING_HYBRID) {
   2366 		return (mac_hwring_enable_intr(rx_ringp->hw_rh));
   2367 	} else {
   2368 		vresp = (vnet_res_t *)rx_ringp->hw_rh;
   2369 		return (vgen_enable_intr(vresp->macreg.m_driver));
   2370 	}
   2371 }
   2372 
   2373 /*
   2374  * Enable polling for a ring and disable its interrupt.
   2375  */
   2376 static int
   2377 vnet_ring_disable_intr(void *arg)
   2378 {
   2379 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
   2380 	vnet_res_t		*vresp;
   2381 
   2382 	if (rx_ringp->hw_rh == NULL) {
   2383 		/*
   2384 		 * Ring disable intr func is being invoked, but the ring is
   2385 		 * not bound to any underlying resource ? This must be a ring
   2386 		 * reserved for Hybrid resource and no such resource has been
   2387 		 * assigned to this vnet device yet. We simply return success.
   2388 		 */
   2389 		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
   2390 		return (0);
   2391 	}
   2392 
   2393 	/*
   2394 	 * The rx ring has been bound to either a LDC or a Hybrid resource.
   2395 	 * Call the appropriate function to disable interrupts for the ring.
   2396 	 */
   2397 	if (rx_ringp->state & VNET_RXRING_HYBRID) {
   2398 		return (mac_hwring_disable_intr(rx_ringp->hw_rh));
   2399 	} else {
   2400 		vresp = (vnet_res_t *)rx_ringp->hw_rh;
   2401 		return (vgen_disable_intr(vresp->macreg.m_driver));
   2402 	}
   2403 }
   2404 
   2405 /*
   2406  * Poll 'bytes_to_pickup' bytes of message from the rx ring.
   2407  */
   2408 static mblk_t *
   2409 vnet_rx_poll(void *arg, int bytes_to_pickup)
   2410 {
   2411 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
   2412 	mblk_t			*mp = NULL;
   2413 	vnet_res_t		*vresp;
   2414 	vnet_t			*vnetp = rx_ringp->vnetp;
   2415 
   2416 	if (rx_ringp->hw_rh == NULL) {
   2417 		return (NULL);
   2418 	}
   2419 
   2420 	if (rx_ringp->state & VNET_RXRING_HYBRID) {
   2421 		mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup);
   2422 		/*
   2423 		 * Packets received over a hybrid resource need additional
   2424 		 * processing to remove the tag, for the pvid case. The
   2425 		 * underlying resource is not aware of the vnet's pvid and thus
   2426 		 * packets are received with the vlan tag in the header; unlike
   2427 		 * packets that are received over a ldc channel in which case
   2428 		 * the peer vnet/vsw would have already removed the tag.
   2429 		 */
   2430 		if (vnetp->pvid != vnetp->default_vlan_id) {
   2431 			vnet_rx_frames_untag(vnetp->pvid, &mp);
   2432 		}
   2433 	} else {
   2434 		vresp = (vnet_res_t *)rx_ringp->hw_rh;
   2435 		mp = vgen_poll(vresp->macreg.m_driver, bytes_to_pickup);
   2436 	}
   2437 	return (mp);
   2438 }
   2439 
   2440 /* ARGSUSED */
   2441 void
   2442 vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
   2443 	boolean_t loopback)
   2444 {
   2445 	vnet_t			*vnetp = (vnet_t *)arg;
   2446 	vnet_pseudo_rx_ring_t	*ringp = (vnet_pseudo_rx_ring_t *)mrh;
   2447 
   2448 	/*
   2449 	 * Packets received over a hybrid resource need additional processing
   2450 	 * to remove the tag, for the pvid case. The underlying resource is
   2451 	 * not aware of the vnet's pvid and thus packets are received with the
   2452 	 * vlan tag in the header; unlike packets that are received over a ldc
   2453 	 * channel in which case the peer vnet/vsw would have already removed
   2454 	 * the tag.
   2455 	 */
   2456 	if (vnetp->pvid != vnetp->default_vlan_id) {
   2457 		vnet_rx_frames_untag(vnetp->pvid, &mp);
   2458 		if (mp == NULL) {
   2459 			return;
   2460 		}
   2461 	}
   2462 	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
   2463 }
   2464 
   2465 static int
   2466 vnet_addmac(void *arg, const uint8_t *mac_addr)
   2467 {
   2468 	vnet_pseudo_rx_group_t  *rx_grp = (vnet_pseudo_rx_group_t *)arg;
   2469 	vnet_t			*vnetp;
   2470 
   2471 	vnetp = rx_grp->vnetp;
   2472 
   2473 	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
   2474 		return (0);
   2475 	}
   2476 
   2477 	cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n",
   2478 	    vnetp->instance, __func__);
   2479 	return (EINVAL);
   2480 }
   2481 
   2482 static int
   2483 vnet_remmac(void *arg, const uint8_t *mac_addr)
   2484 {
   2485 	vnet_pseudo_rx_group_t  *rx_grp = (vnet_pseudo_rx_group_t *)arg;
   2486 	vnet_t			*vnetp;
   2487 
   2488 	vnetp = rx_grp->vnetp;
   2489 
   2490 	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
   2491 		return (0);
   2492 	}
   2493 
   2494 	cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n",
   2495 	    vnetp->instance, __func__, ether_sprintf((void *)mac_addr));
   2496 	return (EINVAL);
   2497 }
   2498 
   2499 int
   2500 vnet_hio_mac_init(vnet_t *vnetp, char *ifname)
   2501 {
   2502 	mac_handle_t		mh;
   2503 	mac_client_handle_t	mch = NULL;
   2504 	mac_unicast_handle_t	muh = NULL;
   2505 	mac_diag_t		diag;
   2506 	mac_register_t		*macp;
   2507 	char			client_name[MAXNAMELEN];
   2508 	int			rv;
   2509 	uint16_t		mac_flags = MAC_UNICAST_TAG_DISABLE |
   2510 	    MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY;
   2511 	vio_net_callbacks_t	vcb;
   2512 	ether_addr_t		rem_addr =
   2513 		{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
   2514 	uint32_t		retries = 0;
   2515 
   2516 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
   2517 		return (EAGAIN);
   2518 	}
   2519 
   2520 	do {
   2521 		rv = mac_open_by_linkname(ifname, &mh);
   2522 		if (rv == 0) {
   2523 			break;
   2524 		}
   2525 		if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) {
   2526 			mac_free(macp);
   2527 			return (rv);
   2528 		}
   2529 		drv_usecwait(vnet_mac_open_delay);
   2530 	} while (rv == ENOENT);
   2531 
   2532 	vnetp->hio_mh = mh;
   2533 
   2534 	(void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance,
   2535 	    ifname);
   2536 	rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE);
   2537 	if (rv != 0) {
   2538 		goto fail;
   2539 	}
   2540 	vnetp->hio_mch = mch;
   2541 
   2542 	rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0,
   2543 	    &diag);
   2544 	if (rv != 0) {
   2545 		goto fail;
   2546 	}
   2547 	vnetp->hio_muh = muh;
   2548 
   2549 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
   2550 	macp->m_driver = vnetp;
   2551 	macp->m_dip = NULL;
   2552 	macp->m_src_addr = NULL;
   2553 	macp->m_callbacks = &vnet_hio_res_callbacks;
   2554 	macp->m_min_sdu = 0;
   2555 	macp->m_max_sdu = ETHERMTU;
   2556 
   2557 	rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID,
   2558 	    vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb);
   2559 	if (rv != 0) {
   2560 		goto fail;
   2561 	}
   2562 	mac_free(macp);
   2563 
   2564 	/* add the recv callback */
   2565 	mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp);
   2566 
   2567 	/* add the notify callback - only tx updates for now */
   2568 	vnetp->hio_mnh = mac_notify_add(vnetp->hio_mh, vnet_hio_notify_cb,
   2569 	    vnetp);
   2570 
   2571 	return (0);
   2572 
   2573 fail:
   2574 	mac_free(macp);
   2575 	vnet_hio_mac_cleanup(vnetp);
   2576 	return (1);
   2577 }
   2578 
   2579 void
   2580 vnet_hio_mac_cleanup(vnet_t *vnetp)
   2581 {
   2582 	if (vnetp->hio_mnh != NULL) {
   2583 		(void) mac_notify_remove(vnetp->hio_mnh, B_TRUE);
   2584 		vnetp->hio_mnh = NULL;
   2585 	}
   2586 
   2587 	if (vnetp->hio_vhp != NULL) {
   2588 		vio_net_resource_unreg(vnetp->hio_vhp);
   2589 		vnetp->hio_vhp = NULL;
   2590 	}
   2591 
   2592 	if (vnetp->hio_muh != NULL) {
   2593 		mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh);
   2594 		vnetp->hio_muh = NULL;
   2595 	}
   2596 
   2597 	if (vnetp->hio_mch != NULL) {
   2598 		mac_client_close(vnetp->hio_mch, 0);
   2599 		vnetp->hio_mch = NULL;
   2600 	}
   2601 
   2602 	if (vnetp->hio_mh != NULL) {
   2603 		mac_close(vnetp->hio_mh);
   2604 		vnetp->hio_mh = NULL;
   2605 	}
   2606 }
   2607 
   2608 /* Bind pseudo rings to hwrings */
   2609 static int
   2610 vnet_bind_hwrings(vnet_t *vnetp)
   2611 {
   2612 	mac_ring_handle_t	hw_rh[VNET_NUM_HYBRID_RINGS];
   2613 	mac_perim_handle_t	mph1;
   2614 	vnet_pseudo_rx_group_t	*rx_grp;
   2615 	vnet_pseudo_rx_ring_t	*rx_ringp;
   2616 	vnet_pseudo_tx_group_t	*tx_grp;
   2617 	vnet_pseudo_tx_ring_t	*tx_ringp;
   2618 	int			hw_ring_cnt;
   2619 	int			i;
   2620 	int			rv;
   2621 
   2622 	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);
   2623 
   2624 	/* Get the list of the underlying RX rings. */
   2625 	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh,
   2626 	    MAC_RING_TYPE_RX);
   2627 
   2628 	/* We expect the the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */
   2629 	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
   2630 		cmn_err(CE_WARN,
   2631 		    "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n",
   2632 		    vnetp->instance, hw_ring_cnt);
   2633 		goto fail;
   2634 	}
   2635 
   2636 	if (vnetp->rx_hwgh != NULL) {
   2637 		/*
   2638 		 * Quiesce the HW ring and the mac srs on the ring. Note
   2639 		 * that the HW ring will be restarted when the pseudo ring
   2640 		 * is started. At that time all the packets will be
   2641 		 * directly passed up to the pseudo RX ring and handled
   2642 		 * by mac srs created over the pseudo RX ring.
   2643 		 */
   2644 		mac_rx_client_quiesce(vnetp->hio_mch);
   2645 		mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE);
   2646 	}
   2647 
   2648 	/*
   2649 	 * Bind the pseudo rings to the hwrings and start the hwrings.
   2650 	 * Note we don't need to register these with the upper mac, as we have
   2651 	 * statically exported these pseudo rxrings which are reserved for
   2652 	 * rxrings of Hybrid resource.
   2653 	 */
   2654 	rx_grp = &vnetp->rx_grp[0];
   2655 	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
   2656 		/* Pick the rxrings reserved for Hybrid resource */
   2657 		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
   2658 
   2659 		/* Store the hw ring handle */
   2660 		rx_ringp->hw_rh = hw_rh[i];
   2661 
   2662 		/* Bind the pseudo ring to the underlying hwring */
   2663 		mac_hwring_setup(rx_ringp->hw_rh,
   2664 		    (mac_resource_handle_t)rx_ringp);
   2665 
   2666 		/* Start the hwring if needed */
   2667 		if (rx_ringp->state & VNET_RXRING_STARTED) {
   2668 			rv = mac_hwring_start(rx_ringp->hw_rh);
   2669 			if (rv != 0) {
   2670 				mac_hwring_teardown(rx_ringp->hw_rh);
   2671 				rx_ringp->hw_rh = NULL;
   2672 				goto fail;
   2673 			}
   2674 		}
   2675 	}
   2676 
   2677 	/* Get the list of the underlying TX rings. */
   2678 	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh,
   2679 	    MAC_RING_TYPE_TX);
   2680 
   2681 	/* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */
   2682 	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
   2683 		cmn_err(CE_WARN,
   2684 		    "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n",
   2685 		    vnetp->instance, hw_ring_cnt);
   2686 		goto fail;
   2687 	}
   2688 
   2689 	/*
   2690 	 * Now map the pseudo txrings to the hw txrings. Note we don't need
   2691 	 * to register these with the upper mac, as we have statically exported
   2692 	 * these rings. Note that these rings will continue to be used for LDC
   2693 	 * resources to peer vnets and vswitch (shared ring).
   2694 	 */
   2695 	tx_grp = &vnetp->tx_grp[0];
   2696 	for (i = 0; i < tx_grp->ring_cnt; i++) {
   2697 		tx_ringp = &tx_grp->rings[i];
   2698 		tx_ringp->hw_rh = hw_rh[i];
   2699 		tx_ringp->state |= VNET_TXRING_HYBRID;
   2700 	}
   2701 
   2702 	mac_perim_exit(mph1);
   2703 	return (0);
   2704 
   2705 fail:
   2706 	mac_perim_exit(mph1);
   2707 	vnet_unbind_hwrings(vnetp);
   2708 	return (1);
   2709 }
   2710 
   2711 /* Unbind pseudo rings from hwrings */
   2712 static void
   2713 vnet_unbind_hwrings(vnet_t *vnetp)
   2714 {
   2715 	mac_perim_handle_t	mph1;
   2716 	vnet_pseudo_rx_ring_t	*rx_ringp;
   2717 	vnet_pseudo_rx_group_t	*rx_grp;
   2718 	vnet_pseudo_tx_group_t	*tx_grp;
   2719 	vnet_pseudo_tx_ring_t	*tx_ringp;
   2720 	int			i;
   2721 
   2722 	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);
   2723 
   2724 	tx_grp = &vnetp->tx_grp[0];
   2725 	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
   2726 		tx_ringp = &tx_grp->rings[i];
   2727 		if (tx_ringp->state & VNET_TXRING_HYBRID) {
   2728 			tx_ringp->state &= ~VNET_TXRING_HYBRID;
   2729 			tx_ringp->hw_rh = NULL;
   2730 		}
   2731 	}
   2732 
   2733 	rx_grp = &vnetp->rx_grp[0];
   2734 	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
   2735 		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
   2736 		if (rx_ringp->hw_rh != NULL) {
   2737 			/* Stop the hwring */
   2738 			mac_hwring_stop(rx_ringp->hw_rh);
   2739 
   2740 			/* Teardown the hwring */
   2741 			mac_hwring_teardown(rx_ringp->hw_rh);
   2742 			rx_ringp->hw_rh = NULL;
   2743 		}
   2744 	}
   2745 
   2746 	if (vnetp->rx_hwgh != NULL) {
   2747 		vnetp->rx_hwgh = NULL;
   2748 		/*
   2749 		 * First clear the permanent-quiesced flag of the RX srs then
   2750 		 * restart the HW ring and the mac srs on the ring.
   2751 		 */
   2752 		mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE);
   2753 		mac_rx_client_restart(vnetp->hio_mch);
   2754 	}
   2755 
   2756 	mac_perim_exit(mph1);
   2757 }
   2758 
   2759 /* Bind pseudo ring to a LDC resource */
   2760 static int
   2761 vnet_bind_vgenring(vnet_res_t *vresp)
   2762 {
   2763 	vnet_t			*vnetp;
   2764 	vnet_pseudo_rx_group_t	*rx_grp;
   2765 	vnet_pseudo_rx_ring_t	*rx_ringp;
   2766 	mac_perim_handle_t	mph1;
   2767 	int			rv;
   2768 	int			type;
   2769 
   2770 	vnetp = vresp->vnetp;
   2771 	type = vresp->type;
   2772 	rx_grp = &vnetp->rx_grp[0];
   2773 
   2774 	if (type == VIO_NET_RES_LDC_SERVICE) {
   2775 		/*
   2776 		 * Ring Index 0 is the default ring in the group and is
   2777 		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
   2778 		 * is allocated statically and is reported to the mac layer
   2779 		 * in vnet_m_capab(). So, all we need to do here, is save a
   2780 		 * reference to the associated vresp.
   2781 		 */
   2782 		rx_ringp = &rx_grp->rings[0];
   2783 		rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
   2784 		vresp->rx_ringp = (void *)rx_ringp;
   2785 		return (0);
   2786 	}
   2787 	ASSERT(type == VIO_NET_RES_LDC_GUEST);
   2788 
   2789 	mac_perim_enter_by_mh(vnetp->mh, &mph1);
   2790 
   2791 	rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp);
   2792 	if (rx_ringp == NULL) {
   2793 		cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring",
   2794 		    vnetp->instance);
   2795 		goto fail;
   2796 	}
   2797 
   2798 	/* Store the LDC resource itself as the ring handle */
   2799 	rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
   2800 
   2801 	/*
   2802 	 * Save a reference to the ring in the resource for lookup during
   2803 	 * unbind. Note this is only done for LDC resources. We don't need this
   2804 	 * in the case of a Hybrid resource (see vnet_bind_hwrings()), as its
   2805 	 * rx rings are mapped to reserved pseudo rx rings (index 1 and 2).
   2806 	 */
   2807 	vresp->rx_ringp = (void *)rx_ringp;
   2808 	rx_ringp->state |= VNET_RXRING_LDC_GUEST;
   2809 
   2810 	/* Register the pseudo ring with upper-mac */
   2811 	rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index);
   2812 	if (rv != 0) {
   2813 		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
   2814 		rx_ringp->hw_rh = NULL;
   2815 		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
   2816 		goto fail;
   2817 	}
   2818 
   2819 	mac_perim_exit(mph1);
   2820 	return (0);
   2821 fail:
   2822 	mac_perim_exit(mph1);
   2823 	return (1);
   2824 }
   2825 
   2826 /* Unbind pseudo ring from a LDC resource */
   2827 static void
   2828 vnet_unbind_vgenring(vnet_res_t *vresp)
   2829 {
   2830 	vnet_t			*vnetp;
   2831 	vnet_pseudo_rx_group_t	*rx_grp;
   2832 	vnet_pseudo_rx_ring_t	*rx_ringp;
   2833 	mac_perim_handle_t	mph1;
   2834 	int			type;
   2835 
   2836 	vnetp = vresp->vnetp;
   2837 	type = vresp->type;
   2838 	rx_grp = &vnetp->rx_grp[0];
   2839 
   2840 	if (vresp->rx_ringp == NULL) {
   2841 		return;
   2842 	}
   2843 
   2844 	if (type == VIO_NET_RES_LDC_SERVICE) {
   2845 		/*
   2846 		 * Ring Index 0 is the default ring in the group and is
   2847 		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
   2848 		 * is allocated statically and is reported to the mac layer
   2849 		 * in vnet_m_capab(). So, all we need to do here, is remove its
   2850 		 * reference to the associated vresp.
   2851 		 */
   2852 		rx_ringp = &rx_grp->rings[0];
   2853 		rx_ringp->hw_rh = NULL;
   2854 		vresp->rx_ringp = NULL;
   2855 		return;
   2856 	}
   2857 	ASSERT(type == VIO_NET_RES_LDC_GUEST);
   2858 
   2859 	mac_perim_enter_by_mh(vnetp->mh, &mph1);
   2860 
   2861 	rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp;
   2862 	vresp->rx_ringp = NULL;
   2863 
   2864 	if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) {
   2865 		/* Unregister the pseudo ring with upper-mac */
   2866 		mac_group_rem_ring(rx_grp->handle, rx_ringp->handle);
   2867 
   2868 		rx_ringp->hw_rh = NULL;
   2869 		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
   2870 
   2871 		/* Free the pseudo rx ring */
   2872 		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
   2873 	}
   2874 
   2875 	mac_perim_exit(mph1);
   2876 }
   2877 
   2878 static void
   2879 vnet_unbind_rings(vnet_res_t *vresp)
   2880 {
   2881 	switch (vresp->type) {
   2882 
   2883 	case VIO_NET_RES_LDC_SERVICE:
   2884 	case VIO_NET_RES_LDC_GUEST:
   2885 		vnet_unbind_vgenring(vresp);
   2886 		break;
   2887 
   2888 	case VIO_NET_RES_HYBRID:
   2889 		vnet_unbind_hwrings(vresp->vnetp);
   2890 		break;
   2891 
   2892 	default:
   2893 		break;
   2894 
   2895 	}
   2896 }
   2897 
   2898 static int
   2899 vnet_bind_rings(vnet_res_t *vresp)
   2900 {
   2901 	int	rv;
   2902 
   2903 	switch (vresp->type) {
   2904 
   2905 	case VIO_NET_RES_LDC_SERVICE:
   2906 	case VIO_NET_RES_LDC_GUEST:
   2907 		rv = vnet_bind_vgenring(vresp);
   2908 		break;
   2909 
   2910 	case VIO_NET_RES_HYBRID:
   2911 		rv = vnet_bind_hwrings(vresp->vnetp);
   2912 		break;
   2913 
   2914 	default:
   2915 		rv = 1;
   2916 		break;
   2917 
   2918 	}
   2919 
   2920 	return (rv);
   2921 }
   2922 
   2923 /* ARGSUSED */
   2924 int
   2925 vnet_hio_stat(void *arg, uint_t stat, uint64_t *val)
   2926 {
   2927 	vnet_t	*vnetp = (vnet_t *)arg;
   2928 
   2929 	*val = mac_stat_get(vnetp->hio_mh, stat);
   2930 	return (0);
   2931 }
   2932 
   2933 /*
   2934  * The start() and stop() routines for the Hybrid resource below, are just
   2935  * dummy functions. This is provided to avoid resource type specific code in
   2936  * vnet_start_resources() and vnet_stop_resources(). The starting and stopping
   2937  * of the Hybrid resource happens in the context of the mac_client interfaces
   2938  * that are invoked in vnet_hio_mac_init() and vnet_hio_mac_cleanup().
   2939  */
   2940 /* ARGSUSED */
   2941 static int
   2942 vnet_hio_start(void *arg)
   2943 {
   2944 	return (0);
   2945 }
   2946 
   2947 /* ARGSUSED */
   2948 static void
   2949 vnet_hio_stop(void *arg)
   2950 {
   2951 }
   2952 
   2953 mblk_t *
   2954 vnet_hio_tx(void *arg, mblk_t *mp)
   2955 {
   2956 	vnet_pseudo_tx_ring_t	*tx_ringp;
   2957 	mblk_t			*nextp;
   2958 	mblk_t			*ret_mp;
   2959 
   2960 	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
   2961 	for (;;) {
   2962 		nextp = mp->b_next;
   2963 		mp->b_next = NULL;
   2964 
   2965 		ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp);
   2966 		if (ret_mp != NULL) {
   2967 			ret_mp->b_next = nextp;
   2968 			mp = ret_mp;
   2969 			break;
   2970 		}
   2971 
   2972 		if ((mp = nextp) == NULL)
   2973 			break;
   2974 	}
   2975 	return (mp);
   2976 }
   2977 
   2978 static void
   2979 vnet_hio_notify_cb(void *arg, mac_notify_type_t type)
   2980 {
   2981 	vnet_t			*vnetp = (vnet_t *)arg;
   2982 	mac_perim_handle_t	mph;
   2983 
   2984 	mac_perim_enter_by_mh(vnetp->hio_mh, &mph);
   2985 	switch (type) {
   2986 	case MAC_NOTE_TX:
   2987 		vnet_tx_update(vnetp->hio_vhp);
   2988 		break;
   2989 
   2990 	default:
   2991 		break;
   2992 	}
   2993 	mac_perim_exit(mph);
   2994 }
   2995 
   2996 #ifdef	VNET_IOC_DEBUG
   2997 
   2998 /*
   2999  * The ioctl entry point is used only for debugging for now. The ioctl commands
   3000  * can be used to force the link state of the channel connected to vsw.
   3001  */
   3002 static void
   3003 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
   3004 {
   3005 	struct iocblk	*iocp;
   3006 	vnet_t		*vnetp;
   3007 
   3008 	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
   3009 	iocp->ioc_error = 0;
   3010 	vnetp = (vnet_t *)arg;
   3011 
   3012 	if (vnetp == NULL) {
   3013 		miocnak(q, mp, 0, EINVAL);
   3014 		return;
   3015 	}
   3016 
   3017 	switch (iocp->ioc_cmd) {
   3018 
   3019 	case VNET_FORCE_LINK_DOWN:
   3020 	case VNET_FORCE_LINK_UP:
   3021 		vnet_force_link_state(vnetp, q, mp);
   3022 		break;
   3023 
   3024 	default:
   3025 		iocp->ioc_error = EINVAL;
   3026 		miocnak(q, mp, 0, iocp->ioc_error);
   3027 		break;
   3028 
   3029 	}
   3030 }
   3031 
   3032 static void
   3033 vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp)
   3034 {
   3035 	mac_register_t	*macp;
   3036 	mac_callbacks_t	*cbp;
   3037 	vnet_res_t	*vresp;
   3038 
   3039 	READ_ENTER(&vnetp->vsw_fp_rw);
   3040 
   3041 	vresp = vnetp->vsw_fp;
   3042 	if (vresp == NULL) {
   3043 		RW_EXIT(&vnetp->vsw_fp_rw);
   3044 		return;
   3045 	}
   3046 
   3047 	macp = &vresp->macreg;
   3048 	cbp = macp->m_callbacks;
   3049 	cbp->mc_ioctl(macp->m_driver, q, mp);
   3050 
   3051 	RW_EXIT(&vnetp->vsw_fp_rw);
   3052 }
   3053 
   3054 #else
   3055 
   3056 static void
   3057 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
   3058 {
   3059 	vnet_t		*vnetp;
   3060 
   3061 	vnetp = (vnet_t *)arg;
   3062 
   3063 	if (vnetp == NULL) {
   3064 		miocnak(q, mp, 0, EINVAL);
   3065 		return;
   3066 	}
   3067 
   3068 	/* ioctl support only for debugging */
   3069 	miocnak(q, mp, 0, ENOTSUP);
   3070 }
   3071 
   3072 #endif
   3073