Home | History | Annotate | Download | only in io
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/types.h>
     28 #include <sys/errno.h>
     29 #include <sys/debug.h>
     30 #include <sys/time.h>
     31 #include <sys/sysmacros.h>
     32 #include <sys/systm.h>
     33 #include <sys/user.h>
     34 #include <sys/stropts.h>
     35 #include <sys/stream.h>
     36 #include <sys/strlog.h>
     37 #include <sys/strsubr.h>
     38 #include <sys/cmn_err.h>
     39 #include <sys/cpu.h>
     40 #include <sys/kmem.h>
     41 #include <sys/conf.h>
     42 #include <sys/ddi.h>
     43 #include <sys/sunddi.h>
     44 #include <sys/ksynch.h>
     45 #include <sys/stat.h>
     46 #include <sys/kstat.h>
     47 #include <sys/vtrace.h>
     48 #include <sys/strsun.h>
     49 #include <sys/dlpi.h>
     50 #include <sys/ethernet.h>
     51 #include <net/if.h>
     52 #include <sys/varargs.h>
     53 #include <sys/machsystm.h>
     54 #include <sys/modctl.h>
     55 #include <sys/modhash.h>
     56 #include <sys/mac_provider.h>
     57 #include <sys/mac_ether.h>
     58 #include <sys/taskq.h>
     59 #include <sys/note.h>
     60 #include <sys/mach_descrip.h>
     61 #include <sys/mac_provider.h>
     62 #include <sys/mdeg.h>
     63 #include <sys/ldc.h>
     64 #include <sys/vsw_fdb.h>
     65 #include <sys/vsw.h>
     66 #include <sys/vio_mailbox.h>
     67 #include <sys/vnet_mailbox.h>
     68 #include <sys/vnet_common.h>
     69 #include <sys/vio_util.h>
     70 #include <sys/sdt.h>
     71 #include <sys/atomic.h>
     72 #include <sys/callb.h>
     73 #include <sys/vlan.h>
     74 
     75 /*
     76  * Function prototypes.
     77  */
     78 static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
     79 static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
     80 static	int vsw_unattach(vsw_t *vswp);
     81 static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
     82 static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *);
     83 void vsw_destroy_rxpools(void *);
     84 
     85 /* MDEG routines */
     86 static	int vsw_mdeg_register(vsw_t *vswp);
     87 static	void vsw_mdeg_unregister(vsw_t *vswp);
     88 static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
     89 static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
     90 static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
     91 static	int vsw_read_mdprops(vsw_t *vswp);
     92 static	void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
     93 	mde_cookie_t node, uint16_t *pvidp, vsw_vlanid_t **vidspp,
     94 	uint16_t *nvidsp, uint16_t *default_idp);
     95 static	void vsw_port_read_bandwidth(vsw_port_t *portp, md_t *mdp,
     96 	mde_cookie_t node, uint64_t *bw);
     97 static	int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
     98 	md_t *mdp, mde_cookie_t *node);
     99 static	void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
    100 	mde_cookie_t node);
    101 static	void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
    102 	uint32_t *mtu);
    103 static	int vsw_mtu_update(vsw_t *vswp, uint32_t mtu);
    104 static	void vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
    105 	boolean_t *pls);
    106 static	void vsw_bandwidth_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
    107 	uint64_t *bw);
    108 static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
    109 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
    110 static boolean_t vsw_cmp_vids(vsw_vlanid_t *vids1,
    111 	vsw_vlanid_t *vids2, int nvids);
    112 
    113 /* Mac driver related routines */
    114 static int vsw_mac_register(vsw_t *);
    115 static int vsw_mac_unregister(vsw_t *);
    116 static int vsw_m_stat(void *, uint_t, uint64_t *);
    117 static void vsw_m_stop(void *arg);
    118 static int vsw_m_start(void *arg);
    119 static int vsw_m_unicst(void *arg, const uint8_t *);
    120 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
    121 static int vsw_m_promisc(void *arg, boolean_t);
    122 static mblk_t *vsw_m_tx(void *arg, mblk_t *);
    123 void vsw_mac_link_update(vsw_t *vswp, link_state_t link_state);
    124 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
    125     mblk_t *mp, vsw_macrx_flags_t flags);
    126 void vsw_physlink_state_update(vsw_t *vswp);
    127 
    128 /*
    129  * Functions imported from other files.
    130  */
    131 extern void vsw_setup_switching_thread(void *arg);
    132 extern int vsw_setup_switching_start(vsw_t *vswp);
    133 extern void vsw_setup_switching_stop(vsw_t *vswp);
    134 extern int vsw_setup_switching(vsw_t *);
    135 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
    136     vsw_port_t *port, mac_resource_handle_t mrh);
    137 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
    138 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
    139 extern void vsw_del_mcst_vsw(vsw_t *);
    140 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
    141 extern void vsw_detach_ports(vsw_t *vswp);
    142 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
    143 extern int vsw_port_detach(vsw_t *vswp, int p_instance);
    144 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
    145 	md_t *prev_mdp, mde_cookie_t prev_mdex);
    146 extern	int vsw_port_attach(vsw_port_t *port);
    147 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
    148 extern int vsw_mac_open(vsw_t *vswp);
    149 extern void vsw_mac_close(vsw_t *vswp);
    150 extern void vsw_mac_cleanup_ports(vsw_t *vswp);
    151 extern void vsw_unset_addrs(vsw_t *vswp);
    152 extern void vsw_setup_switching_post_process(vsw_t *vswp);
    153 extern void vsw_create_vlans(void *arg, int type);
    154 extern void vsw_destroy_vlans(void *arg, int type);
    155 extern void vsw_vlan_add_ids(void *arg, int type);
    156 extern void vsw_vlan_remove_ids(void *arg, int type);
    157 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
    158 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
    159 	mblk_t **npt);
    160 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
    161 extern void vsw_hio_cleanup(vsw_t *vswp);
    162 extern void vsw_hio_start_ports(vsw_t *vswp);
    163 extern void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
    164 extern int vsw_mac_multicast_add(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
    165 extern void vsw_mac_multicast_remove(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
    166 extern void vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid,
    167     vsw_vlanid_t *new_vids, int new_nvids);
    168 extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type);
    169 extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type);
    170 extern void vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans,
    171     uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids);
    172 extern void vsw_reset_ports(vsw_t *vswp);
    173 extern void vsw_port_reset(vsw_port_t *portp);
    174 extern void vsw_physlink_update_ports(vsw_t *vswp);
    175 extern void vsw_update_bandwidth(vsw_t *vswp, vsw_port_t *port, int type,
    176     uint64_t maxbw);
    177 
    178 /*
    179  * Internal tunables.
    180  */
    181 int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
    182 int	vsw_wretries = 100;		/* # of write attempts */
    183 int	vsw_desc_delay = 0;		/* delay in us */
    184 int	vsw_read_attempts = 5;		/* # of reads of descriptor */
    185 int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
    186 int	vsw_mac_open_retries = 300;	/* max # of mac_open() retries */
    187 					/* 300*3 = 900sec(15min) of max tmout */
    188 int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
    189 int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
    190 int	vsw_ldc_retries = 5;		/* # of ldc_close() retries */
    191 int	vsw_ldc_delay = 1000;		/* 1 ms delay for ldc_close() */
    192 boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
    193 boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */
    194 int	vsw_rxpool_cleanup_delay = 100000;	/* 100ms */
    195 
    196 
    197 uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
    198 uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
    199 uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */
    200 
    201 /* delay in usec to wait for all references on a fdb entry to be dropped */
    202 uint32_t vsw_fdbe_refcnt_delay = 10;
    203 
    204 /*
    205  * Default vlan id. This is only used internally when the "default-vlan-id"
    206  * property is not present in the MD device node. Therefore, this should not be
    207  * used as a tunable; if this value is changed, the corresponding variable
    208  * should be updated to the same value in all vnets connected to this vsw.
    209  */
    210 uint16_t	vsw_default_vlan_id = 1;
    211 
    212 /*
    213  * Workaround for a version handshake bug in obp's vnet.
    214  * If vsw initiates version negotiation starting from the highest version,
    215  * obp sends a nack and terminates version handshake. To workaround
    216  * this, we do not initiate version handshake when the channel comes up.
    217  * Instead, we wait for the peer to send its version info msg and go through
    218  * the version protocol exchange. If we successfully negotiate a version,
    219  * before sending the ack, we send our version info msg to the peer
    220  * using the <major,minor> version that we are about to ack.
    221  */
    222 boolean_t vsw_obp_ver_proto_workaround = B_TRUE;
    223 
    224 /*
    225  * In the absence of "priority-ether-types" property in MD, the following
    226  * internal tunable can be set to specify a single priority ethertype.
    227  */
    228 uint64_t vsw_pri_eth_type = 0;
    229 
    230 /*
    231  * Number of transmit priority buffers that are preallocated per device.
    232  * This number is chosen to be a small value to throttle transmission
    233  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
    234  */
    235 uint32_t vsw_pri_tx_nmblks = 64;
    236 
    237 /*
    238  * Number of RARP packets sent to announce macaddr to the physical switch,
    239  * after vsw's physical device is changed dynamically or after a guest (client
    240  * vnet) is live migrated in.
    241  */
    242 uint32_t vsw_publish_macaddr_count = 3;
    243 
boolean_t vsw_hio_enabled = B_TRUE;	/* Enable/disable HybridIO */
int vsw_hio_max_cleanup_retries = 10;	/* Max retries for HybridIO cleanup */
int vsw_hio_cleanup_delay = 10000;	/* 10ms */
    247 
    248 /* Number of transmit descriptors -  must be power of 2 */
    249 uint32_t vsw_ntxds = VSW_RING_NUM_EL;
    250 
    251 /*
    252  * Max number of mblks received in one receive operation.
    253  */
    254 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);
    255 
    256 /*
    257  * Internal tunables for receive buffer pools, that is,  the size and number of
    258  * mblks for each pool. At least 3 sizes must be specified if these are used.
    259  * The sizes must be specified in increasing order. Non-zero value of the first
    260  * size will be used as a hint to use these values instead of the algorithm
    261  * that determines the sizes based on MTU.
    262  */
    263 uint32_t vsw_mblk_size1 = 0;
    264 uint32_t vsw_mblk_size2 = 0;
    265 uint32_t vsw_mblk_size3 = 0;
    266 uint32_t vsw_mblk_size4 = 0;
    267 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
    268 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
    269 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */
    270 uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS;	/* number of mblks for pool4 */
    271 
    272 /*
    273  * Set this to non-zero to enable additional internal receive buffer pools
    274  * based on the MTU of the device for better performance at the cost of more
    275  * memory consumption. This is turned off by default, to use allocb(9F) for
    276  * receive buffer allocations of sizes > 2K.
    277  */
    278 boolean_t vsw_jumbo_rxpools = B_FALSE;
    279 
    280 /*
    281  * vsw_max_tx_qcount is the maximum # of packets that can be queued
    282  * before the tx worker thread begins processing the queue. Its value
    283  * is chosen to be 4x the default length of tx descriptor ring.
    284  */
    285 uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL;
    286 
    287 /*
    288  * MAC callbacks
    289  */
/*
 * Callback table for this driver's mac-layer entry points (the vsw_m_*
 * routines prototyped earlier in this file). Entries are positional; the
 * slot descriptions below are inferred from the handler names and should be
 * confirmed against the mac_callbacks_t definition in <sys/mac_provider.h>.
 */
static	mac_callbacks_t	vsw_m_callbacks = {
	0,			/* presumably optional-callback flags: none */
	vsw_m_stat,		/* statistics */
	vsw_m_start,		/* start */
	vsw_m_stop,		/* stop */
	vsw_m_promisc,		/* promiscuous mode on/off */
	vsw_m_multicst,		/* multicast address add/remove */
	vsw_m_unicst,		/* unicast address */
	vsw_m_tx,		/* transmit */
	NULL,			/* remaining slots are not provided */
	NULL,
	NULL
};
    303 
    304 static	struct	cb_ops	vsw_cb_ops = {
    305 	nulldev,			/* cb_open */
    306 	nulldev,			/* cb_close */
    307 	nodev,				/* cb_strategy */
    308 	nodev,				/* cb_print */
    309 	nodev,				/* cb_dump */
    310 	nodev,				/* cb_read */
    311 	nodev,				/* cb_write */
    312 	nodev,				/* cb_ioctl */
    313 	nodev,				/* cb_devmap */
    314 	nodev,				/* cb_mmap */
    315 	nodev,				/* cb_segmap */
    316 	nochpoll,			/* cb_chpoll */
    317 	ddi_prop_op,			/* cb_prop_op */
    318 	NULL,				/* cb_stream */
    319 	D_MP,				/* cb_flag */
    320 	CB_REV,				/* rev */
    321 	nodev,				/* int (*cb_aread)() */
    322 	nodev				/* int (*cb_awrite)() */
    323 };
    324 
    325 static	struct	dev_ops	vsw_ops = {
    326 	DEVO_REV,		/* devo_rev */
    327 	0,			/* devo_refcnt */
    328 	NULL,			/* devo_getinfo */
    329 	nulldev,		/* devo_identify */
    330 	nulldev,		/* devo_probe */
    331 	vsw_attach,		/* devo_attach */
    332 	vsw_detach,		/* devo_detach */
    333 	nodev,			/* devo_reset */
    334 	&vsw_cb_ops,		/* devo_cb_ops */
    335 	(struct bus_ops *)NULL,	/* devo_bus_ops */
    336 	ddi_power		/* devo_power */
    337 };
    338 
    339 extern	struct	mod_ops	mod_driverops;
    340 static struct modldrv vswmodldrv = {
    341 	&mod_driverops,
    342 	"sun4v Virtual Switch",
    343 	&vsw_ops,
    344 };
    345 
    346 #define	LDC_ENTER_LOCK(ldcp)	\
    347 				mutex_enter(&((ldcp)->ldc_cblock));\
    348 				mutex_enter(&((ldcp)->ldc_rxlock));\
    349 				mutex_enter(&((ldcp)->ldc_txlock));
    350 #define	LDC_EXIT_LOCK(ldcp)	\
    351 				mutex_exit(&((ldcp)->ldc_txlock));\
    352 				mutex_exit(&((ldcp)->ldc_rxlock));\
    353 				mutex_exit(&((ldcp)->ldc_cblock));
    354 
    355 /* Driver soft state ptr  */
    356 static void	*vsw_state;
    357 
    358 /*
    359  * Linked list of "vsw_t" structures - one per instance.
    360  */
    361 vsw_t		*vsw_head = NULL;
    362 krwlock_t	vsw_rw;
    363 
    364 /*
    365  * Property names
    366  */
    367 static char vdev_propname[] = "virtual-device";
    368 static char vsw_propname[] = "virtual-network-switch";
    369 static char physdev_propname[] = "vsw-phys-dev";
    370 static char smode_propname[] = "vsw-switch-mode";
    371 static char macaddr_propname[] = "local-mac-address";
    372 static char remaddr_propname[] = "remote-mac-address";
    373 static char ldcids_propname[] = "ldc-ids";
    374 static char chan_propname[] = "channel-endpoint";
    375 static char id_propname[] = "id";
    376 static char reg_propname[] = "reg";
    377 static char pri_types_propname[] = "priority-ether-types";
    378 static char vsw_pvid_propname[] = "port-vlan-id";
    379 static char vsw_vid_propname[] = "vlan-id";
    380 static char vsw_dvid_propname[] = "default-vlan-id";
    381 static char port_pvid_propname[] = "remote-port-vlan-id";
    382 static char port_vid_propname[] = "remote-vlan-id";
    383 static char hybrid_propname[] = "hybrid";
    384 static char vsw_mtu_propname[] = "mtu";
    385 static char vsw_linkprop_propname[] = "linkprop";
    386 static char vsw_maxbw_propname[] = "maxbw";
    387 static char port_maxbw_propname[] = "maxbw";
    388 
    389 /*
    390  * Matching criteria passed to the MDEG to register interest
    391  * in changes to 'virtual-device-port' nodes identified by their
    392  * 'id' property.
    393  */
    394 static md_prop_match_t vport_prop_match[] = {
    395 	{ MDET_PROP_VAL,    "id"   },
    396 	{ MDET_LIST_END,    NULL    }
    397 };
    398 
    399 static mdeg_node_match_t vport_match = { "virtual-device-port",
    400 						vport_prop_match };
    401 
    402 /*
    403  * Matching criteria passed to the MDEG to register interest
    404  * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
    405  * by their 'name' and 'cfg-handle' properties.
    406  */
    407 static md_prop_match_t vdev_prop_match[] = {
    408 	{ MDET_PROP_STR,    "name"   },
    409 	{ MDET_PROP_VAL,    "cfg-handle" },
    410 	{ MDET_LIST_END,    NULL    }
    411 };
    412 
    413 static mdeg_node_match_t vdev_match = { "virtual-device",
    414 						vdev_prop_match };
    415 
    416 
    417 /*
    418  * Specification of an MD node passed to the MDEG to filter any
    419  * 'vport' nodes that do not belong to the specified node. This
    420  * template is copied for each vsw instance and filled in with
    421  * the appropriate 'cfg-handle' value before being passed to the MDEG.
    422  */
    423 static mdeg_prop_spec_t vsw_prop_template[] = {
    424 	{ MDET_PROP_STR,    "name",		vsw_propname },
    425 	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
    426 	{ MDET_LIST_END,    NULL,		NULL	}
    427 };
    428 
    429 #define	VSW_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val);
    430 
    431 #ifdef	DEBUG
    432 /*
    433  * Print debug messages - set to 0x1f to enable all msgs
    434  * or 0x0 to turn all off.
    435  */
    436 int vswdbg = 0x0;
    437 
    438 /*
    439  * debug levels:
    440  * 0x01:	Function entry/exit tracing
    441  * 0x02:	Internal function messages
    442  * 0x04:	Verbose internal messages
    443  * 0x08:	Warning messages
    444  * 0x10:	Error messages
    445  */
    446 
    447 void
    448 vswdebug(vsw_t *vswp, const char *fmt, ...)
    449 {
    450 	char buf[512];
    451 	va_list ap;
    452 
    453 	va_start(ap, fmt);
    454 	(void) vsprintf(buf, fmt, ap);
    455 	va_end(ap);
    456 
    457 	if (vswp == NULL)
    458 		cmn_err(CE_CONT, "%s\n", buf);
    459 	else
    460 		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
    461 }
    462 
    463 #endif	/* DEBUG */
    464 
    465 static struct modlinkage modlinkage = {
    466 	MODREV_1,
    467 	&vswmodldrv,
    468 	NULL
    469 };
    470 
    471 int
    472 _init(void)
    473 {
    474 	int status;
    475 
    476 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
    477 
    478 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
    479 	if (status != 0) {
    480 		return (status);
    481 	}
    482 
    483 	mac_init_ops(&vsw_ops, DRV_NAME);
    484 	status = mod_install(&modlinkage);
    485 	if (status != 0) {
    486 		ddi_soft_state_fini(&vsw_state);
    487 	}
    488 	return (status);
    489 }
    490 
    491 int
    492 _fini(void)
    493 {
    494 	int status;
    495 
    496 	status = mod_remove(&modlinkage);
    497 	if (status != 0)
    498 		return (status);
    499 	mac_fini_ops(&vsw_ops);
    500 	ddi_soft_state_fini(&vsw_state);
    501 
    502 	rw_destroy(&vsw_rw);
    503 
    504 	return (status);
    505 }
    506 
    507 int
    508 _info(struct modinfo *modinfop)
    509 {
    510 	return (mod_info(&modlinkage, modinfop));
    511 }
    512 
    513 static int
    514 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
    515 {
    516 	vsw_t			*vswp;
    517 	int			instance;
    518 	char			hashname[MAXNAMELEN];
    519 	char			qname[TASKQ_NAMELEN];
    520 	vsw_attach_progress_t	progress = PROG_init;
    521 	int			rv;
    522 
    523 	switch (cmd) {
    524 	case DDI_ATTACH:
    525 		break;
    526 	case DDI_RESUME:
    527 		/* nothing to do for this non-device */
    528 		return (DDI_SUCCESS);
    529 	case DDI_PM_RESUME:
    530 	default:
    531 		return (DDI_FAILURE);
    532 	}
    533 
    534 	instance = ddi_get_instance(dip);
    535 	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
    536 		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
    537 		return (DDI_FAILURE);
    538 	}
    539 	vswp = ddi_get_soft_state(vsw_state, instance);
    540 
    541 	if (vswp == NULL) {
    542 		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
    543 		goto vsw_attach_fail;
    544 	}
    545 
    546 	vswp->dip = dip;
    547 	vswp->instance = instance;
    548 	vswp->phys_link_state = LINK_STATE_UNKNOWN;
    549 	ddi_set_driver_private(dip, (caddr_t)vswp);
    550 
    551 	mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
    552 	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
    553 	mutex_init(&vswp->sw_thr_lock, NULL, MUTEX_DRIVER, NULL);
    554 	cv_init(&vswp->sw_thr_cv, NULL, CV_DRIVER, NULL);
    555 	rw_init(&vswp->maccl_rwlock, NULL, RW_DRIVER, NULL);
    556 	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
    557 	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
    558 	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
    559 
    560 	progress |= PROG_locks;
    561 
    562 	rv = vsw_read_mdprops(vswp);
    563 	if (rv != 0)
    564 		goto vsw_attach_fail;
    565 
    566 	progress |= PROG_readmd;
    567 
    568 	/* setup the unicast forwarding database  */
    569 	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
    570 	    vswp->instance);
    571 	D2(vswp, "creating unicast hash table (%s)...", hashname);
    572 	vswp->fdb_nchains = vsw_fdb_nchains;
    573 	vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains,
    574 	    mod_hash_null_valdtor, sizeof (void *));
    575 	vsw_create_vlans((void *)vswp, VSW_LOCALDEV);
    576 	progress |= PROG_fdb;
    577 
    578 	/* setup the multicast fowarding database */
    579 	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
    580 	    vswp->instance);
    581 	D2(vswp, "creating multicast hash table %s)...", hashname);
    582 	vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains,
    583 	    mod_hash_null_valdtor, sizeof (void *));
    584 
    585 	progress |= PROG_mfdb;
    586 
    587 	/*
    588 	 * Create the taskq which will process all the VIO
    589 	 * control messages.
    590 	 */
    591 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
    592 	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
    593 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
    594 		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
    595 		    vswp->instance);
    596 		goto vsw_attach_fail;
    597 	}
    598 
    599 	progress |= PROG_taskq;
    600 
    601 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_rxp_taskq%d",
    602 	    vswp->instance);
    603 	if ((vswp->rxp_taskq = ddi_taskq_create(vswp->dip, qname, 1,
    604 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
    605 		cmn_err(CE_WARN, "!vsw%d: Unable to create rxp task queue",
    606 		    vswp->instance);
    607 		goto vsw_attach_fail;
    608 	}
    609 
    610 	progress |= PROG_rxp_taskq;
    611 
    612 	/* prevent auto-detaching */
    613 	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
    614 	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
    615 		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
    616 		    "instance %u", DDI_NO_AUTODETACH, instance);
    617 	}
    618 
    619 	/*
    620 	 * The null switching function is set to avoid panic until
    621 	 * switch mode is setup.
    622 	 */
    623 	vswp->vsw_switch_frame = vsw_switch_frame_nop;
    624 
    625 	/*
    626 	 * Setup the required switching mode, based on the mdprops that we read
    627 	 * earlier. We start a thread to do this, to avoid calling mac_open()
    628 	 * directly from attach().
    629 	 */
    630 	rv = vsw_setup_switching_start(vswp);
    631 	if (rv != 0) {
    632 		goto vsw_attach_fail;
    633 	}
    634 
    635 	progress |= PROG_swmode;
    636 
    637 	/* Register with mac layer as a provider */
    638 	rv = vsw_mac_register(vswp);
    639 	if (rv != 0)
    640 		goto vsw_attach_fail;
    641 
    642 	progress |= PROG_macreg;
    643 
    644 	/*
    645 	 * Now we have everything setup, register an interest in
    646 	 * specific MD nodes.
    647 	 *
    648 	 * The callback is invoked in 2 cases, firstly if upon mdeg
    649 	 * registration there are existing nodes which match our specified
    650 	 * criteria, and secondly if the MD is changed (and again, there
    651 	 * are nodes which we are interested in present within it. Note
    652 	 * that our callback will be invoked even if our specified nodes
    653 	 * have not actually changed).
    654 	 *
    655 	 */
    656 	rv = vsw_mdeg_register(vswp);
    657 	if (rv != 0)
    658 		goto vsw_attach_fail;
    659 
    660 	progress |= PROG_mdreg;
    661 
    662 	vswp->attach_progress = progress;
    663 
    664 	WRITE_ENTER(&vsw_rw);
    665 	vswp->next = vsw_head;
    666 	vsw_head = vswp;
    667 	RW_EXIT(&vsw_rw);
    668 
    669 	ddi_report_dev(vswp->dip);
    670 	return (DDI_SUCCESS);
    671 
    672 vsw_attach_fail:
    673 	DERR(NULL, "vsw_attach: failed");
    674 
    675 	vswp->attach_progress = progress;
    676 	(void) vsw_unattach(vswp);
    677 	ddi_soft_state_free(vsw_state, instance);
    678 	return (DDI_FAILURE);
    679 }
    680 
    681 static int
    682 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
    683 {
    684 	vsw_t			**vswpp, *vswp;
    685 	int 			instance;
    686 
    687 	instance = ddi_get_instance(dip);
    688 	vswp = ddi_get_soft_state(vsw_state, instance);
    689 
    690 	if (vswp == NULL) {
    691 		return (DDI_FAILURE);
    692 	}
    693 
    694 	switch (cmd) {
    695 	case DDI_DETACH:
    696 		break;
    697 	case DDI_SUSPEND:
    698 	case DDI_PM_SUSPEND:
    699 	default:
    700 		return (DDI_FAILURE);
    701 	}
    702 
    703 	D2(vswp, "detaching instance %d", instance);
    704 
    705 	if (vsw_unattach(vswp) != 0) {
    706 		return (DDI_FAILURE);
    707 	}
    708 
    709 	ddi_remove_minor_node(dip, NULL);
    710 
    711 	WRITE_ENTER(&vsw_rw);
    712 	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
    713 		if (*vswpp == vswp) {
    714 			*vswpp = vswp->next;
    715 			break;
    716 		}
    717 	}
    718 	RW_EXIT(&vsw_rw);
    719 
    720 	ddi_soft_state_free(vsw_state, instance);
    721 
    722 	return (DDI_SUCCESS);
    723 }
    724 
    725 /*
    726  * Common routine to handle vsw_attach() failure and vsw_detach(). Note that
    727  * the only reason this function could fail is if mac_unregister() fails.
    728  * Otherwise, this function must ensure that all resources are freed and return
    729  * success.
    730  */
    731 static int
    732 vsw_unattach(vsw_t *vswp)
    733 {
    734 	vsw_attach_progress_t	progress;
    735 
    736 	progress = vswp->attach_progress;
    737 
    738 	/*
    739 	 * Unregister from the gldv3 subsystem. This can fail, in particular
    740 	 * if there are still any open references to this mac device; in which
    741 	 * case we just return failure without continuing to detach further.
    742 	 */
    743 	if (progress & PROG_macreg) {
    744 		if (vsw_mac_unregister(vswp) != 0) {
    745 			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
    746 			    "MAC layer", vswp->instance);
    747 			return (1);
    748 		}
    749 		progress &= ~PROG_macreg;
    750 	}
    751 
    752 	/*
    753 	 * Now that we have unregistered from gldv3, we must finish all other
    754 	 * steps and successfully return from this function; otherwise we will
    755 	 * end up leaving the device in a broken/unusable state.
    756 	 *
    757 	 * If we have registered with mdeg, unregister now to stop further
    758 	 * callbacks to this vsw device and/or its ports. Then, detach any
    759 	 * existing ports.
    760 	 */
    761 	if (progress & PROG_mdreg) {
    762 		vsw_mdeg_unregister(vswp);
    763 		vsw_detach_ports(vswp);
    764 		progress &= ~PROG_mdreg;
    765 	}
    766 
    767 	/*
    768 	 * If we have started a thread to setup the switching mode, stop it, if
    769 	 * it is still running. If it has finished setting up the switching
    770 	 * mode, then we need to clean up some additional things if we are
    771 	 * running in L2 mode: first free up any hybrid resources; then stop
    772 	 * and close the underlying physical device. Note that we would have
    773 	 * already released all per mac_client resources (ucast, mcast addrs,
    774 	 * hio-shares etc) as all the ports are detached and if the vsw device
    775 	 * itself was in use as an interface, it has been unplumbed (otherwise
    776 	 * mac_unregister() above would fail).
    777 	 */
    778 	if (progress & PROG_swmode) {
    779 
    780 		vsw_setup_switching_stop(vswp);
    781 
    782 		if (vswp->hio_capable == B_TRUE) {
    783 			vsw_hio_cleanup(vswp);
    784 			vswp->hio_capable = B_FALSE;
    785 		}
    786 
    787 		mutex_enter(&vswp->mac_lock);
    788 		vsw_mac_close(vswp);
    789 		mutex_exit(&vswp->mac_lock);
    790 
    791 		progress &= ~PROG_swmode;
    792 	}
    793 
    794 	/*
    795 	 * We now destroy the taskq used to clean up rx mblk pools that
    796 	 * couldn't be destroyed when the ports/channels were detached.
    797 	 * We implicitly wait for those tasks to complete in
    798 	 * ddi_taskq_destroy().
    799 	 */
    800 	if (progress & PROG_rxp_taskq) {
    801 		ddi_taskq_destroy(vswp->rxp_taskq);
    802 		progress &= ~PROG_rxp_taskq;
    803 	}
    804 
    805 	/*
    806 	 * By now any pending tasks have finished and the underlying
    807 	 * ldc's have been destroyed, so its safe to delete the control
    808 	 * message taskq.
    809 	 */
    810 	if (progress & PROG_taskq) {
    811 		ddi_taskq_destroy(vswp->taskq_p);
    812 		progress &= ~PROG_taskq;
    813 	}
    814 
    815 	/* Destroy the multicast hash table */
    816 	if (progress & PROG_mfdb) {
    817 		mod_hash_destroy_hash(vswp->mfdb);
    818 		progress &= ~PROG_mfdb;
    819 	}
    820 
    821 	/* Destroy the vlan hash table and fdb */
    822 	if (progress & PROG_fdb) {
    823 		vsw_destroy_vlans(vswp, VSW_LOCALDEV);
    824 		mod_hash_destroy_hash(vswp->fdb_hashp);
    825 		progress &= ~PROG_fdb;
    826 	}
    827 
    828 	if (progress & PROG_readmd) {
    829 		if (VSW_PRI_ETH_DEFINED(vswp)) {
    830 			kmem_free(vswp->pri_types,
    831 			    sizeof (uint16_t) * vswp->pri_num_types);
    832 			(void) vio_destroy_mblks(vswp->pri_tx_vmp);
    833 		}
    834 		progress &= ~PROG_readmd;
    835 	}
    836 
    837 	if (progress & PROG_locks) {
    838 		rw_destroy(&vswp->plist.lockrw);
    839 		rw_destroy(&vswp->mfdbrw);
    840 		rw_destroy(&vswp->if_lockrw);
    841 		rw_destroy(&vswp->maccl_rwlock);
    842 		cv_destroy(&vswp->sw_thr_cv);
    843 		mutex_destroy(&vswp->sw_thr_lock);
    844 		mutex_destroy(&vswp->mca_lock);
    845 		mutex_destroy(&vswp->mac_lock);
    846 		progress &= ~PROG_locks;
    847 	}
    848 
    849 	vswp->attach_progress = progress;
    850 
    851 	return (0);
    852 }
    853 
    854 void
    855 vsw_destroy_rxpools(void *arg)
    856 {
    857 	vio_mblk_pool_t	*poolp = (vio_mblk_pool_t *)arg;
    858 	vio_mblk_pool_t	*npoolp;
    859 
    860 	while (poolp != NULL) {
    861 		npoolp =  poolp->nextp;
    862 		while (vio_destroy_mblks(poolp) != 0) {
    863 			drv_usecwait(vsw_rxpool_cleanup_delay);
    864 		}
    865 		poolp = npoolp;
    866 	}
    867 }
    868 
    869 /*
    870  * Get the value of the "vsw-phys-dev" property in the specified
    871  * node. This property is the name of the physical device that
    872  * the virtual switch will use to talk to the outside world.
    873  *
    874  * Note it is valid for this property to be NULL (but the property
    875  * itself must exist). Callers of this routine should verify that
    876  * the value returned is what they expected (i.e. either NULL or non NULL).
    877  *
    878  * On success returns value of the property in region pointed to by
    879  * the 'name' argument, and with return value of 0. Otherwise returns 1.
    880  */
    881 static int
    882 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
    883 {
    884 	int		len = 0;
    885 	int		instance;
    886 	char		*physname = NULL;
    887 	char		*dev;
    888 	const char	*dev_name;
    889 	char		myname[MAXNAMELEN];
    890 
    891 	dev_name = ddi_driver_name(vswp->dip);
    892 	instance = ddi_get_instance(vswp->dip);
    893 	(void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance);
    894 
    895 	if (md_get_prop_data(mdp, node, physdev_propname,
    896 	    (uint8_t **)(&physname), &len) != 0) {
    897 		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
    898 		    "device(s) from MD", vswp->instance);
    899 		return (1);
    900 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
    901 		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
    902 		    vswp->instance, physname);
    903 		return (1);
    904 	} else if (strcmp(myname, physname) == 0) {
    905 		/*
    906 		 * Prevent the vswitch from opening itself as the
    907 		 * network device.
    908 		 */
    909 		cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name",
    910 		    vswp->instance, physname);
    911 		return (1);
    912 	} else {
    913 		(void) strncpy(name, physname, strlen(physname) + 1);
    914 		D2(vswp, "%s: using first device specified (%s)",
    915 		    __func__, physname);
    916 	}
    917 
    918 #ifdef DEBUG
    919 	/*
    920 	 * As a temporary measure to aid testing we check to see if there
    921 	 * is a vsw.conf file present. If there is we use the value of the
    922 	 * vsw_physname property in the file as the name of the physical
    923 	 * device, overriding the value from the MD.
    924 	 *
    925 	 * There may be multiple devices listed, but for the moment
    926 	 * we just use the first one.
    927 	 */
    928 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
    929 	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
    930 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
    931 			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
    932 			    vswp->instance, dev);
    933 			ddi_prop_free(dev);
    934 			return (1);
    935 		} else {
    936 			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
    937 			    "config file", vswp->instance, dev);
    938 
    939 			(void) strncpy(name, dev, strlen(dev) + 1);
    940 		}
    941 
    942 		ddi_prop_free(dev);
    943 	}
    944 #endif
    945 
    946 	return (0);
    947 }
    948 
    949 /*
    950  * Read the 'vsw-switch-mode' property from the specified MD node.
    951  *
    952  * Returns 0 on success, otherwise returns 1.
    953  */
    954 static int
    955 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint8_t *mode)
    956 {
    957 	int		len = 0;
    958 	char		*smode = NULL;
    959 	char		*curr_mode = NULL;
    960 
    961 	D1(vswp, "%s: enter", __func__);
    962 
    963 	/*
    964 	 * Get the switch-mode property. The modes are listed in
    965 	 * decreasing order of preference, i.e. prefered mode is
    966 	 * first item in list.
    967 	 */
    968 	len = 0;
    969 	if (md_get_prop_data(mdp, node, smode_propname,
    970 	    (uint8_t **)(&smode), &len) != 0) {
    971 		/*
    972 		 * Unable to get switch-mode property from MD, nothing
    973 		 * more we can do.
    974 		 */
    975 		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
    976 		    " from the MD", vswp->instance);
    977 		return (1);
    978 	}
    979 
    980 	curr_mode = smode;
    981 	/*
    982 	 * Modes of operation:
    983 	 * 'switched'	 - layer 2 switching, underlying HW in
    984 	 *			programmed mode.
    985 	 * 'promiscuous' - layer 2 switching, underlying HW in
    986 	 *			promiscuous mode.
    987 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
    988 	 *			in non-promiscuous mode.
    989 	 */
    990 	while (curr_mode < (smode + len)) {
    991 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
    992 		if (strcmp(curr_mode, "switched") == 0) {
    993 			*mode = VSW_LAYER2;
    994 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
    995 			*mode = VSW_LAYER2 | VSW_LAYER2_PROMISC;
    996 		} else if (strcmp(curr_mode, "routed") == 0) {
    997 			*mode = VSW_LAYER3;
    998 		} else {
    999 			cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, "
   1000 			    "setting to default switched mode",
   1001 			    vswp->instance, curr_mode);
   1002 			*mode = VSW_LAYER2;
   1003 		}
   1004 		curr_mode += strlen(curr_mode) + 1;
   1005 	}
   1006 
   1007 	D2(vswp, "%s: %d mode", __func__, *mode);
   1008 
   1009 	D1(vswp, "%s: exit", __func__);
   1010 
   1011 	return (0);
   1012 }
   1013 
   1014 /*
   1015  * Register with the MAC layer as a network device, so we
   1016  * can be plumbed if necessary.
   1017  */
   1018 static int
   1019 vsw_mac_register(vsw_t *vswp)
   1020 {
   1021 	mac_register_t	*macp;
   1022 	int		rv;
   1023 
   1024 	D1(vswp, "%s: enter", __func__);
   1025 
   1026 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
   1027 		return (EINVAL);
   1028 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
   1029 	macp->m_driver = vswp;
   1030 	macp->m_dip = vswp->dip;
   1031 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
   1032 	macp->m_callbacks = &vsw_m_callbacks;
   1033 	macp->m_min_sdu = 0;
   1034 	macp->m_max_sdu = vswp->mtu;
   1035 	macp->m_margin = VLAN_TAGSZ;
   1036 	rv = mac_register(macp, &vswp->if_mh);
   1037 	mac_free(macp);
   1038 	if (rv != 0) {
   1039 		/*
   1040 		 * Treat this as a non-fatal error as we may be
   1041 		 * able to operate in some other mode.
   1042 		 */
   1043 		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
   1044 		    "a provider with MAC layer", vswp->instance);
   1045 		return (rv);
   1046 	}
   1047 
   1048 	vswp->if_state |= VSW_IF_REG;
   1049 
   1050 	D1(vswp, "%s: exit", __func__);
   1051 
   1052 	return (rv);
   1053 }
   1054 
   1055 static int
   1056 vsw_mac_unregister(vsw_t *vswp)
   1057 {
   1058 	int		rv = 0;
   1059 
   1060 	D1(vswp, "%s: enter", __func__);
   1061 
   1062 	WRITE_ENTER(&vswp->if_lockrw);
   1063 
   1064 	if (vswp->if_state & VSW_IF_REG) {
   1065 		rv = mac_unregister(vswp->if_mh);
   1066 		if (rv != 0) {
   1067 			DWARN(vswp, "%s: unable to unregister from MAC "
   1068 			    "framework", __func__);
   1069 
   1070 			RW_EXIT(&vswp->if_lockrw);
   1071 			D1(vswp, "%s: fail exit", __func__);
   1072 			return (rv);
   1073 		}
   1074 
   1075 		/* mark i/f as down and unregistered */
   1076 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
   1077 	}
   1078 	RW_EXIT(&vswp->if_lockrw);
   1079 
   1080 	D1(vswp, "%s: exit", __func__);
   1081 
   1082 	return (rv);
   1083 }
   1084 
   1085 static int
   1086 vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
   1087 {
   1088 	vsw_t			*vswp = (vsw_t *)arg;
   1089 
   1090 	D1(vswp, "%s: enter", __func__);
   1091 
   1092 	mutex_enter(&vswp->mac_lock);
   1093 	if (vswp->mh == NULL) {
   1094 		mutex_exit(&vswp->mac_lock);
   1095 		return (EINVAL);
   1096 	}
   1097 
   1098 	/* return stats from underlying device */
   1099 	*val = mac_stat_get(vswp->mh, stat);
   1100 
   1101 	mutex_exit(&vswp->mac_lock);
   1102 
   1103 	return (0);
   1104 }
   1105 
   1106 static void
   1107 vsw_m_stop(void *arg)
   1108 {
   1109 	vsw_t	*vswp = (vsw_t *)arg;
   1110 
   1111 	D1(vswp, "%s: enter", __func__);
   1112 
   1113 	WRITE_ENTER(&vswp->if_lockrw);
   1114 	vswp->if_state &= ~VSW_IF_UP;
   1115 	RW_EXIT(&vswp->if_lockrw);
   1116 
   1117 	/* Cleanup and close the mac client */
   1118 	vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV);
   1119 
   1120 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
   1121 }
   1122 
   1123 static int
   1124 vsw_m_start(void *arg)
   1125 {
   1126 	int		rv;
   1127 	vsw_t		*vswp = (vsw_t *)arg;
   1128 
   1129 	D1(vswp, "%s: enter", __func__);
   1130 
   1131 	WRITE_ENTER(&vswp->if_lockrw);
   1132 
   1133 	vswp->if_state |= VSW_IF_UP;
   1134 
   1135 	if (vswp->switching_setup_done == B_FALSE) {
   1136 		/*
   1137 		 * If the switching mode has not been setup yet, just
   1138 		 * return. The unicast address will be programmed
   1139 		 * after the physical device is successfully setup by the
   1140 		 * timeout handler.
   1141 		 */
   1142 		RW_EXIT(&vswp->if_lockrw);
   1143 		return (0);
   1144 	}
   1145 
   1146 	/* if in layer2 mode, program unicast address. */
   1147 	if (vswp->mh != NULL) {
   1148 		/* Init a mac client and program addresses */
   1149 		rv = vsw_mac_client_init(vswp, NULL, VSW_LOCALDEV);
   1150 		if (rv != 0) {
   1151 			cmn_err(CE_NOTE,
   1152 			    "!vsw%d: failed to program interface "
   1153 			    "unicast address\n", vswp->instance);
   1154 		}
   1155 	}
   1156 
   1157 	RW_EXIT(&vswp->if_lockrw);
   1158 
   1159 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
   1160 	return (0);
   1161 }
   1162 
   1163 /*
   1164  * Change the local interface address.
   1165  *
   1166  * Note: we don't support this entry point. The local
   1167  * mac address of the switch can only be changed via its
   1168  * MD node properties.
   1169  */
   1170 static int
   1171 vsw_m_unicst(void *arg, const uint8_t *macaddr)
   1172 {
   1173 	_NOTE(ARGUNUSED(arg, macaddr))
   1174 
   1175 	return (DDI_FAILURE);
   1176 }
   1177 
   1178 static int
   1179 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
   1180 {
   1181 	vsw_t		*vswp = (vsw_t *)arg;
   1182 	mcst_addr_t	*mcst_p = NULL;
   1183 	uint64_t	addr = 0x0;
   1184 	int		i, ret = 0;
   1185 
   1186 	D1(vswp, "%s: enter", __func__);
   1187 
   1188 	/*
   1189 	 * Convert address into form that can be used
   1190 	 * as hash table key.
   1191 	 */
   1192 	for (i = 0; i < ETHERADDRL; i++) {
   1193 		addr = (addr << 8) | mca[i];
   1194 	}
   1195 
   1196 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
   1197 
   1198 	if (add) {
   1199 		D2(vswp, "%s: adding multicast", __func__);
   1200 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
   1201 			/*
   1202 			 * Update the list of multicast addresses
   1203 			 * contained within the vsw_t structure to
   1204 			 * include this new one.
   1205 			 */
   1206 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
   1207 			if (mcst_p == NULL) {
   1208 				DERR(vswp, "%s unable to alloc mem", __func__);
   1209 				(void) vsw_del_mcst(vswp,
   1210 				    VSW_LOCALDEV, addr, NULL);
   1211 				return (1);
   1212 			}
   1213 			mcst_p->addr = addr;
   1214 			ether_copy(mca, &mcst_p->mca);
   1215 
   1216 			/*
   1217 			 * Call into the underlying driver to program the
   1218 			 * address into HW.
   1219 			 */
   1220 			ret = vsw_mac_multicast_add(vswp, NULL, mcst_p,
   1221 			    VSW_LOCALDEV);
   1222 			if (ret != 0) {
   1223 				(void) vsw_del_mcst(vswp,
   1224 				    VSW_LOCALDEV, addr, NULL);
   1225 				kmem_free(mcst_p, sizeof (*mcst_p));
   1226 				return (ret);
   1227 			}
   1228 
   1229 			mutex_enter(&vswp->mca_lock);
   1230 			mcst_p->nextp = vswp->mcap;
   1231 			vswp->mcap = mcst_p;
   1232 			mutex_exit(&vswp->mca_lock);
   1233 		} else {
   1234 			cmn_err(CE_WARN, "!vsw%d: unable to add multicast "
   1235 			    "address", vswp->instance);
   1236 		}
   1237 		return (ret);
   1238 	}
   1239 
   1240 	D2(vswp, "%s: removing multicast", __func__);
   1241 	/*
   1242 	 * Remove the address from the hash table..
   1243 	 */
   1244 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
   1245 
   1246 		/*
   1247 		 * ..and then from the list maintained in the
   1248 		 * vsw_t structure.
   1249 		 */
   1250 		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
   1251 		ASSERT(mcst_p != NULL);
   1252 
   1253 		vsw_mac_multicast_remove(vswp, NULL, mcst_p, VSW_LOCALDEV);
   1254 		kmem_free(mcst_p, sizeof (*mcst_p));
   1255 	}
   1256 
   1257 	D1(vswp, "%s: exit", __func__);
   1258 
   1259 	return (0);
   1260 }
   1261 
   1262 static int
   1263 vsw_m_promisc(void *arg, boolean_t on)
   1264 {
   1265 	vsw_t		*vswp = (vsw_t *)arg;
   1266 
   1267 	D1(vswp, "%s: enter", __func__);
   1268 
   1269 	WRITE_ENTER(&vswp->if_lockrw);
   1270 	if (on)
   1271 		vswp->if_state |= VSW_IF_PROMISC;
   1272 	else
   1273 		vswp->if_state &= ~VSW_IF_PROMISC;
   1274 	RW_EXIT(&vswp->if_lockrw);
   1275 
   1276 	D1(vswp, "%s: exit", __func__);
   1277 
   1278 	return (0);
   1279 }
   1280 
   1281 static mblk_t *
   1282 vsw_m_tx(void *arg, mblk_t *mp)
   1283 {
   1284 	vsw_t		*vswp = (vsw_t *)arg;
   1285 
   1286 	D1(vswp, "%s: enter", __func__);
   1287 
   1288 	mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp);
   1289 
   1290 	if (mp == NULL) {
   1291 		return (NULL);
   1292 	}
   1293 
   1294 	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
   1295 
   1296 	D1(vswp, "%s: exit", __func__);
   1297 
   1298 	return (NULL);
   1299 }
   1300 
   1301 /*
   1302  * Register for machine description (MD) updates.
   1303  *
   1304  * Returns 0 on success, 1 on failure.
   1305  */
   1306 static int
   1307 vsw_mdeg_register(vsw_t *vswp)
   1308 {
   1309 	mdeg_prop_spec_t	*pspecp;
   1310 	mdeg_node_spec_t	*inst_specp;
   1311 	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
   1312 	size_t			templatesz;
   1313 	int			rv;
   1314 
   1315 	D1(vswp, "%s: enter", __func__);
   1316 
   1317 	/*
   1318 	 * Allocate and initialize a per-instance copy
   1319 	 * of the global property spec array that will
   1320 	 * uniquely identify this vsw instance.
   1321 	 */
   1322 	templatesz = sizeof (vsw_prop_template);
   1323 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
   1324 
   1325 	bcopy(vsw_prop_template, pspecp, templatesz);
   1326 
   1327 	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
   1328 
   1329 	/* initialize the complete prop spec structure */
   1330 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
   1331 	inst_specp->namep = "virtual-device";
   1332 	inst_specp->specp = pspecp;
   1333 
   1334 	D2(vswp, "%s: instance %d registering with mdeg", __func__,
   1335 	    vswp->regprop);
   1336 	/*
   1337 	 * Register an interest in 'virtual-device' nodes with a
   1338 	 * 'name' property of 'virtual-network-switch'
   1339 	 */
   1340 	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
   1341 	    (void *)vswp, &mdeg_hdl);
   1342 	if (rv != MDEG_SUCCESS) {
   1343 		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
   1344 		    __func__, rv);
   1345 		goto mdeg_reg_fail;
   1346 	}
   1347 
   1348 	/*
   1349 	 * Register an interest in 'vsw-port' nodes.
   1350 	 */
   1351 	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
   1352 	    (void *)vswp, &mdeg_port_hdl);
   1353 	if (rv != MDEG_SUCCESS) {
   1354 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
   1355 		(void) mdeg_unregister(mdeg_hdl);
   1356 		goto mdeg_reg_fail;
   1357 	}
   1358 
   1359 	/* save off data that will be needed later */
   1360 	vswp->inst_spec = inst_specp;
   1361 	vswp->mdeg_hdl = mdeg_hdl;
   1362 	vswp->mdeg_port_hdl = mdeg_port_hdl;
   1363 
   1364 	D1(vswp, "%s: exit", __func__);
   1365 	return (0);
   1366 
   1367 mdeg_reg_fail:
   1368 	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
   1369 	    vswp->instance);
   1370 	kmem_free(pspecp, templatesz);
   1371 	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
   1372 
   1373 	vswp->mdeg_hdl = NULL;
   1374 	vswp->mdeg_port_hdl = NULL;
   1375 
   1376 	return (1);
   1377 }
   1378 
   1379 static void
   1380 vsw_mdeg_unregister(vsw_t *vswp)
   1381 {
   1382 	D1(vswp, "vsw_mdeg_unregister: enter");
   1383 
   1384 	if (vswp->mdeg_hdl != NULL)
   1385 		(void) mdeg_unregister(vswp->mdeg_hdl);
   1386 
   1387 	if (vswp->mdeg_port_hdl != NULL)
   1388 		(void) mdeg_unregister(vswp->mdeg_port_hdl);
   1389 
   1390 	if (vswp->inst_spec != NULL) {
   1391 		if (vswp->inst_spec->specp != NULL) {
   1392 			(void) kmem_free(vswp->inst_spec->specp,
   1393 			    sizeof (vsw_prop_template));
   1394 			vswp->inst_spec->specp = NULL;
   1395 		}
   1396 
   1397 		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
   1398 		vswp->inst_spec = NULL;
   1399 	}
   1400 
   1401 	D1(vswp, "vsw_mdeg_unregister: exit");
   1402 }
   1403 
   1404 /*
   1405  * Mdeg callback invoked for the vsw node itself.
   1406  */
   1407 static int
   1408 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
   1409 {
   1410 	vsw_t		*vswp;
   1411 	md_t		*mdp;
   1412 	mde_cookie_t	node;
   1413 	uint64_t	inst;
   1414 	char		*node_name = NULL;
   1415 
   1416 	if (resp == NULL)
   1417 		return (MDEG_FAILURE);
   1418 
   1419 	vswp = (vsw_t *)cb_argp;
   1420 
   1421 	D1(vswp, "%s: added %d : removed %d : curr matched %d"
   1422 	    " : prev matched %d", __func__, resp->added.nelem,
   1423 	    resp->removed.nelem, resp->match_curr.nelem,
   1424 	    resp->match_prev.nelem);
   1425 
   1426 	/*
   1427 	 * We get an initial callback for this node as 'added'
   1428 	 * after registering with mdeg. Note that we would have
   1429 	 * already gathered information about this vsw node by
   1430 	 * walking MD earlier during attach (in vsw_read_mdprops()).
   1431 	 * So, there is a window where the properties of this
   1432 	 * node might have changed when we get this initial 'added'
   1433 	 * callback. We handle this as if an update occured
   1434 	 * and invoke the same function which handles updates to
   1435 	 * the properties of this vsw-node if any.
   1436 	 *
   1437 	 * A non-zero 'match' value indicates that the MD has been
   1438 	 * updated and that a virtual-network-switch node is
   1439 	 * present which may or may not have been updated. It is
   1440 	 * up to the clients to examine their own nodes and
   1441 	 * determine if they have changed.
   1442 	 */
   1443 	if (resp->added.nelem != 0) {
   1444 
   1445 		if (resp->added.nelem != 1) {
   1446 			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
   1447 			    "invalid: %d\n", vswp->instance, resp->added.nelem);
   1448 			return (MDEG_FAILURE);
   1449 		}
   1450 
   1451 		mdp = resp->added.mdp;
   1452 		node = resp->added.mdep[0];
   1453 
   1454 	} else if (resp->match_curr.nelem != 0) {
   1455 
   1456 		if (resp->match_curr.nelem != 1) {
   1457 			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
   1458 			    "invalid: %d\n", vswp->instance,
   1459 			    resp->match_curr.nelem);
   1460 			return (MDEG_FAILURE);
   1461 		}
   1462 
   1463 		mdp = resp->match_curr.mdp;
   1464 		node = resp->match_curr.mdep[0];
   1465 
   1466 	} else {
   1467 		return (MDEG_FAILURE);
   1468 	}
   1469 
   1470 	/* Validate name and instance */
   1471 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
   1472 		DERR(vswp, "%s: unable to get node name\n",  __func__);
   1473 		return (MDEG_FAILURE);
   1474 	}
   1475 
   1476 	/* is this a virtual-network-switch? */
   1477 	if (strcmp(node_name, vsw_propname) != 0) {
   1478 		DERR(vswp, "%s: Invalid node name: %s\n",
   1479 		    __func__, node_name);
   1480 		return (MDEG_FAILURE);
   1481 	}
   1482 
   1483 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
   1484 		DERR(vswp, "%s: prop(cfg-handle) not found\n",
   1485 		    __func__);
   1486 		return (MDEG_FAILURE);
   1487 	}
   1488 
   1489 	/* is this the right instance of vsw? */
   1490 	if (inst != vswp->regprop) {
   1491 		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
   1492 		    __func__, inst);
   1493 		return (MDEG_FAILURE);
   1494 	}
   1495 
   1496 	vsw_update_md_prop(vswp, mdp, node);
   1497 
   1498 	return (MDEG_SUCCESS);
   1499 }
   1500 
   1501 /*
   1502  * Mdeg callback invoked for changes to the vsw-port nodes
   1503  * under the vsw node.
   1504  */
   1505 static int
   1506 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
   1507 {
   1508 	vsw_t		*vswp;
   1509 	int		idx;
   1510 	md_t		*mdp;
   1511 	mde_cookie_t	node;
   1512 	uint64_t	inst;
   1513 	int		rv;
   1514 
   1515 	if ((resp == NULL) || (cb_argp == NULL))
   1516 		return (MDEG_FAILURE);
   1517 
   1518 	vswp = (vsw_t *)cb_argp;
   1519 
   1520 	D2(vswp, "%s: added %d : removed %d : curr matched %d"
   1521 	    " : prev matched %d", __func__, resp->added.nelem,
   1522 	    resp->removed.nelem, resp->match_curr.nelem,
   1523 	    resp->match_prev.nelem);
   1524 
   1525 	/* process added ports */
   1526 	for (idx = 0; idx < resp->added.nelem; idx++) {
   1527 		mdp = resp->added.mdp;
   1528 		node = resp->added.mdep[idx];
   1529 
   1530 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
   1531 
   1532 		if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) {
   1533 			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
   1534 			    "(0x%lx), err=%d", vswp->instance, node, rv);
   1535 		}
   1536 	}
   1537 
   1538 	/* process removed ports */
   1539 	for (idx = 0; idx < resp->removed.nelem; idx++) {
   1540 		mdp = resp->removed.mdp;
   1541 		node = resp->removed.mdep[idx];
   1542 
   1543 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
   1544 			DERR(vswp, "%s: prop(%s) not found in port(%d)",
   1545 			    __func__, id_propname, idx);
   1546 			continue;
   1547 		}
   1548 
   1549 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
   1550 
   1551 		if (vsw_port_detach(vswp, inst) != 0) {
   1552 			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
   1553 			    vswp->instance, inst);
   1554 		}
   1555 	}
   1556 
   1557 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
   1558 		(void) vsw_port_update(vswp, resp->match_curr.mdp,
   1559 		    resp->match_curr.mdep[idx],
   1560 		    resp->match_prev.mdp,
   1561 		    resp->match_prev.mdep[idx]);
   1562 	}
   1563 
   1564 	D1(vswp, "%s: exit", __func__);
   1565 
   1566 	return (MDEG_SUCCESS);
   1567 }
   1568 
   1569 /*
   1570  * Scan the machine description for this instance of vsw
   1571  * and read its properties. Called only from vsw_attach().
   1572  * Returns: 0 on success, 1 on failure.
   1573  */
   1574 static int
   1575 vsw_read_mdprops(vsw_t *vswp)
   1576 {
   1577 	md_t		*mdp = NULL;
   1578 	mde_cookie_t	rootnode;
   1579 	mde_cookie_t	*listp = NULL;
   1580 	uint64_t	inst;
   1581 	uint64_t	cfgh;
   1582 	char		*name;
   1583 	int		rv = 1;
   1584 	int		num_nodes = 0;
   1585 	int		num_devs = 0;
   1586 	int		listsz = 0;
   1587 	int		i;
   1588 
   1589 	/*
   1590 	 * In each 'virtual-device' node in the MD there is a
   1591 	 * 'cfg-handle' property which is the MD's concept of
   1592 	 * an instance number (this may be completely different from
   1593 	 * the device drivers instance #). OBP reads that value and
   1594 	 * stores it in the 'reg' property of the appropriate node in
   1595 	 * the device tree. We first read this reg property and use this
   1596 	 * to compare against the 'cfg-handle' property of vsw nodes
   1597 	 * in MD to get to this specific vsw instance and then read
   1598 	 * other properties that we are interested in.
   1599 	 * We also cache the value of 'reg' property and use it later
   1600 	 * to register callbacks with mdeg (see vsw_mdeg_register())
   1601 	 */
   1602 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
   1603 	    DDI_PROP_DONTPASS, reg_propname, -1);
   1604 	if (inst == -1) {
   1605 		cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
   1606 		    "OBP device tree", vswp->instance, reg_propname);
   1607 		return (rv);
   1608 	}
   1609 
   1610 	vswp->regprop = inst;
   1611 
   1612 	if ((mdp = md_get_handle()) == NULL) {
   1613 		DWARN(vswp, "%s: cannot init MD\n", __func__);
   1614 		return (rv);
   1615 	}
   1616 
   1617 	num_nodes = md_node_count(mdp);
   1618 	ASSERT(num_nodes > 0);
   1619 
   1620 	listsz = num_nodes * sizeof (mde_cookie_t);
   1621 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
   1622 
   1623 	rootnode = md_root_node(mdp);
   1624 
   1625 	/* search for all "virtual_device" nodes */
   1626 	num_devs = md_scan_dag(mdp, rootnode,
   1627 	    md_find_name(mdp, vdev_propname),
   1628 	    md_find_name(mdp, "fwd"), listp);
   1629 	if (num_devs <= 0) {
   1630 		DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
   1631 		goto vsw_readmd_exit;
   1632 	}
   1633 
   1634 	/*
   1635 	 * Now loop through the list of virtual-devices looking for
   1636 	 * devices with name "virtual-network-switch" and for each
   1637 	 * such device compare its instance with what we have from
   1638 	 * the 'reg' property to find the right node in MD and then
   1639 	 * read all its properties.
   1640 	 */
   1641 	for (i = 0; i < num_devs; i++) {
   1642 
   1643 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
   1644 			DWARN(vswp, "%s: name property not found\n",
   1645 			    __func__);
   1646 			goto vsw_readmd_exit;
   1647 		}
   1648 
   1649 		/* is this a virtual-network-switch? */
   1650 		if (strcmp(name, vsw_propname) != 0)
   1651 			continue;
   1652 
   1653 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
   1654 			DWARN(vswp, "%s: cfg-handle property not found\n",
   1655 			    __func__);
   1656 			goto vsw_readmd_exit;
   1657 		}
   1658 
   1659 		/* is this the required instance of vsw? */
   1660 		if (inst != cfgh)
   1661 			continue;
   1662 
   1663 		/* now read all properties of this vsw instance */
   1664 		rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
   1665 		break;
   1666 	}
   1667 
   1668 vsw_readmd_exit:
   1669 
   1670 	kmem_free(listp, listsz);
   1671 	(void) md_fini_handle(mdp);
   1672 	return (rv);
   1673 }
   1674 
   1675 /*
   1676  * Read the initial start-of-day values from the specified MD node.
   1677  */
   1678 static int
   1679 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
   1680 {
   1681 	uint64_t	macaddr = 0;
   1682 
   1683 	D1(vswp, "%s: enter", __func__);
   1684 
   1685 	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
   1686 		return (1);
   1687 	}
   1688 
   1689 	/* mac address for vswitch device itself */
   1690 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
   1691 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
   1692 		    vswp->instance);
   1693 		return (1);
   1694 	}
   1695 
   1696 	vsw_save_lmacaddr(vswp, macaddr);
   1697 
   1698 	if (vsw_get_md_smodes(vswp, mdp, node, &vswp->smode)) {
   1699 		DWARN(vswp, "%s: Unable to read %s property from MD, "
   1700 		    "defaulting to 'switched' mode",
   1701 		    __func__, smode_propname);
   1702 
   1703 		vswp->smode = VSW_LAYER2;
   1704 	}
   1705 
   1706 	/*
   1707 	 * Read the 'linkprop' property to know if this
   1708 	 * vsw device wants to get physical link updates.
   1709 	 */
   1710 	vsw_linkprop_read(vswp, mdp, node, &vswp->pls_update);
   1711 
   1712 	/* read mtu */
   1713 	vsw_mtu_read(vswp, mdp, node, &vswp->mtu);
   1714 	if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) {
   1715 		vswp->mtu = ETHERMTU;
   1716 	}
   1717 	vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) +
   1718 	    VLAN_TAGSZ;
   1719 
   1720 	/* read vlan id properties of this vsw instance */
   1721 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid,
   1722 	    &vswp->vids, &vswp->nvids, &vswp->default_vlan_id);
   1723 
   1724 	/* read priority-ether-types */
   1725 	vsw_read_pri_eth_types(vswp, mdp, node);
   1726 
   1727 	/* read bandwidth property of this vsw instance */
   1728 	vsw_bandwidth_read(vswp, mdp, node, &vswp->bandwidth);
   1729 
   1730 	D1(vswp, "%s: exit", __func__);
   1731 	return (0);
   1732 }
   1733 
   1734 /*
   1735  * Read vlan id properties of the given MD node.
   1736  * Arguments:
   1737  *   arg:          device argument(vsw device or a port)
   1738  *   type:         type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port)
   1739  *   mdp:          machine description
   1740  *   node:         md node cookie
   1741  *
   1742  * Returns:
   1743  *   pvidp:        port-vlan-id of the node
   1744  *   vidspp:       list of vlan-ids of the node
   1745  *   nvidsp:       # of vlan-ids in the list
   1746  *   default_idp:  default-vlan-id of the node(if node is vsw device)
   1747  */
   1748 static void
   1749 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
   1750 	uint16_t *pvidp, vsw_vlanid_t **vidspp, uint16_t *nvidsp,
   1751 	uint16_t *default_idp)
   1752 {
   1753 	vsw_t		*vswp;
   1754 	vsw_port_t	*portp;
   1755 	char		*pvid_propname;
   1756 	char		*vid_propname;
   1757 	uint_t		nvids = 0;
   1758 	uint32_t	vids_size;
   1759 	int		rv;
   1760 	int		i;
   1761 	uint64_t	*data;
   1762 	uint64_t	val;
   1763 	int		size;
   1764 	int		inst;
   1765 
   1766 	if (type == VSW_LOCALDEV) {
   1767 
   1768 		vswp = (vsw_t *)arg;
   1769 		pvid_propname = vsw_pvid_propname;
   1770 		vid_propname = vsw_vid_propname;
   1771 		inst = vswp->instance;
   1772 
   1773 	} else if (type == VSW_VNETPORT) {
   1774 
   1775 		portp = (vsw_port_t *)arg;
   1776 		vswp = portp->p_vswp;
   1777 		pvid_propname = port_pvid_propname;
   1778 		vid_propname = port_vid_propname;
   1779 		inst = portp->p_instance;
   1780 
   1781 	} else {
   1782 		return;
   1783 	}
   1784 
   1785 	if (type == VSW_LOCALDEV && default_idp != NULL) {
   1786 		rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val);
   1787 		if (rv != 0) {
   1788 			DWARN(vswp, "%s: prop(%s) not found", __func__,
   1789 			    vsw_dvid_propname);
   1790 
   1791 			*default_idp = vsw_default_vlan_id;
   1792 		} else {
   1793 			*default_idp = val & 0xFFF;
   1794 			D2(vswp, "%s: %s(%d): (%d)\n", __func__,
   1795 			    vsw_dvid_propname, inst, *default_idp);
   1796 		}
   1797 	}
   1798 
   1799 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
   1800 	if (rv != 0) {
   1801 		DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname);
   1802 		*pvidp = vsw_default_vlan_id;
   1803 	} else {
   1804 
   1805 		*pvidp = val & 0xFFF;
   1806 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
   1807 		    pvid_propname, inst, *pvidp);
   1808 	}
   1809 
   1810 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
   1811 	    &size);
   1812 	if (rv != 0) {
   1813 		D2(vswp, "%s: prop(%s) not found", __func__, vid_propname);
   1814 		size = 0;
   1815 	} else {
   1816 		size /= sizeof (uint64_t);
   1817 	}
   1818 	nvids = size;
   1819 
   1820 	if (nvids != 0) {
   1821 		D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst);
   1822 		vids_size = sizeof (vsw_vlanid_t) * nvids;
   1823 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
   1824 		for (i = 0; i < nvids; i++) {
   1825 			(*vidspp)[i].vl_vid = data[i] & 0xFFFF;
   1826 			(*vidspp)[i].vl_set = B_FALSE;
   1827 			D2(vswp, " %d ", (*vidspp)[i].vl_vid);
   1828 		}
   1829 		D2(vswp, "\n");
   1830 	}
   1831 
   1832 	*nvidsp = nvids;
   1833 }
   1834 
   1835 static void
   1836 vsw_port_read_bandwidth(vsw_port_t *portp, md_t *mdp, mde_cookie_t node,
   1837     uint64_t *bw)
   1838 {
   1839 	int		rv;
   1840 	uint64_t	val;
   1841 	vsw_t		*vswp;
   1842 
   1843 	vswp = portp->p_vswp;
   1844 
   1845 	rv = md_get_prop_val(mdp, node, port_maxbw_propname, &val);
   1846 
   1847 	if (rv != 0) {
   1848 		*bw = 0;
   1849 		D3(vswp, "%s: prop(%s) not found\n", __func__,
   1850 		    port_maxbw_propname);
   1851 	} else {
   1852 		*bw = val;
   1853 		D3(vswp, "%s: %s nodes found", __func__, port_maxbw_propname);
   1854 	}
   1855 }
   1856 
   1857 /*
   1858  * This function reads "priority-ether-types" property from md. This property
   1859  * is used to enable support for priority frames. Applications which need
   1860  * guaranteed and timely delivery of certain high priority frames to/from
   1861  * a vnet or vsw within ldoms, should configure this property by providing
   1862  * the ether type(s) for which the priority facility is needed.
   1863  * Normal data frames are delivered over a ldc channel using the descriptor
   1864  * ring mechanism which is constrained by factors such as descriptor ring size,
   1865  * the rate at which the ring is processed at the peer ldc end point, etc.
   1866  * The priority mechanism provides an Out-Of-Band path to send/receive frames
   1867  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
   1868  * descriptor ring path and enables a more reliable and timely delivery of
   1869  * frames to the peer.
   1870  */
   1871 static void
   1872 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
   1873 {
   1874 	int		rv;
   1875 	uint16_t	*types;
   1876 	uint64_t	*data;
   1877 	int		size;
   1878 	int		i;
   1879 	size_t		mblk_sz;
   1880 
   1881 	rv = md_get_prop_data(mdp, node, pri_types_propname,
   1882 	    (uint8_t **)&data, &size);
   1883 	if (rv != 0) {
   1884 		/*
   1885 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
   1886 		 * Check if 'vsw_pri_eth_type' has been set in that case.
   1887 		 */
   1888 		if (vsw_pri_eth_type != 0) {
   1889 			size = sizeof (vsw_pri_eth_type);
   1890 			data = &vsw_pri_eth_type;
   1891 		} else {
   1892 			D3(vswp, "%s: prop(%s) not found", __func__,
   1893 			    pri_types_propname);
   1894 			size = 0;
   1895 		}
   1896 	}
   1897 
   1898 	if (size == 0) {
   1899 		vswp->pri_num_types = 0;
   1900 		return;
   1901 	}
   1902 
   1903 	/*
   1904 	 * we have some priority-ether-types defined;
   1905 	 * allocate a table of these types and also
   1906 	 * allocate a pool of mblks to transmit these
   1907 	 * priority packets.
   1908 	 */
   1909 	size /= sizeof (uint64_t);
   1910 	vswp->pri_num_types = size;
   1911 	vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
   1912 	for (i = 0, types = vswp->pri_types; i < size; i++) {
   1913 		types[i] = data[i] & 0xFFFF;
   1914 	}
   1915 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7;
   1916 	(void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp);
   1917 }
   1918 
   1919 static void
   1920 vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
   1921 {
   1922 	int		rv;
   1923 	int		inst;
   1924 	uint64_t	val;
   1925 	char		*mtu_propname;
   1926 
   1927 	mtu_propname = vsw_mtu_propname;
   1928 	inst = vswp->instance;
   1929 
   1930 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
   1931 	if (rv != 0) {
   1932 		D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname);
   1933 		*mtu = vsw_ethermtu;
   1934 	} else {
   1935 
   1936 		*mtu = val & 0xFFFF;
   1937 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
   1938 		    mtu_propname, inst, *mtu);
   1939 	}
   1940 }
   1941 
   1942 /*
   1943  * Update the mtu of the vsw device. We first check if the device has been
   1944  * plumbed and if so fail the mtu update. Otherwise, we continue to update the
   1945  * new mtu and reset all ports to initiate handshake re-negotiation with peers
   1946  * using the new mtu.
   1947  */
   1948 static int
   1949 vsw_mtu_update(vsw_t *vswp, uint32_t mtu)
   1950 {
   1951 	int	rv;
   1952 
   1953 	WRITE_ENTER(&vswp->if_lockrw);
   1954 
   1955 	if (vswp->if_state & VSW_IF_UP) {
   1956 
   1957 		RW_EXIT(&vswp->if_lockrw);
   1958 
   1959 		cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
   1960 		    " as the device is plumbed\n", vswp->instance);
   1961 		return (EBUSY);
   1962 
   1963 	} else {
   1964 
   1965 		D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n",
   1966 		    __func__, vswp->mtu, mtu);
   1967 
   1968 		vswp->mtu = mtu;
   1969 		vswp->max_frame_size = vswp->mtu +
   1970 		    sizeof (struct ether_header) + VLAN_TAGSZ;
   1971 
   1972 		rv = mac_maxsdu_update(vswp->if_mh, mtu);
   1973 		if (rv != 0) {
   1974 			cmn_err(CE_NOTE,
   1975 			    "!vsw%d: Unable to update mtu with mac"
   1976 			    " layer\n", vswp->instance);
   1977 		}
   1978 
   1979 		RW_EXIT(&vswp->if_lockrw);
   1980 
   1981 		/* Reset ports to renegotiate with the new mtu */
   1982 		vsw_reset_ports(vswp);
   1983 
   1984 	}
   1985 
   1986 	return (0);
   1987 }
   1988 
   1989 static void
   1990 vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
   1991 	boolean_t *pls)
   1992 {
   1993 	int		rv;
   1994 	uint64_t	val;
   1995 	char		*linkpropname;
   1996 
   1997 	linkpropname = vsw_linkprop_propname;
   1998 
   1999 	rv = md_get_prop_val(mdp, node, linkpropname, &val);
   2000 	if (rv != 0) {
   2001 		D3(vswp, "%s: prop(%s) not found", __func__, linkpropname);
   2002 		*pls = B_FALSE;
   2003 	} else {
   2004 
   2005 		*pls = (val & 0x1) ? B_TRUE : B_FALSE;
   2006 		D2(vswp, "%s: %s(%d): (%d)\n", __func__, linkpropname,
   2007 		    vswp->instance, *pls);
   2008 	}
   2009 }
   2010 
   2011 void
   2012 vsw_mac_link_update(vsw_t *vswp, link_state_t link_state)
   2013 {
   2014 	READ_ENTER(&vswp->if_lockrw);
   2015 
   2016 	if (vswp->if_state & VSW_IF_REG) {
   2017 		mac_link_update(vswp->if_mh, link_state);
   2018 	}
   2019 
   2020 	RW_EXIT(&vswp->if_lockrw);
   2021 }
   2022 
   2023 void
   2024 vsw_physlink_state_update(vsw_t *vswp)
   2025 {
   2026 	if (vswp->pls_update == B_TRUE) {
   2027 		vsw_mac_link_update(vswp, vswp->phys_link_state);
   2028 	}
   2029 	vsw_physlink_update_ports(vswp);
   2030 }
   2031 
   2032 static void
   2033 vsw_bandwidth_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint64_t *bw)
   2034 {
   2035 	/* read the vsw bandwidth from md */
   2036 	int		rv;
   2037 	uint64_t	val;
   2038 
   2039 	rv = md_get_prop_val(mdp, node, vsw_maxbw_propname, &val);
   2040 	if (rv != 0) {
   2041 		*bw = 0;
   2042 		D3(vswp, "%s: prop(%s) not found", __func__,
   2043 		    vsw_maxbw_propname);
   2044 	} else {
   2045 		*bw = val;
   2046 		D3(vswp, "%s: %s(%d): (%ld)\n", __func__,
   2047 		    vsw_maxbw_propname, vswp->instance, *bw);
   2048 	}
   2049 }
   2050 
   2051 /*
   2052  * Check to see if the relevant properties in the specified node have
   2053  * changed, and if so take the appropriate action.
   2054  *
   2055  * If any of the properties are missing or invalid we don't take
   2056  * any action, as this function should only be invoked when modifications
   2057  * have been made to what we assume is a working configuration, which
   2058  * we leave active.
   2059  *
   2060  * Note it is legal for this routine to be invoked even if none of the
   2061  * properties in the port node within the MD have actually changed.
   2062  */
   2063 static void
   2064 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
   2065 {
   2066 	char		physname[LIFNAMSIZ];
   2067 	char		drv[LIFNAMSIZ];
   2068 	uint_t		ddi_instance;
   2069 	uint8_t		new_smode;
   2070 	int		i;
   2071 	uint64_t 	macaddr = 0;
   2072 	enum		{MD_init = 0x1,
   2073 				MD_physname = 0x2,
   2074 				MD_macaddr = 0x4,
   2075 				MD_smode = 0x8,
   2076 				MD_vlans = 0x10,
   2077 				MD_mtu = 0x20,
   2078 				MD_pls = 0x40,
   2079 				MD_bw = 0x80} updated;
   2080 	int		rv;
   2081 	uint16_t	pvid;
   2082 	vsw_vlanid_t	*vids;
   2083 	uint16_t	nvids;
   2084 	uint32_t	mtu;
   2085 	boolean_t	pls_update;
   2086 	uint64_t	maxbw;
   2087 
   2088 	updated = MD_init;
   2089 
   2090 	D1(vswp, "%s: enter", __func__);
   2091 
   2092 	/*
   2093 	 * Check if name of physical device in MD has changed.
   2094 	 */
   2095 	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
   2096 		/*
   2097 		 * Do basic sanity check on new device name/instance,
   2098 		 * if its non NULL. It is valid for the device name to
   2099 		 * have changed from a non NULL to a NULL value, i.e.
   2100 		 * the vsw is being changed to 'routed' mode.
   2101 		 */
   2102 		if ((strlen(physname) != 0) &&
   2103 		    (ddi_parse(physname, drv,
   2104 		    &ddi_instance) != DDI_SUCCESS)) {
   2105 			cmn_err(CE_WARN, "!vsw%d: physical device %s is not"
   2106 			    " a valid device name/instance",
   2107 			    vswp->instance, physname);
   2108 			goto fail_reconf;
   2109 		}
   2110 
   2111 		if (strcmp(physname, vswp->physname)) {
   2112 			D2(vswp, "%s: device name changed from %s to %s",
   2113 			    __func__, vswp->physname, physname);
   2114 
   2115 			updated |= MD_physname;
   2116 		} else {
   2117 			D2(vswp, "%s: device name unchanged at %s",
   2118 			    __func__, vswp->physname);
   2119 		}
   2120 	} else {
   2121 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
   2122 		    "device from updated MD.", vswp->instance);
   2123 		goto fail_reconf;
   2124 	}
   2125 
   2126 	/*
   2127 	 * Check if MAC address has changed.
   2128 	 */
   2129 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
   2130 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
   2131 		    vswp->instance);
   2132 		goto fail_reconf;
   2133 	} else {
   2134 		uint64_t maddr = macaddr;
   2135 		READ_ENTER(&vswp->if_lockrw);
   2136 		for (i = ETHERADDRL - 1; i >= 0; i--) {
   2137 			if (vswp->if_addr.ether_addr_octet[i]
   2138 			    != (macaddr & 0xFF)) {
   2139 				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
   2140 				    __func__, i,
   2141 				    vswp->if_addr.ether_addr_octet[i],
   2142 				    (macaddr & 0xFF));
   2143 				updated |= MD_macaddr;
   2144 				macaddr = maddr;
   2145 				break;
   2146 			}
   2147 			macaddr >>= 8;
   2148 		}
   2149 		RW_EXIT(&vswp->if_lockrw);
   2150 		if (updated & MD_macaddr) {
   2151 			vsw_save_lmacaddr(vswp, macaddr);
   2152 		}
   2153 	}
   2154 
   2155 	/*
   2156 	 * Check if switching modes have changed.
   2157 	 */
   2158 	if (vsw_get_md_smodes(vswp, mdp, node, &new_smode)) {
   2159 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
   2160 		    vswp->instance, smode_propname);
   2161 		goto fail_reconf;
   2162 	} else {
   2163 		if (new_smode != vswp->smode) {
   2164 			D2(vswp, "%s: switching mode changed from %d to %d",
   2165 			    __func__, vswp->smode, new_smode);
   2166 
   2167 			updated |= MD_smode;
   2168 		}
   2169 	}
   2170 
   2171 	/* Read the vlan ids */
   2172 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids,
   2173 	    &nvids, NULL);
   2174 
   2175 	/* Determine if there are any vlan id updates */
   2176 	if ((pvid != vswp->pvid) ||		/* pvid changed? */
   2177 	    (nvids != vswp->nvids) ||		/* # of vids changed? */
   2178 	    ((nvids != 0) && (vswp->nvids != 0) &&	/* vids changed? */
   2179 	    !vsw_cmp_vids(vids, vswp->vids, nvids))) {
   2180 		updated |= MD_vlans;
   2181 	}
   2182 
   2183 	/* Read mtu */
   2184 	vsw_mtu_read(vswp, mdp, node, &mtu);
   2185 	if (mtu != vswp->mtu) {
   2186 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
   2187 			updated |= MD_mtu;
   2188 		} else {
   2189 			cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
   2190 			    " as the specified value:%d is invalid\n",
   2191 			    vswp->instance, mtu);
   2192 		}
   2193 	}
   2194 
   2195 	/*
   2196 	 * Read the 'linkprop' property.
   2197 	 */
   2198 	vsw_linkprop_read(vswp, mdp, node, &pls_update);
   2199 	if (pls_update != vswp->pls_update) {
   2200 		updated |= MD_pls;
   2201 	}
   2202 
   2203 	/* Read bandwidth */
   2204 	vsw_bandwidth_read(vswp, mdp, node, &maxbw);
   2205 	if (maxbw != vswp->bandwidth) {
   2206 		if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) {
   2207 			updated |= MD_bw;
   2208 		} else {
   2209 			cmn_err(CE_NOTE, "!vsw%d: Unable to process bandwidth"
   2210 			    " update as the specified value:%ld is invalid\n",
   2211 			    vswp->instance, maxbw);
   2212 		}
   2213 	}
   2214 
   2215 	/*
   2216 	 * Now make any changes which are needed...
   2217 	 */
   2218 	if (updated & MD_pls) {
   2219 
   2220 		/* save the updated property. */
   2221 		vswp->pls_update = pls_update;
   2222 
   2223 		if (pls_update == B_FALSE) {
   2224 			/*
   2225 			 * Phys link state update is now disabled for this vsw
   2226 			 * interface. If we had previously reported a link-down
   2227 			 * to the stack, undo that by sending a link-up.
   2228 			 */
   2229 			if (vswp->phys_link_state == LINK_STATE_DOWN) {
   2230 				vsw_mac_link_update(vswp, LINK_STATE_UP);
   2231 			}
   2232 		} else {
   2233 			/*
   2234 			 * Phys link state update is now enabled. Send up an
   2235 			 * update based on the current phys link state.
   2236 			 */
   2237 			if (vswp->smode & VSW_LAYER2) {
   2238 				vsw_mac_link_update(vswp,
   2239 				    vswp->phys_link_state);
   2240 			}
   2241 		}
   2242 
   2243 	}
   2244 
   2245 	if (updated & (MD_physname | MD_smode | MD_mtu)) {
   2246 
   2247 		/*
   2248 		 * Stop any pending thread to setup switching mode.
   2249 		 */
   2250 		vsw_setup_switching_stop(vswp);
   2251 
   2252 		/* Cleanup HybridIO */
   2253 		vsw_hio_cleanup(vswp);
   2254 
   2255 		/*
   2256 		 * Remove unicst, mcst addrs of vsw interface
   2257 		 * and ports from the physdev. This also closes
   2258 		 * the corresponding mac clients.
   2259 		 */
   2260 		vsw_unset_addrs(vswp);
   2261 
   2262 		/*
   2263 		 * Stop, detach and close the old device..
   2264 		 */
   2265 		mutex_enter(&vswp->mac_lock);
   2266 		vsw_mac_close(vswp);
   2267 		mutex_exit(&vswp->mac_lock);
   2268 
   2269 		/*
   2270 		 * Update phys name.
   2271 		 */
   2272 		if (updated & MD_physname) {
   2273 			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
   2274 			    vswp->instance, vswp->physname, physname);
   2275 			(void) strncpy(vswp->physname,
   2276 			    physname, strlen(physname) + 1);
   2277 		}
   2278 
   2279 		/*
   2280 		 * Update array with the new switch mode values.
   2281 		 */
   2282 		if (updated & MD_smode) {
   2283 			vswp->smode = new_smode;
   2284 		}
   2285 
   2286 		/* Update mtu */
   2287 		if (updated & MD_mtu) {
   2288 			rv = vsw_mtu_update(vswp, mtu);
   2289 			if (rv != 0) {
   2290 				goto fail_update;
   2291 			}
   2292 		}
   2293 
   2294 		/*
   2295 		 * ..and attach, start the new device.
   2296 		 */
   2297 		rv = vsw_setup_switching(vswp);
   2298 		if (rv == EAGAIN) {
   2299 			/*
   2300 			 * Unable to setup switching mode.
   2301 			 * As the error is EAGAIN, schedule a thread to retry
   2302 			 * and return. Programming addresses of ports and
   2303 			 * vsw interface will be done by the thread when the
   2304 			 * switching setup completes successfully.
   2305 			 */
   2306 			if (vsw_setup_switching_start(vswp) != 0) {
   2307 				goto fail_update;
   2308 			}
   2309 			return;
   2310 
   2311 		} else if (rv) {
   2312 			goto fail_update;
   2313 		}
   2314 
   2315 		vsw_setup_switching_post_process(vswp);
   2316 	} else if (updated & MD_macaddr) {
   2317 		/*
   2318 		 * We enter here if only MD_macaddr is exclusively updated.
   2319 		 * If MD_physname and/or MD_smode are also updated, then
   2320 		 * as part of that, we would have implicitly processed
   2321 		 * MD_macaddr update (above).
   2322 		 */
   2323 		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
   2324 		    vswp->instance, macaddr);
   2325 
   2326 		READ_ENTER(&vswp->if_lockrw);
   2327 		if (vswp->if_state & VSW_IF_UP) {
   2328 			/* reconfigure with new address */
   2329 			vsw_if_mac_reconfig(vswp, B_FALSE, 0, NULL, 0);
   2330 
   2331 			/*
   2332 			 * Notify the MAC layer of the changed address.
   2333 			 */
   2334 			mac_unicst_update(vswp->if_mh,
   2335 			    (uint8_t *)&vswp->if_addr);
   2336 
   2337 		}
   2338 		RW_EXIT(&vswp->if_lockrw);
   2339 
   2340 	}
   2341 
   2342 	if (updated & MD_vlans) {
   2343 		/* Remove existing vlan ids from the hash table. */
   2344 		vsw_vlan_remove_ids(vswp, VSW_LOCALDEV);
   2345 
   2346 		if (vswp->if_state & VSW_IF_UP) {
   2347 			vsw_if_mac_reconfig(vswp, B_TRUE, pvid, vids, nvids);
   2348 		} else {
   2349 			if (vswp->nvids != 0) {
   2350 				kmem_free(vswp->vids,
   2351 				    sizeof (vsw_vlanid_t) * vswp->nvids);
   2352 			}
   2353 			vswp->vids = vids;
   2354 			vswp->nvids = nvids;
   2355 			vswp->pvid = pvid;
   2356 		}
   2357 
   2358 		/* add these new vlan ids into hash table */
   2359 		vsw_vlan_add_ids(vswp, VSW_LOCALDEV);
   2360 	} else {
   2361 		if (nvids != 0) {
   2362 			kmem_free(vids, sizeof (vsw_vlanid_t) * nvids);
   2363 		}
   2364 	}
   2365 
   2366 	if (updated & MD_bw) {
   2367 		vsw_update_bandwidth(vswp, NULL, VSW_LOCALDEV, maxbw);
   2368 	}
   2369 
   2370 	return;
   2371 
   2372 fail_reconf:
   2373 	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
   2374 	return;
   2375 
   2376 fail_update:
   2377 	cmn_err(CE_WARN, "!vsw%d: re-configuration failed",
   2378 	    vswp->instance);
   2379 }
   2380 
   2381 /*
   2382  * Read the port's md properties.
   2383  */
   2384 static int
   2385 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
   2386 	md_t *mdp, mde_cookie_t *node)
   2387 {
   2388 	uint64_t		ldc_id;
   2389 	uint8_t			*addrp;
   2390 	int			i, addrsz;
   2391 	int			num_nodes = 0, nchan = 0;
   2392 	int			listsz = 0;
   2393 	mde_cookie_t		*listp = NULL;
   2394 	struct ether_addr	ea;
   2395 	uint64_t		macaddr;
   2396 	uint64_t		inst = 0;
   2397 	uint64_t		val;
   2398 
   2399 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
   2400 		DWARN(vswp, "%s: prop(%s) not found", __func__,
   2401 		    id_propname);
   2402 		return (1);
   2403 	}
   2404 
   2405 	/*
   2406 	 * Find the channel endpoint node(s) (which should be under this
   2407 	 * port node) which contain the channel id(s).
   2408 	 */
   2409 	if ((num_nodes = md_node_count(mdp)) <= 0) {
   2410 		DERR(vswp, "%s: invalid number of nodes found (%d)",
   2411 		    __func__, num_nodes);
   2412 		return (1);
   2413 	}
   2414 
   2415 	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
   2416 
   2417 	/* allocate enough space for node list */
   2418 	listsz = num_nodes * sizeof (mde_cookie_t);
   2419 	listp = kmem_zalloc(listsz, KM_SLEEP);
   2420 
   2421 	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
   2422 	    md_find_name(mdp, "fwd"), listp);
   2423 
   2424 	if (nchan <= 0) {
   2425 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
   2426 		kmem_free(listp, listsz);
   2427 		return (1);
   2428 	}
   2429 
   2430 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
   2431 
   2432 	/* use property from first node found */
   2433 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
   2434 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
   2435 		    id_propname);
   2436 		kmem_free(listp, listsz);
   2437 		return (1);
   2438 	}
   2439 
   2440 	/* don't need list any more */
   2441 	kmem_free(listp, listsz);
   2442 
   2443 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
   2444 
   2445 	/* read mac-address property */
   2446 	if (md_get_prop_data(mdp, *node, remaddr_propname,
   2447 	    &addrp, &addrsz)) {
   2448 		DWARN(vswp, "%s: prop(%s) not found",
   2449 		    __func__, remaddr_propname);
   2450 		return (1);
   2451 	}
   2452 
   2453 	if (addrsz < ETHERADDRL) {
   2454 		DWARN(vswp, "%s: invalid address size", __func__);
   2455 		return (1);
   2456 	}
   2457 
   2458 	macaddr = *((uint64_t *)addrp);
   2459 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
   2460 
   2461 	for (i = ETHERADDRL - 1; i >= 0; i--) {
   2462 		ea.ether_addr_octet[i] = macaddr & 0xFF;
   2463 		macaddr >>= 8;
   2464 	}
   2465 
   2466 	/* now update all properties into the port */
   2467 	portp->p_vswp = vswp;
   2468 	portp->p_instance = inst;
   2469 	portp->addr_set = B_FALSE;
   2470 	ether_copy(&ea, &portp->p_macaddr);
   2471 	if (nchan > VSW_PORT_MAX_LDCS) {
   2472 		D2(vswp, "%s: using first of %d ldc ids",
   2473 		    __func__, nchan);
   2474 		nchan = VSW_PORT_MAX_LDCS;
   2475 	}
   2476 	portp->num_ldcs = nchan;
   2477 	portp->ldc_ids =
   2478 	    kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP);
   2479 	bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan);
   2480 
   2481 	/* read vlan id properties of this port node */
   2482 	vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid,
   2483 	    &portp->vids, &portp->nvids, NULL);
   2484 
   2485 	/* Check if hybrid property is present */
   2486 	if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) {
   2487 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
   2488 		portp->p_hio_enabled = B_TRUE;
   2489 	} else {
   2490 		portp->p_hio_enabled = B_FALSE;
   2491 	}
   2492 	/*
   2493 	 * Port hio capability determined after version
   2494 	 * negotiation, i.e., when we know the peer is HybridIO capable.
   2495 	 */
   2496 	portp->p_hio_capable = B_FALSE;
   2497 
   2498 	/* Read bandwidth of this port */
   2499 	vsw_port_read_bandwidth(portp, mdp, *node, &portp->p_bandwidth);
   2500 
   2501 	return (0);
   2502 }
   2503 
   2504 /*
   2505  * Add a new port to the system.
   2506  *
   2507  * Returns 0 on success, 1 on failure.
   2508  */
   2509 int
   2510 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
   2511 {
   2512 	vsw_port_t	*portp;
   2513 	int		rv;
   2514 
   2515 	portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
   2516 
   2517 	rv = vsw_port_read_props(portp, vswp, mdp, node);
   2518 	if (rv != 0) {
   2519 		kmem_free(portp, sizeof (*portp));
   2520 		return (1);
   2521 	}
   2522 
   2523 	rv = vsw_port_attach(portp);
   2524 	if (rv != 0) {
   2525 		DERR(vswp, "%s: failed to attach port", __func__);
   2526 		return (1);
   2527 	}
   2528 
   2529 	return (0);
   2530 }
   2531 
   2532 static int
   2533 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
   2534 	md_t *prev_mdp, mde_cookie_t prev_mdex)
   2535 {
   2536 	uint64_t	cport_num;
   2537 	uint64_t	pport_num;
   2538 	vsw_port_list_t	*plistp;
   2539 	vsw_port_t	*portp;
   2540 	uint16_t	pvid;
   2541 	vsw_vlanid_t	*vids;
   2542 	uint16_t	nvids;
   2543 	uint64_t	val;
   2544 	boolean_t	hio_enabled = B_FALSE;
   2545 	uint64_t	maxbw;
   2546 	enum		{P_MD_init = 0x1,
   2547 				P_MD_vlans = 0x2,
   2548 				P_MD_hio = 0x4,
   2549 				P_MD_maxbw = 0x8} updated;
   2550 
   2551 	updated = P_MD_init;
   2552 
   2553 	/*
   2554 	 * For now, we get port updates only if vlan ids changed.
   2555 	 * We read the port num and do some sanity check.
   2556 	 */
   2557 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
   2558 		return (1);
   2559 	}
   2560 
   2561 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
   2562 		return (1);
   2563 	}
   2564 	if (cport_num != pport_num)
   2565 		return (1);
   2566 
   2567 	plistp = &(vswp->plist);
   2568 
   2569 	READ_ENTER(&plistp->lockrw);
   2570 
   2571 	portp = vsw_lookup_port(vswp, cport_num);
   2572 	if (portp == NULL) {
   2573 		RW_EXIT(&plistp->lockrw);
   2574 		return (1);
   2575 	}
   2576 
   2577 	/* Read the vlan ids */
   2578 	vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid,
   2579 	    &vids, &nvids, NULL);
   2580 
   2581 	/* Determine if there are any vlan id updates */
   2582 	if ((pvid != portp->pvid) ||		/* pvid changed? */
   2583 	    (nvids != portp->nvids) ||		/* # of vids changed? */
   2584 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
   2585 	    !vsw_cmp_vids(vids, portp->vids, nvids))) {
   2586 		updated |= P_MD_vlans;
   2587 	}
   2588 
   2589 	/* Check if hybrid property is present */
   2590 	if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) {
   2591 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
   2592 		hio_enabled = B_TRUE;
   2593 	}
   2594 
   2595 	if (portp->p_hio_enabled != hio_enabled) {
   2596 		updated |= P_MD_hio;
   2597 	}
   2598 
   2599 	/* Check if maxbw property is present */
   2600 	vsw_port_read_bandwidth(portp, curr_mdp, curr_mdex, &maxbw);
   2601 	if (maxbw != portp->p_bandwidth) {
   2602 		if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) {
   2603 			updated |= P_MD_maxbw;
   2604 		} else {
   2605 			cmn_err(CE_NOTE, "!vsw%d: Unable to process bandwidth"
   2606 			    " update for port %d as the specified value:%ld"
   2607 			    " is invalid\n",
   2608 			    vswp->instance, portp->p_instance, maxbw);
   2609 		}
   2610 	}
   2611 
   2612 	if (updated & P_MD_vlans) {
   2613 		/* Remove existing vlan ids from the hash table. */
   2614 		vsw_vlan_remove_ids(portp, VSW_VNETPORT);
   2615 
   2616 		/* Reconfigure vlans with network device */
   2617 		vsw_mac_port_reconfig_vlans(portp, pvid, vids, nvids);
   2618 
   2619 		/* add these new vlan ids into hash table */
   2620 		vsw_vlan_add_ids(portp, VSW_VNETPORT);
   2621 
   2622 		/* reset the port if it is vlan unaware (ver < 1.3) */
   2623 		vsw_vlan_unaware_port_reset(portp);
   2624 	}
   2625 
   2626 	if (updated & P_MD_hio) {
   2627 		vsw_hio_port_update(portp, hio_enabled);
   2628 	}
   2629 
   2630 	if (updated & P_MD_maxbw) {
   2631 		vsw_update_bandwidth(NULL, portp, VSW_VNETPORT, maxbw);
   2632 	}
   2633 
   2634 	RW_EXIT(&plistp->lockrw);
   2635 
   2636 	return (0);
   2637 }
   2638 
   2639 /*
   2640  * vsw_mac_rx -- A common function to send packets to the interface.
   2641  * By default this function check if the interface is UP or not, the
   2642  * rest of the behaviour depends on the flags as below:
   2643  *
   2644  *	VSW_MACRX_PROMISC -- Check if the promisc mode set or not.
   2645  *	VSW_MACRX_COPYMSG -- Make a copy of the message(s).
   2646  *	VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack.
   2647  */
   2648 void
   2649 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
   2650     mblk_t *mp, vsw_macrx_flags_t flags)
   2651 {
   2652 	mblk_t		*mpt;
   2653 
   2654 	D1(vswp, "%s:enter\n", __func__);
   2655 	READ_ENTER(&vswp->if_lockrw);
   2656 	/* Check if the interface is up */
   2657 	if (!(vswp->if_state & VSW_IF_UP)) {
   2658 		RW_EXIT(&vswp->if_lockrw);
   2659 		/* Free messages only if FREEMSG flag specified */
   2660 		if (flags & VSW_MACRX_FREEMSG) {
   2661 			freemsgchain(mp);
   2662 		}
   2663 		D1(vswp, "%s:exit\n", __func__);
   2664 		return;
   2665 	}
   2666 	/*
   2667 	 * If PROMISC flag is passed, then check if
   2668 	 * the interface is in the PROMISC mode.
   2669 	 * If not, drop the messages.
   2670 	 */
   2671 	if (flags & VSW_MACRX_PROMISC) {
   2672 		if (!(vswp->if_state & VSW_IF_PROMISC)) {
   2673 			RW_EXIT(&vswp->if_lockrw);
   2674 			/* Free messages only if FREEMSG flag specified */
   2675 			if (flags & VSW_MACRX_FREEMSG) {
   2676 				freemsgchain(mp);
   2677 			}
   2678 			D1(vswp, "%s:exit\n", __func__);
   2679 			return;
   2680 		}
   2681 	}
   2682 	RW_EXIT(&vswp->if_lockrw);
   2683 	/*
   2684 	 * If COPYMSG flag is passed, then make a copy
   2685 	 * of the message chain and send up the copy.
   2686 	 */
   2687 	if (flags & VSW_MACRX_COPYMSG) {
   2688 		mp = copymsgchain(mp);
   2689 		if (mp == NULL) {
   2690 			D1(vswp, "%s:exit\n", __func__);
   2691 			return;
   2692 		}
   2693 	}
   2694 
   2695 	D2(vswp, "%s: sending up stack", __func__);
   2696 
   2697 	mpt = NULL;
   2698 	(void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt);
   2699 	if (mp != NULL) {
   2700 		mac_rx(vswp->if_mh, mrh, mp);
   2701 	}
   2702 	D1(vswp, "%s:exit\n", __func__);
   2703 }
   2704 
   2705 /* copy mac address of vsw into soft state structure */
   2706 static void
   2707 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
   2708 {
   2709 	int	i;
   2710 
   2711 	WRITE_ENTER(&vswp->if_lockrw);
   2712 	for (i = ETHERADDRL - 1; i >= 0; i--) {
   2713 		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
   2714 		macaddr >>= 8;
   2715 	}
   2716 	RW_EXIT(&vswp->if_lockrw);
   2717 }
   2718 
   2719 /* Compare VLAN ids, array size expected to be same. */
   2720 static boolean_t
   2721 vsw_cmp_vids(vsw_vlanid_t *vids1, vsw_vlanid_t *vids2, int nvids)
   2722 {
   2723 	int i, j;
   2724 	uint16_t vid;
   2725 
   2726 	for (i = 0; i < nvids; i++) {
   2727 		vid = vids1[i].vl_vid;
   2728 		for (j = 0; j < nvids; j++) {
   2729 			if (vid == vids2[i].vl_vid)
   2730 				break;
   2731 		}
   2732 		if (j == nvids) {
   2733 			return (B_FALSE);
   2734 		}
   2735 	}
   2736 	return (B_TRUE);
   2737 }
   2738