Home | History | Annotate | Download | only in io
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/types.h>
     28 #include <sys/errno.h>
     29 #include <sys/sysmacros.h>
     30 #include <sys/param.h>
     31 #include <sys/stream.h>
     32 #include <sys/strsubr.h>
     33 #include <sys/kmem.h>
     34 #include <sys/conf.h>
     35 #include <sys/devops.h>
     36 #include <sys/ksynch.h>
     37 #include <sys/stat.h>
     38 #include <sys/modctl.h>
     39 #include <sys/debug.h>
     40 #include <sys/ethernet.h>
     41 #include <sys/ddi.h>
     42 #include <sys/sunddi.h>
     43 #include <sys/strsun.h>
     44 #include <sys/note.h>
     45 #include <sys/mac_provider.h>
     46 #include <sys/mac_ether.h>
     47 #include <sys/ldc.h>
     48 #include <sys/mach_descrip.h>
     49 #include <sys/mdeg.h>
     50 #include <net/if.h>
     51 #include <sys/vnet.h>
     52 #include <sys/vio_mailbox.h>
     53 #include <sys/vio_common.h>
     54 #include <sys/vnet_common.h>
     55 #include <sys/vnet_mailbox.h>
     56 #include <sys/vio_util.h>
     57 #include <sys/vnet_gen.h>
     58 #include <sys/atomic.h>
     59 #include <sys/callb.h>
     60 #include <sys/sdt.h>
     61 #include <sys/intr.h>
     62 #include <sys/pattr.h>
     63 #include <sys/vlan.h>
     64 
     65 /*
     66  * Implementation of the mac functionality for vnet using the
     67  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
     68  */
     69 
     70 /*
     71  * Function prototypes.
     72  */
     73 /* vgen proxy entry points */
     74 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
     75     const uint8_t *macaddr, void **vgenhdl);
     76 int vgen_init_mdeg(void *arg);
     77 void vgen_uninit(void *arg);
     78 int vgen_dds_tx(void *arg, void *dmsg);
     79 void vgen_mod_init(void);
     80 int vgen_mod_cleanup(void);
     81 void vgen_mod_fini(void);
     82 int vgen_enable_intr(void *arg);
     83 int vgen_disable_intr(void *arg);
     84 mblk_t *vgen_poll(void *arg, int bytes_to_pickup);
     85 static int vgen_start(void *arg);
     86 static void vgen_stop(void *arg);
     87 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
     88 static int vgen_multicst(void *arg, boolean_t add,
     89 	const uint8_t *mca);
     90 static int vgen_promisc(void *arg, boolean_t on);
     91 static int vgen_unicst(void *arg, const uint8_t *mca);
     92 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
     93 static void vgen_ioctl(void *arg, queue_t *q, mblk_t *mp);
     94 #ifdef	VNET_IOC_DEBUG
     95 static int vgen_force_link_state(vgen_port_t *portp, int link_state);
     96 #endif
     97 
     98 /* vgen internal functions */
     99 static int vgen_read_mdprops(vgen_t *vgenp);
    100 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
    101 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
    102 	mde_cookie_t node);
    103 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
    104 	uint32_t *mtu);
    105 static void vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
    106 	boolean_t *pls);
    107 static void vgen_detach_ports(vgen_t *vgenp);
    108 static void vgen_port_detach(vgen_port_t *portp);
    109 static void vgen_port_list_insert(vgen_port_t *portp);
    110 static void vgen_port_list_remove(vgen_port_t *portp);
    111 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
    112 	int port_num);
    113 static int vgen_mdeg_reg(vgen_t *vgenp);
    114 static void vgen_mdeg_unreg(vgen_t *vgenp);
    115 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
    116 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
    117 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
    118 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
    119 	mde_cookie_t mdex);
    120 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
    121 static int vgen_port_attach(vgen_port_t *portp);
    122 static void vgen_port_detach_mdeg(vgen_port_t *portp);
    123 static void vgen_port_detach_mdeg(vgen_port_t *portp);
    124 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
    125 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
    126 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
    127 static void vgen_port_reset(vgen_port_t *portp);
    128 static void vgen_reset_vsw_port(vgen_t *vgenp);
    129 static void vgen_ldc_reset(vgen_ldc_t *ldcp);
    130 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
    131 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
    132 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
    133 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
    134 static void vgen_init_ports(vgen_t *vgenp);
    135 static void vgen_port_init(vgen_port_t *portp);
    136 static void vgen_uninit_ports(vgen_t *vgenp);
    137 static void vgen_port_uninit(vgen_port_t *portp);
    138 static void vgen_init_ldcs(vgen_port_t *portp);
    139 static void vgen_uninit_ldcs(vgen_port_t *portp);
    140 static int vgen_ldc_init(vgen_ldc_t *ldcp);
    141 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
    142 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
    143 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
    144 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
    145 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
    146 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
    147 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
    148 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
    149 static int vgen_ldcsend(void *arg, mblk_t *mp);
    150 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
    151 static int vgen_ldcsend_dring(void *arg, mblk_t *mp);
    152 static void vgen_reclaim(vgen_ldc_t *ldcp);
    153 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
    154 static int vgen_num_txpending(vgen_ldc_t *ldcp);
    155 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
    156 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
    157 static void vgen_ldc_watchdog(void *arg);
    158 static mblk_t *vgen_ldc_poll(vgen_ldc_t *ldcp, int bytes_to_pickup);
    159 
    160 /* vgen handshake functions */
    161 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
    162 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
    163 	boolean_t caller_holds_lock);
    164 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
    165 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
    166 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
    167 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
    168 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
    169 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
    170 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
    171 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
    172 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
    173 static void vgen_handshake(vgen_ldc_t *ldcp);
    174 static int vgen_handshake_done(vgen_ldc_t *ldcp);
    175 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
    176 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
    177 	vio_msg_tag_t *tagp);
    178 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
    179 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
    180 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
    181 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
    182 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
    183 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
    184 static void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
    185 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
    186 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
    187 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
    188 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
    189 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
    190 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
    191 	uint32_t start, int32_t end, uint8_t pstate);
    192 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
    193 	uint32_t msglen);
    194 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
    195 static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
    196 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp);
    197 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
    198 static int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
    199 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
    200 static void vgen_hwatchdog(void *arg);
    201 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
    202 static void vgen_print_hparams(vgen_hparams_t *hp);
    203 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
    204 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
    205 static void vgen_drain_rcv_thread(vgen_ldc_t *ldcp);
    206 static void vgen_ldc_rcv_worker(void *arg);
    207 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
    208 static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp, mblk_t *bpt);
    209 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
    210 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
    211 static void vgen_link_update(vgen_t *vgenp, link_state_t link_state);
    212 
    213 /* VLAN routines */
    214 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
    215 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
    216 	uint16_t *nvidsp, uint16_t *default_idp);
    217 static void vgen_vlan_create_hash(vgen_port_t *portp);
    218 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
    219 static void vgen_vlan_add_ids(vgen_port_t *portp);
    220 static void vgen_vlan_remove_ids(vgen_port_t *portp);
    221 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
    222 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
    223 	uint16_t *vidp);
    224 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
    225 	boolean_t is_tagged, uint16_t vid);
    226 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
    227 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
    228 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
    229 
    230 /* externs */
    231 extern void vnet_dds_rx(void *arg, void *dmsg);
    232 extern void vnet_dds_cleanup_hio(vnet_t *vnetp);
    233 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
    234 extern void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
    235 
    236 /*
    237  * The handshake process consists of 5 phases defined below, with VH_PHASE0
    238  * being the pre-handshake phase and VH_DONE is the phase to indicate
    239  * successful completion of all phases.
    240  * Each phase may have one to several handshake states which are required
    241  * to complete successfully to move to the next phase.
    242  * Refer to the functions vgen_handshake() and vgen_handshake_done() for
    243  * more details.
    244  */
    245 /* handshake phases */
    246 enum {	VH_PHASE0, VH_PHASE1, VH_PHASE2, VH_PHASE3, VH_DONE = 0x80 };
    247 
    248 /* handshake states */
    249 enum {
    250 
    251 	VER_INFO_SENT	=	0x1,
    252 	VER_ACK_RCVD	=	0x2,
    253 	VER_INFO_RCVD	=	0x4,
    254 	VER_ACK_SENT	=	0x8,
    255 	VER_NEGOTIATED	=	(VER_ACK_RCVD | VER_ACK_SENT),
    256 
    257 	ATTR_INFO_SENT	=	0x10,
    258 	ATTR_ACK_RCVD	=	0x20,
    259 	ATTR_INFO_RCVD	=	0x40,
    260 	ATTR_ACK_SENT	=	0x80,
    261 	ATTR_INFO_EXCHANGED	=	(ATTR_ACK_RCVD | ATTR_ACK_SENT),
    262 
    263 	DRING_INFO_SENT	=	0x100,
    264 	DRING_ACK_RCVD	=	0x200,
    265 	DRING_INFO_RCVD	=	0x400,
    266 	DRING_ACK_SENT	=	0x800,
    267 	DRING_INFO_EXCHANGED	=	(DRING_ACK_RCVD | DRING_ACK_SENT),
    268 
    269 	RDX_INFO_SENT	=	0x1000,
    270 	RDX_ACK_RCVD	=	0x2000,
    271 	RDX_INFO_RCVD	=	0x4000,
    272 	RDX_ACK_SENT	=	0x8000,
    273 	RDX_EXCHANGED	=	(RDX_ACK_RCVD | RDX_ACK_SENT)
    274 
    275 };
    276 
    277 #define	VGEN_PRI_ETH_DEFINED(vgenp)	((vgenp)->pri_num_types != 0)
    278 
    279 #define	LDC_LOCK(ldcp)	\
    280 				mutex_enter(&((ldcp)->cblock));\
    281 				mutex_enter(&((ldcp)->rxlock));\
    282 				mutex_enter(&((ldcp)->wrlock));\
    283 				mutex_enter(&((ldcp)->txlock));\
    284 				mutex_enter(&((ldcp)->tclock));
    285 #define	LDC_UNLOCK(ldcp)	\
    286 				mutex_exit(&((ldcp)->tclock));\
    287 				mutex_exit(&((ldcp)->txlock));\
    288 				mutex_exit(&((ldcp)->wrlock));\
    289 				mutex_exit(&((ldcp)->rxlock));\
    290 				mutex_exit(&((ldcp)->cblock));
    291 
    292 #define	VGEN_VER_EQ(ldcp, major, minor)	\
    293 	((ldcp)->local_hparams.ver_major == (major) &&	\
    294 	    (ldcp)->local_hparams.ver_minor == (minor))
    295 
    296 #define	VGEN_VER_LT(ldcp, major, minor)	\
    297 	(((ldcp)->local_hparams.ver_major < (major)) ||	\
    298 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
    299 	    (ldcp)->local_hparams.ver_minor < (minor)))
    300 
    301 #define	VGEN_VER_GTEQ(ldcp, major, minor)	\
    302 	(((ldcp)->local_hparams.ver_major > (major)) ||	\
    303 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
    304 	    (ldcp)->local_hparams.ver_minor >= (minor)))
    305 
    306 static struct ether_addr etherbroadcastaddr = {
    307 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
    308 };
    309 /*
    310  * MIB II broadcast/multicast packets
    311  */
    312 #define	IS_BROADCAST(ehp) \
    313 		(ether_cmp(&ehp->ether_dhost, &etherbroadcastaddr) == 0)
    314 #define	IS_MULTICAST(ehp) \
    315 		((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1)
    316 
    317 /*
    318  * Property names
    319  */
    320 static char macaddr_propname[] = "mac-address";
    321 static char rmacaddr_propname[] = "remote-mac-address";
    322 static char channel_propname[] = "channel-endpoint";
    323 static char reg_propname[] = "reg";
    324 static char port_propname[] = "port";
    325 static char swport_propname[] = "switch-port";
    326 static char id_propname[] = "id";
    327 static char vdev_propname[] = "virtual-device";
    328 static char vnet_propname[] = "network";
    329 static char pri_types_propname[] = "priority-ether-types";
    330 static char vgen_pvid_propname[] = "port-vlan-id";
    331 static char vgen_vid_propname[] = "vlan-id";
    332 static char vgen_dvid_propname[] = "default-vlan-id";
    333 static char port_pvid_propname[] = "remote-port-vlan-id";
    334 static char port_vid_propname[] = "remote-vlan-id";
    335 static char vgen_mtu_propname[] = "mtu";
    336 static char vgen_linkprop_propname[] = "linkprop";
    337 
    338 /*
    339  * VIO Protocol Version Info:
    340  *
    341  * The version specified below represents the version of protocol currently
    342  * supported in the driver. It means the driver can negotiate with peers with
    343  * versions <= this version. Here is a summary of the feature(s) that are
    344  * supported at each version of the protocol:
    345  *
    346  * 1.0			Basic VIO protocol.
    347  * 1.1			vDisk protocol update (no virtual network update).
    348  * 1.2			Support for priority frames (priority-ether-types).
    349  * 1.3			VLAN and HybridIO support.
    350  * 1.4			Jumbo Frame support.
    351  * 1.5			Link State Notification support with optional support
    352  * 			for Physical Link information.
    353  */
    354 static vgen_ver_t vgen_versions[VGEN_NUM_VER] =  { {1, 5} };
    355 
    356 /* Tunables */
    357 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
    358 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
    359 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
    360 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
    361 uint32_t vgen_ldccl_retries = 5;	/* max # of ldc_close() retries */
    362 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
    363 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
    364 uint32_t vgen_tx_retries = 0x4;		/* retry when tx descr not available */
    365 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
    366 
    367 int vgen_rcv_thread_enabled = 1;	/* Enable Receive thread */
    368 
    369 static vio_mblk_pool_t	*vgen_rx_poolp = NULL;
    370 static krwlock_t	vgen_rw;
    371 
    372 /*
    373  * Maximum number of packets accumulated before they are sent up. It is
    374  * best to keep this at 60% of the number of receive buffers.
    375  */
    376 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
    377 
    378 /*
    379  * Internal tunables for receive buffer pools, that is,  the size and number of
    380  * mblks for each pool. At least 3 sizes must be specified if these are used.
    381  * The sizes must be specified in increasing order. Non-zero value of the first
    382  * size will be used as a hint to use these values instead of the algorithm
    383  * that determines the sizes based on MTU.
    384  */
    385 uint32_t vgen_rbufsz1 = 0;
    386 uint32_t vgen_rbufsz2 = 0;
    387 uint32_t vgen_rbufsz3 = 0;
    388 uint32_t vgen_rbufsz4 = 0;
    389 
    390 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
    391 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
    392 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
    393 uint32_t vgen_nrbufs4 = VGEN_NRBUFS;
    394 
    395 /*
    396  * In the absence of "priority-ether-types" property in MD, the following
    397  * internal tunable can be set to specify a single priority ethertype.
    398  */
    399 uint64_t vgen_pri_eth_type = 0;
    400 
    401 /*
    402  * Number of transmit priority buffers that are preallocated per device.
    403  * This number is chosen to be a small value to throttle transmission
    404  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
    405  */
    406 uint32_t vgen_pri_tx_nmblks = 64;
    407 
    408 uint32_t	vgen_vlan_nchains = 4;	/* # of chains in vlan id hash table */
    409 
    410 #ifdef DEBUG
    411 /* flags to simulate error conditions for debugging */
    412 int vgen_trigger_txtimeout = 0;
    413 int vgen_trigger_rxlost = 0;
    414 #endif
    415 
    416 /*
    417  * Matching criteria passed to the MDEG to register interest
    418  * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
    419  * by their 'name' and 'cfg-handle' properties.
    420  */
    421 static md_prop_match_t vdev_prop_match[] = {
    422 	{ MDET_PROP_STR,    "name"   },
    423 	{ MDET_PROP_VAL,    "cfg-handle" },
    424 	{ MDET_LIST_END,    NULL    }
    425 };
    426 
    427 static mdeg_node_match_t vdev_match = { "virtual-device",
    428 						vdev_prop_match };
    429 
    430 /* MD update matching structure */
    431 static md_prop_match_t	vport_prop_match[] = {
    432 	{ MDET_PROP_VAL,	"id" },
    433 	{ MDET_LIST_END,	NULL }
    434 };
    435 
    436 static mdeg_node_match_t vport_match = { "virtual-device-port",
    437 					vport_prop_match };
    438 
    439 /* template for matching a particular vnet instance */
    440 static mdeg_prop_spec_t vgen_prop_template[] = {
    441 	{ MDET_PROP_STR,	"name",		"network" },
    442 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
    443 	{ MDET_LIST_END,	NULL,		NULL }
    444 };
    445 
    446 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
    447 
    448 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
    449 
    450 #ifdef	VNET_IOC_DEBUG
    451 #define	VGEN_M_CALLBACK_FLAGS	(MC_IOCTL)
    452 #else
    453 #define	VGEN_M_CALLBACK_FLAGS	(0)
    454 #endif
    455 
    456 static mac_callbacks_t vgen_m_callbacks = {
    457 	VGEN_M_CALLBACK_FLAGS,
    458 	vgen_stat,
    459 	vgen_start,
    460 	vgen_stop,
    461 	vgen_promisc,
    462 	vgen_multicst,
    463 	vgen_unicst,
    464 	vgen_tx,
    465 	vgen_ioctl,
    466 	NULL,
    467 	NULL
    468 };
    469 
    470 /* externs */
    471 extern pri_t	maxclsyspri;
    472 extern proc_t	p0;
    473 extern uint32_t vnet_ntxds;
    474 extern uint32_t vnet_ldcwd_interval;
    475 extern uint32_t vnet_ldcwd_txtimeout;
    476 extern uint32_t vnet_ldc_mtu;
    477 extern uint32_t vnet_nrbufs;
    478 extern uint32_t	vnet_ethermtu;
    479 extern uint16_t	vnet_default_vlan_id;
    480 extern boolean_t vnet_jumbo_rxpools;
    481 
    482 #ifdef DEBUG
    483 
    484 extern int vnet_dbglevel;
    485 static void debug_printf(const char *fname, vgen_t *vgenp,
    486 	vgen_ldc_t *ldcp, const char *fmt, ...);
    487 
    488 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
    489 int vgendbg_ldcid = -1;
    490 
    491 /* simulate handshake error conditions for debug */
    492 uint32_t vgen_hdbg;
    493 #define	HDBG_VERSION	0x1
    494 #define	HDBG_TIMEOUT	0x2
    495 #define	HDBG_BAD_SID	0x4
    496 #define	HDBG_OUT_STATE	0x8
    497 
    498 #endif
    499 
    500 /*
    501  * vgen_init() is called by an instance of vnet driver to initialize the
    502  * corresponding generic proxy transport layer. The arguments passed by vnet
    503  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
    504  * the mac address of the vnet device, and a pointer to vgen_t is passed
    505  * back as a handle to vnet.
    506  */
    507 int
    508 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
    509     const uint8_t *macaddr, void **vgenhdl)
    510 {
    511 	vgen_t *vgenp;
    512 	int instance;
    513 	int rv;
    514 
    515 	if ((vnetp == NULL) || (vnetdip == NULL))
    516 		return (DDI_FAILURE);
    517 
    518 	instance = ddi_get_instance(vnetdip);
    519 
    520 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
    521 
    522 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
    523 
    524 	vgenp->vnetp = vnetp;
    525 	vgenp->instance = instance;
    526 	vgenp->regprop = regprop;
    527 	vgenp->vnetdip = vnetdip;
    528 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
    529 	vgenp->phys_link_state = LINK_STATE_UNKNOWN;
    530 
    531 	/* allocate multicast table */
    532 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
    533 	    sizeof (struct ether_addr), KM_SLEEP);
    534 	vgenp->mccount = 0;
    535 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
    536 
    537 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
    538 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
    539 
    540 	rv = vgen_read_mdprops(vgenp);
    541 	if (rv != 0) {
    542 		goto vgen_init_fail;
    543 	}
    544 	*vgenhdl = (void *)vgenp;
    545 
    546 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
    547 	return (DDI_SUCCESS);
    548 
    549 vgen_init_fail:
    550 	rw_destroy(&vgenp->vgenports.rwlock);
    551 	mutex_destroy(&vgenp->lock);
    552 	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
    553 	    sizeof (struct ether_addr));
    554 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
    555 		kmem_free(vgenp->pri_types,
    556 		    sizeof (uint16_t) * vgenp->pri_num_types);
    557 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
    558 	}
    559 	KMEM_FREE(vgenp);
    560 	return (DDI_FAILURE);
    561 }
    562 
    563 int
    564 vgen_init_mdeg(void *arg)
    565 {
    566 	vgen_t	*vgenp = (vgen_t *)arg;
    567 
    568 	/* register with MD event generator */
    569 	return (vgen_mdeg_reg(vgenp));
    570 }
    571 
    572 /*
    573  * Called by vnet to undo the initializations done by vgen_init().
    574  * The handle provided by generic transport during vgen_init() is the argument.
    575  */
    576 void
    577 vgen_uninit(void *arg)
    578 {
    579 	vgen_t		*vgenp = (vgen_t *)arg;
    580 	vio_mblk_pool_t	*rp;
    581 	vio_mblk_pool_t	*nrp;
    582 
    583 	if (vgenp == NULL) {
    584 		return;
    585 	}
    586 
    587 	DBG1(vgenp, NULL, "enter\n");
    588 
    589 	/* unregister with MD event generator */
    590 	vgen_mdeg_unreg(vgenp);
    591 
    592 	mutex_enter(&vgenp->lock);
    593 
    594 	/* detach all ports from the device */
    595 	vgen_detach_ports(vgenp);
    596 
    597 	/*
    598 	 * free any pending rx mblk pools,
    599 	 * that couldn't be freed previously during channel detach.
    600 	 */
    601 	rp = vgenp->rmp;
    602 	while (rp != NULL) {
    603 		nrp = vgenp->rmp = rp->nextp;
    604 		if (vio_destroy_mblks(rp)) {
    605 			WRITE_ENTER(&vgen_rw);
    606 			rp->nextp = vgen_rx_poolp;
    607 			vgen_rx_poolp = rp;
    608 			RW_EXIT(&vgen_rw);
    609 		}
    610 		rp = nrp;
    611 	}
    612 
    613 	/* free multicast table */
    614 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
    615 
    616 	/* free pri_types table */
    617 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
    618 		kmem_free(vgenp->pri_types,
    619 		    sizeof (uint16_t) * vgenp->pri_num_types);
    620 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
    621 	}
    622 
    623 	mutex_exit(&vgenp->lock);
    624 
    625 	rw_destroy(&vgenp->vgenports.rwlock);
    626 	mutex_destroy(&vgenp->lock);
    627 
    628 	DBG1(vgenp, NULL, "exit\n");
    629 	KMEM_FREE(vgenp);
    630 }
    631 
    632 /*
    633  * module specific initialization common to all instances of vnet/vgen.
    634  */
    635 void
    636 vgen_mod_init(void)
    637 {
    638 	rw_init(&vgen_rw, NULL, RW_DRIVER, NULL);
    639 }
    640 
    641 /*
    642  * module specific cleanup common to all instances of vnet/vgen.
    643  */
    644 int
    645 vgen_mod_cleanup(void)
    646 {
    647 	vio_mblk_pool_t	*poolp, *npoolp;
    648 
    649 	/*
    650 	 * If any rx mblk pools are still in use, return
    651 	 * error and stop the module from unloading.
    652 	 */
    653 	WRITE_ENTER(&vgen_rw);
    654 	poolp = vgen_rx_poolp;
    655 	while (poolp != NULL) {
    656 		npoolp = vgen_rx_poolp = poolp->nextp;
    657 		if (vio_destroy_mblks(poolp) != 0) {
    658 			vgen_rx_poolp = poolp;
    659 			RW_EXIT(&vgen_rw);
    660 			return (EBUSY);
    661 		}
    662 		poolp = npoolp;
    663 	}
    664 	RW_EXIT(&vgen_rw);
    665 
    666 	return (0);
    667 }
    668 
    669 /*
    670  * module specific uninitialization common to all instances of vnet/vgen.
    671  */
    672 void
    673 vgen_mod_fini(void)
    674 {
    675 	rw_destroy(&vgen_rw);
    676 }
    677 
    678 /* enable transmit/receive for the device */
    679 int
    680 vgen_start(void *arg)
    681 {
    682 	vgen_port_t	*portp = (vgen_port_t *)arg;
    683 	vgen_t		*vgenp = portp->vgenp;
    684 
    685 	DBG1(vgenp, NULL, "enter\n");
    686 	mutex_enter(&portp->lock);
    687 	vgen_port_init(portp);
    688 	portp->flags |= VGEN_STARTED;
    689 	mutex_exit(&portp->lock);
    690 	DBG1(vgenp, NULL, "exit\n");
    691 
    692 	return (DDI_SUCCESS);
    693 }
    694 
    695 /* stop transmit/receive */
    696 void
    697 vgen_stop(void *arg)
    698 {
    699 	vgen_port_t	*portp = (vgen_port_t *)arg;
    700 	vgen_t		*vgenp = portp->vgenp;
    701 
    702 	DBG1(vgenp, NULL, "enter\n");
    703 
    704 	mutex_enter(&portp->lock);
    705 	vgen_port_uninit(portp);
    706 	portp->flags &= ~(VGEN_STARTED);
    707 	mutex_exit(&portp->lock);
    708 	DBG1(vgenp, NULL, "exit\n");
    709 
    710 }
    711 
    712 /* vgen transmit function */
    713 static mblk_t *
    714 vgen_tx(void *arg, mblk_t *mp)
    715 {
    716 	int i;
    717 	vgen_port_t *portp;
    718 	int status = VGEN_FAILURE;
    719 
    720 	portp = (vgen_port_t *)arg;
    721 	/*
    722 	 * Retry so that we avoid reporting a failure
    723 	 * to the upper layer. Returning a failure may cause the
    724 	 * upper layer to go into single threaded mode there by
    725 	 * causing performance degradation, especially for a large
    726 	 * number of connections.
    727 	 */
    728 	for (i = 0; i < vgen_tx_retries; ) {
    729 		status = vgen_portsend(portp, mp);
    730 		if (status == VGEN_SUCCESS) {
    731 			break;
    732 		}
    733 		if (++i < vgen_tx_retries)
    734 			delay(drv_usectohz(vgen_tx_delay));
    735 	}
    736 	if (status != VGEN_SUCCESS) {
    737 		/* failure */
    738 		return (mp);
    739 	}
    740 	/* success */
    741 	return (NULL);
    742 }
    743 
    744 /*
    745  * This function provides any necessary tagging/untagging of the frames
    746  * that are being transmitted over the port. It first verifies the vlan
    747  * membership of the destination(port) and drops the packet if the
    748  * destination doesn't belong to the given vlan.
    749  *
    750  * Arguments:
    751  *   portp:     port over which the frames should be transmitted
    752  *   mp:        frame to be transmitted
    753  *   is_tagged:
    754  *              B_TRUE: indicates frame header contains the vlan tag already.
    755  *              B_FALSE: indicates frame is untagged.
    756  *   vid:       vlan in which the frame should be transmitted.
    757  *
    758  * Returns:
    759  *              Sucess: frame(mblk_t *) after doing the necessary tag/untag.
    760  *              Failure: NULL
    761  */
    762 static mblk_t *
    763 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
    764 	uint16_t vid)
    765 {
    766 	vgen_t				*vgenp;
    767 	boolean_t			dst_tagged;
    768 	int				rv;
    769 
    770 	vgenp = portp->vgenp;
    771 
    772 	/*
    773 	 * If the packet is going to a vnet:
    774 	 *   Check if the destination vnet is in the same vlan.
    775 	 *   Check the frame header if tag or untag is needed.
    776 	 *
    777 	 * We do not check the above conditions if the packet is going to vsw:
    778 	 *   vsw must be present implicitly in all the vlans that a vnet device
    779 	 *   is configured into; even if vsw itself is not assigned to those
    780 	 *   vlans as an interface. For instance, the packet might be destined
    781 	 *   to another vnet(indirectly through vsw) or to an external host
    782 	 *   which is in the same vlan as this vnet and vsw itself may not be
    783 	 *   present in that vlan. Similarly packets going to vsw must be
    784 	 *   always tagged(unless in the default-vlan) if not already tagged,
    785 	 *   as we do not know the final destination. This is needed because
    786 	 *   vsw must always invoke its switching function only after tagging
    787 	 *   the packet; otherwise after switching function determines the
    788 	 *   destination we cannot figure out if the destination belongs to the
    789 	 *   the same vlan that the frame originated from and if it needs tag/
    790 	 *   untag. Note that vsw will tag the packet itself when it receives
    791 	 *   it over the channel from a client if needed. However, that is
    792 	 *   needed only in the case of vlan unaware clients such as obp or
    793 	 *   earlier versions of vnet.
    794 	 *
    795 	 */
    796 	if (portp != vgenp->vsw_portp) {
    797 		/*
    798 		 * Packet going to a vnet. Check if the destination vnet is in
    799 		 * the same vlan. Then check the frame header if tag/untag is
    800 		 * needed.
    801 		 */
    802 		rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
    803 		if (rv == B_FALSE) {
    804 			/* drop the packet */
    805 			freemsg(mp);
    806 			return (NULL);
    807 		}
    808 
    809 		/* is the destination tagged or untagged in this vlan? */
    810 		(vid == portp->pvid) ? (dst_tagged = B_FALSE) :
    811 		    (dst_tagged = B_TRUE);
    812 
    813 		if (is_tagged == dst_tagged) {
    814 			/* no tagging/untagging needed */
    815 			return (mp);
    816 		}
    817 
    818 		if (is_tagged == B_TRUE) {
    819 			/* frame is tagged; destination needs untagged */
    820 			mp = vnet_vlan_remove_tag(mp);
    821 			return (mp);
    822 		}
    823 
    824 		/* (is_tagged == B_FALSE): fallthru to tag tx packet: */
    825 	}
    826 
    827 	/*
    828 	 * Packet going to a vnet needs tagging.
    829 	 * OR
    830 	 * If the packet is going to vsw, then it must be tagged in all cases:
    831 	 * unknown unicast, broadcast/multicast or to vsw interface.
    832 	 */
    833 
    834 	if (is_tagged == B_FALSE) {
    835 		mp = vnet_vlan_insert_tag(mp, vid);
    836 	}
    837 
    838 	return (mp);
    839 }
    840 
    841 /* transmit packets over the given port */
    842 static int
    843 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
    844 {
    845 	vgen_ldclist_t		*ldclp;
    846 	vgen_ldc_t		*ldcp;
    847 	int			status;
    848 	int			rv = VGEN_SUCCESS;
    849 	vgen_t			*vgenp = portp->vgenp;
    850 	vnet_t			*vnetp = vgenp->vnetp;
    851 	boolean_t		is_tagged;
    852 	boolean_t		dec_refcnt = B_FALSE;
    853 	uint16_t		vlan_id;
    854 	struct ether_header	*ehp;
    855 
    856 	if (portp->use_vsw_port) {
    857 		(void) atomic_inc_32(&vgenp->vsw_port_refcnt);
    858 		portp = portp->vgenp->vsw_portp;
    859 		dec_refcnt = B_TRUE;
    860 	}
    861 	if (portp == NULL) {
    862 		return (VGEN_FAILURE);
    863 	}
    864 
    865 	/*
    866 	 * Determine the vlan id that the frame belongs to.
    867 	 */
    868 	ehp = (struct ether_header *)mp->b_rptr;
    869 	is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
    870 
    871 	if (vlan_id == vnetp->default_vlan_id) {
    872 
    873 		/* Frames in default vlan must be untagged */
    874 		ASSERT(is_tagged == B_FALSE);
    875 
    876 		/*
    877 		 * If the destination is a vnet-port verify it belongs to the
    878 		 * default vlan; otherwise drop the packet. We do not need
    879 		 * this check for vsw-port, as it should implicitly belong to
    880 		 * this vlan; see comments in vgen_vlan_frame_fixtag().
    881 		 */
    882 		if (portp != vgenp->vsw_portp &&
    883 		    portp->pvid != vnetp->default_vlan_id) {
    884 			freemsg(mp);
    885 			goto portsend_ret;
    886 		}
    887 
    888 	} else {	/* frame not in default-vlan */
    889 
    890 		mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
    891 		if (mp == NULL) {
    892 			goto portsend_ret;
    893 		}
    894 
    895 	}
    896 
    897 	ldclp = &portp->ldclist;
    898 	READ_ENTER(&ldclp->rwlock);
    899 	/*
    900 	 * NOTE: for now, we will assume we have a single channel.
    901 	 */
    902 	if (ldclp->headp == NULL) {
    903 		RW_EXIT(&ldclp->rwlock);
    904 		rv = VGEN_FAILURE;
    905 		goto portsend_ret;
    906 	}
    907 	ldcp = ldclp->headp;
    908 
    909 	status = ldcp->tx(ldcp, mp);
    910 
    911 	RW_EXIT(&ldclp->rwlock);
    912 
    913 	if (status != VGEN_TX_SUCCESS) {
    914 		rv = VGEN_FAILURE;
    915 	}
    916 
    917 portsend_ret:
    918 	if (dec_refcnt == B_TRUE) {
    919 		(void) atomic_dec_32(&vgenp->vsw_port_refcnt);
    920 	}
    921 	return (rv);
    922 }
    923 
    924 /*
    925  * Wrapper function to transmit normal and/or priority frames over the channel.
    926  */
    927 static int
    928 vgen_ldcsend(void *arg, mblk_t *mp)
    929 {
    930 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
    931 	int			status;
    932 	struct ether_header	*ehp;
    933 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
    934 	uint32_t		num_types;
    935 	uint16_t		*types;
    936 	int			i;
    937 
    938 	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
    939 
    940 	num_types = vgenp->pri_num_types;
    941 	types = vgenp->pri_types;
    942 	ehp = (struct ether_header *)mp->b_rptr;
    943 
    944 	for (i = 0; i < num_types; i++) {
    945 
    946 		if (ehp->ether_type == types[i]) {
    947 			/* priority frame, use pri tx function */
    948 			vgen_ldcsend_pkt(ldcp, mp);
    949 			return (VGEN_SUCCESS);
    950 		}
    951 
    952 	}
    953 
    954 	status  = vgen_ldcsend_dring(ldcp, mp);
    955 
    956 	return (status);
    957 }
    958 
    959 /*
    960  * This functions handles ldc channel reset while in the context
    961  * of transmit routines: vgen_ldcsend_pkt() or vgen_ldcsend_dring().
    962  */
    963 static void
    964 vgen_ldcsend_process_reset(vgen_ldc_t *ldcp)
    965 {
    966 	ldc_status_t	istatus;
    967 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
    968 
    969 	if (mutex_tryenter(&ldcp->cblock)) {
    970 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
    971 			DWARN(vgenp, ldcp, "ldc_status() error\n");
    972 		} else {
    973 			ldcp->ldc_status = istatus;
    974 		}
    975 		if (ldcp->ldc_status != LDC_UP) {
    976 			vgen_handle_evt_reset(ldcp);
    977 		}
    978 		mutex_exit(&ldcp->cblock);
    979 	}
    980 }
    981 
    982 /*
    983  * This function transmits the frame in the payload of a raw data
    984  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
    985  * send special frames with high priorities, without going through
    986  * the normal data path which uses descriptor ring mechanism.
    987  */
    988 static void
    989 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
    990 {
    991 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
    992 	vio_raw_data_msg_t	*pkt;
    993 	mblk_t			*bp;
    994 	mblk_t			*nmp = NULL;
    995 	caddr_t			dst;
    996 	uint32_t		mblksz;
    997 	uint32_t		size;
    998 	uint32_t		nbytes;
    999 	int			rv;
   1000 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
   1001 	vgen_stats_t		*statsp = &ldcp->stats;
   1002 
   1003 	/* drop the packet if ldc is not up or handshake is not done */
   1004 	if (ldcp->ldc_status != LDC_UP) {
   1005 		(void) atomic_inc_32(&statsp->tx_pri_fail);
   1006 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
   1007 		    ldcp->ldc_status);
   1008 		goto send_pkt_exit;
   1009 	}
   1010 
   1011 	if (ldcp->hphase != VH_DONE) {
   1012 		(void) atomic_inc_32(&statsp->tx_pri_fail);
   1013 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
   1014 		    ldcp->hphase);
   1015 		goto send_pkt_exit;
   1016 	}
   1017 
   1018 	size = msgsize(mp);
   1019 
   1020 	/* frame size bigger than available payload len of raw data msg ? */
   1021 	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
   1022 		(void) atomic_inc_32(&statsp->tx_pri_fail);
   1023 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
   1024 		goto send_pkt_exit;
   1025 	}
   1026 
   1027 	if (size < ETHERMIN)
   1028 		size = ETHERMIN;
   1029 
   1030 	/* alloc space for a raw data message */
   1031 	nmp = vio_allocb(vgenp->pri_tx_vmp);
   1032 	if (nmp == NULL) {
   1033 		(void) atomic_inc_32(&statsp->tx_pri_fail);
   1034 		DWARN(vgenp, ldcp, "vio_allocb failed\n");
   1035 		goto send_pkt_exit;
   1036 	}
   1037 	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
   1038 
   1039 	/* copy frame into the payload of raw data message */
   1040 	dst = (caddr_t)pkt->data;
   1041 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
   1042 		mblksz = MBLKL(bp);
   1043 		bcopy(bp->b_rptr, dst, mblksz);
   1044 		dst += mblksz;
   1045 	}
   1046 
   1047 	/* setup the raw data msg */
   1048 	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
   1049 	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
   1050 	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
   1051 	pkt->tag.vio_sid = ldcp->local_sid;
   1052 	nbytes = VIO_PKT_DATA_HDRSIZE + size;
   1053 
   1054 	/* send the msg over ldc */
   1055 	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
   1056 	if (rv != VGEN_SUCCESS) {
   1057 		(void) atomic_inc_32(&statsp->tx_pri_fail);
   1058 		DWARN(vgenp, ldcp, "Error sending priority frame\n");
   1059 		if (rv == ECONNRESET) {
   1060 			vgen_ldcsend_process_reset(ldcp);
   1061 		}
   1062 		goto send_pkt_exit;
   1063 	}
   1064 
   1065 	/* update stats */
   1066 	(void) atomic_inc_64(&statsp->tx_pri_packets);
   1067 	(void) atomic_add_64(&statsp->tx_pri_bytes, size);
   1068 
   1069 send_pkt_exit:
   1070 	if (nmp != NULL)
   1071 		freemsg(nmp);
   1072 	freemsg(mp);
   1073 }
   1074 
   1075 /*
   1076  * This function transmits normal (non-priority) data frames over
   1077  * the channel. It queues the frame into the transmit descriptor ring
   1078  * and sends a VIO_DRING_DATA message if needed, to wake up the
   1079  * peer to (re)start processing.
   1080  */
   1081 static int
   1082 vgen_ldcsend_dring(void *arg, mblk_t *mp)
   1083 {
   1084 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
   1085 	vgen_private_desc_t	*tbufp;
   1086 	vgen_private_desc_t	*rtbufp;
   1087 	vnet_public_desc_t	*rtxdp;
   1088 	vgen_private_desc_t	*ntbufp;
   1089 	vnet_public_desc_t	*txdp;
   1090 	vio_dring_entry_hdr_t	*hdrp;
   1091 	vgen_stats_t		*statsp;
   1092 	struct ether_header	*ehp;
   1093 	boolean_t		is_bcast = B_FALSE;
   1094 	boolean_t		is_mcast = B_FALSE;
   1095 	size_t			mblksz;
   1096 	caddr_t			dst;
   1097 	mblk_t			*bp;
   1098 	size_t			size;
   1099 	int			rv = 0;
   1100 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
   1101 	vgen_hparams_t		*lp = &ldcp->local_hparams;
   1102 
   1103 	statsp = &ldcp->stats;
   1104 	size = msgsize(mp);
   1105 
   1106 	DBG1(vgenp, ldcp, "enter\n");
   1107 
   1108 	if (ldcp->ldc_status != LDC_UP) {
   1109 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
   1110 		    ldcp->ldc_status);
   1111 		/* retry ldc_up() if needed */
   1112 #ifdef	VNET_IOC_DEBUG
   1113 		if (ldcp->flags & CHANNEL_STARTED && !ldcp->link_down_forced) {
   1114 #else
   1115 		if (ldcp->flags & CHANNEL_STARTED) {
   1116 #endif
   1117 			(void) ldc_up(ldcp->ldc_handle);
   1118 		}
   1119 		goto send_dring_exit;
   1120 	}
   1121 
   1122 	/* drop the packet if ldc is not up or handshake is not done */
   1123 	if (ldcp->hphase != VH_DONE) {
   1124 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
   1125 		    ldcp->hphase);
   1126 		goto send_dring_exit;
   1127 	}
   1128 
   1129 	if (size > (size_t)lp->mtu) {
   1130 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
   1131 		goto send_dring_exit;
   1132 	}
   1133 	if (size < ETHERMIN)
   1134 		size = ETHERMIN;
   1135 
   1136 	ehp = (struct ether_header *)mp->b_rptr;
   1137 	is_bcast = IS_BROADCAST(ehp);
   1138 	is_mcast = IS_MULTICAST(ehp);
   1139 
   1140 	mutex_enter(&ldcp->txlock);
   1141 	/*
   1142 	 * allocate a descriptor
   1143 	 */
   1144 	tbufp = ldcp->next_tbufp;
   1145 	ntbufp = NEXTTBUF(ldcp, tbufp);
   1146 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
   1147 
   1148 		mutex_enter(&ldcp->tclock);
   1149 		/* Try reclaiming now */
   1150 		vgen_reclaim_dring(ldcp);
   1151 		ldcp->reclaim_lbolt = ddi_get_lbolt();
   1152 
   1153 		if (ntbufp == ldcp->cur_tbufp) {
   1154 			/* Now we are really out of tbuf/txds */
   1155 			ldcp->need_resched = B_TRUE;
   1156 			mutex_exit(&ldcp->tclock);
   1157 
   1158 			statsp->tx_no_desc++;
   1159 			mutex_exit(&ldcp->txlock);
   1160 
   1161 			return (VGEN_TX_NORESOURCES);
   1162 		}
   1163 		mutex_exit(&ldcp->tclock);
   1164 	}
   1165 	/* update next available tbuf in the ring and update tx index */
   1166 	ldcp->next_tbufp = ntbufp;
   1167 	INCR_TXI(ldcp->next_txi, ldcp);
   1168 
   1169 	/* Mark the buffer busy before releasing the lock */
   1170 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
   1171 	mutex_exit(&ldcp->txlock);
   1172 
   1173 	/* copy data into pre-allocated transmit buffer */
   1174 	dst = tbufp->datap + VNET_IPALIGN;
   1175 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
   1176 		mblksz = MBLKL(bp);
   1177 		bcopy(bp->b_rptr, dst, mblksz);
   1178 		dst += mblksz;
   1179 	}
   1180 
   1181 	tbufp->datalen = size;
   1182 
   1183 	/* initialize the corresponding public descriptor (txd) */
   1184 	txdp = tbufp->descp;
   1185 	hdrp = &txdp->hdr;
   1186 	txdp->nbytes = size;
   1187 	txdp->ncookies = tbufp->ncookies;
   1188 	bcopy((tbufp->memcookie), (txdp->memcookie),
   1189 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
   1190 
   1191 	mutex_enter(&ldcp->wrlock);
   1192 	/*
   1193 	 * If the flags not set to BUSY, it implies that the clobber
   1194 	 * was done while we were copying the data. In such case,
   1195 	 * discard the packet and return.
   1196 	 */
   1197 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
   1198 		statsp->oerrors++;
   1199 		mutex_exit(&ldcp->wrlock);
   1200 		goto send_dring_exit;
   1201 	}
   1202 	hdrp->dstate = VIO_DESC_READY;
   1203 
   1204 	/* update stats */
   1205 	statsp->opackets++;
   1206 	statsp->obytes += size;
   1207 	if (is_bcast)
   1208 		statsp->brdcstxmt++;
   1209 	else if (is_mcast)
   1210 		statsp->multixmt++;
   1211 
   1212 	/* send dring datamsg to the peer */
   1213 	if (ldcp->resched_peer) {
   1214 
   1215 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
   1216 		rtxdp = rtbufp->descp;
   1217 
   1218 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
   1219 
   1220 			rv = vgen_send_dring_data(ldcp,
   1221 			    (uint32_t)ldcp->resched_peer_txi, -1);
   1222 			if (rv != 0) {
   1223 				/* error: drop the packet */
   1224 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
   1225 				    "failed: rv(%d) len(%d)\n",
   1226 				    ldcp->ldc_id, rv, size);
   1227 				statsp->oerrors++;
   1228 			} else {
   1229 				ldcp->resched_peer = B_FALSE;
   1230 			}
   1231 
   1232 		}
   1233 
   1234 	}
   1235 
   1236 	mutex_exit(&ldcp->wrlock);
   1237 
   1238 send_dring_exit:
   1239 	if (rv == ECONNRESET) {
   1240 		vgen_ldcsend_process_reset(ldcp);
   1241 	}
   1242 	freemsg(mp);
   1243 	DBG1(vgenp, ldcp, "exit\n");
   1244 	return (VGEN_TX_SUCCESS);
   1245 }
   1246 
   1247 /* enable/disable a multicast address */
   1248 int
   1249 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
   1250 {
   1251 	vgen_t			*vgenp;
   1252 	vnet_mcast_msg_t	mcastmsg;
   1253 	vio_msg_tag_t		*tagp;
   1254 	vgen_port_t		*portp;
   1255 	vgen_portlist_t		*plistp;
   1256 	vgen_ldc_t		*ldcp;
   1257 	vgen_ldclist_t		*ldclp;
   1258 	struct ether_addr	*addrp;
   1259 	int			rv = DDI_FAILURE;
   1260 	uint32_t		i;
   1261 
   1262 	portp = (vgen_port_t *)arg;
   1263 	vgenp = portp->vgenp;
   1264 
   1265 	if (portp != vgenp->vsw_portp) {
   1266 		return (DDI_SUCCESS);
   1267 	}
   1268 
   1269 	addrp = (struct ether_addr *)mca;
   1270 	tagp = &mcastmsg.tag;
   1271 	bzero(&mcastmsg, sizeof (mcastmsg));
   1272 
   1273 	mutex_enter(&vgenp->lock);
   1274 
   1275 	plistp = &(vgenp->vgenports);
   1276 
   1277 	READ_ENTER(&plistp->rwlock);
   1278 
   1279 	portp = vgenp->vsw_portp;
   1280 	if (portp == NULL) {
   1281 		RW_EXIT(&plistp->rwlock);
   1282 		mutex_exit(&vgenp->lock);
   1283 		return (rv);
   1284 	}
   1285 	ldclp = &portp->ldclist;
   1286 
   1287 	READ_ENTER(&ldclp->rwlock);
   1288 
   1289 	ldcp = ldclp->headp;
   1290 	if (ldcp == NULL)
   1291 		goto vgen_mcast_exit;
   1292 
   1293 	mutex_enter(&ldcp->cblock);
   1294 
   1295 	if (ldcp->hphase == VH_DONE) {
   1296 		/*
   1297 		 * If handshake is done, send a msg to vsw to add/remove
   1298 		 * the multicast address. Otherwise, we just update this
   1299 		 * mcast address in our table and the table will be sync'd
   1300 		 * with vsw when handshake completes.
   1301 		 */
   1302 		tagp->vio_msgtype = VIO_TYPE_CTRL;
   1303 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
   1304 		tagp->vio_subtype_env = VNET_MCAST_INFO;
   1305 		tagp->vio_sid = ldcp->local_sid;
   1306 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
   1307 		mcastmsg.set = add;
   1308 		mcastmsg.count = 1;
   1309 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
   1310 		    B_FALSE) != VGEN_SUCCESS) {
   1311 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
   1312 			mutex_exit(&ldcp->cblock);
   1313 			goto vgen_mcast_exit;
   1314 		}
   1315 	}
   1316 
   1317 	mutex_exit(&ldcp->cblock);
   1318 
   1319 	if (add) {
   1320 
   1321 		/* expand multicast table if necessary */
   1322 		if (vgenp->mccount >= vgenp->mcsize) {
   1323 			struct ether_addr	*newtab;
   1324 			uint32_t		newsize;
   1325 
   1326 
   1327 			newsize = vgenp->mcsize * 2;
   1328 
   1329 			newtab = kmem_zalloc(newsize *
   1330 			    sizeof (struct ether_addr), KM_NOSLEEP);
   1331 			if (newtab == NULL)
   1332 				goto vgen_mcast_exit;
   1333 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
   1334 			    sizeof (struct ether_addr));
   1335 			kmem_free(vgenp->mctab,
   1336 			    vgenp->mcsize * sizeof (struct ether_addr));
   1337 
   1338 			vgenp->mctab = newtab;
   1339 			vgenp->mcsize = newsize;
   1340 		}
   1341 
   1342 		/* add address to the table */
   1343 		vgenp->mctab[vgenp->mccount++] = *addrp;
   1344 
   1345 	} else {
   1346 
   1347 		/* delete address from the table */
   1348 		for (i = 0; i < vgenp->mccount; i++) {
   1349 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
   1350 
   1351 				/*
   1352 				 * If there's more than one address in this
   1353 				 * table, delete the unwanted one by moving
   1354 				 * the last one in the list over top of it;
   1355 				 * otherwise, just remove it.
   1356 				 */
   1357 				if (vgenp->mccount > 1) {
   1358 					vgenp->mctab[i] =
   1359 					    vgenp->mctab[vgenp->mccount-1];
   1360 				}
   1361 				vgenp->mccount--;
   1362 				break;
   1363 			}
   1364 		}
   1365 	}
   1366 
   1367 	rv = DDI_SUCCESS;
   1368 
   1369 vgen_mcast_exit:
   1370 	RW_EXIT(&ldclp->rwlock);
   1371 	RW_EXIT(&plistp->rwlock);
   1372 
   1373 	mutex_exit(&vgenp->lock);
   1374 	return (rv);
   1375 }
   1376 
   1377 /* set or clear promiscuous mode on the device */
   1378 static int
   1379 vgen_promisc(void *arg, boolean_t on)
   1380 {
   1381 	_NOTE(ARGUNUSED(arg, on))
   1382 	return (DDI_SUCCESS);
   1383 }
   1384 
   1385 /* set the unicast mac address of the device */
   1386 static int
   1387 vgen_unicst(void *arg, const uint8_t *mca)
   1388 {
   1389 	_NOTE(ARGUNUSED(arg, mca))
   1390 	return (DDI_SUCCESS);
   1391 }
   1392 
   1393 /* get device statistics */
   1394 int
   1395 vgen_stat(void *arg, uint_t stat, uint64_t *val)
   1396 {
   1397 	vgen_port_t	*portp = (vgen_port_t *)arg;
   1398 
   1399 	*val = vgen_port_stat(portp, stat);
   1400 
   1401 	return (0);
   1402 }
   1403 
   1404 /* vgen internal functions */
   1405 /* detach all ports from the device */
   1406 static void
   1407 vgen_detach_ports(vgen_t *vgenp)
   1408 {
   1409 	vgen_port_t	*portp;
   1410 	vgen_portlist_t	*plistp;
   1411 
   1412 	plistp = &(vgenp->vgenports);
   1413 	WRITE_ENTER(&plistp->rwlock);
   1414 	while ((portp = plistp->headp) != NULL) {
   1415 		vgen_port_detach(portp);
   1416 	}
   1417 	RW_EXIT(&plistp->rwlock);
   1418 }
   1419 
   1420 /*
   1421  * detach the given port.
   1422  */
   1423 static void
   1424 vgen_port_detach(vgen_port_t *portp)
   1425 {
   1426 	vgen_t		*vgenp;
   1427 	vgen_ldclist_t	*ldclp;
   1428 	int		port_num;
   1429 
   1430 	vgenp = portp->vgenp;
   1431 	port_num = portp->port_num;
   1432 
   1433 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
   1434 
   1435 	/*
   1436 	 * If this port is connected to the vswitch, then
   1437 	 * potentially there could be ports that may be using
   1438 	 * this port to transmit packets. To address this do
   1439 	 * the following:
   1440 	 *	- First set vgenp->vsw_portp to NULL, so that
   1441 	 *	  its not used after that.
   1442 	 *	- Then wait for the refcnt to go down to 0.
   1443 	 *	- Now we can safely detach this port.
   1444 	 */
   1445 	if (vgenp->vsw_portp == portp) {
   1446 		vgenp->vsw_portp = NULL;
   1447 		while (vgenp->vsw_port_refcnt > 0) {
   1448 			delay(drv_usectohz(vgen_tx_delay));
   1449 		}
   1450 		(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
   1451 	}
   1452 
   1453 	if (portp->vhp != NULL) {
   1454 		vio_net_resource_unreg(portp->vhp);
   1455 		portp->vhp = NULL;
   1456 	}
   1457 
   1458 	vgen_vlan_destroy_hash(portp);
   1459 
   1460 	/* remove it from port list */
   1461 	vgen_port_list_remove(portp);
   1462 
   1463 	/* detach channels from this port */
   1464 	ldclp = &portp->ldclist;
   1465 	WRITE_ENTER(&ldclp->rwlock);
   1466 	while (ldclp->headp) {
   1467 		vgen_ldc_detach(ldclp->headp);
   1468 	}
   1469 	RW_EXIT(&ldclp->rwlock);
   1470 	rw_destroy(&ldclp->rwlock);
   1471 
   1472 	if (portp->num_ldcs != 0) {
   1473 		kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
   1474 		portp->num_ldcs = 0;
   1475 	}
   1476 
   1477 	mutex_destroy(&portp->lock);
   1478 	KMEM_FREE(portp);
   1479 
   1480 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
   1481 }
   1482 
   1483 /* add a port to port list */
   1484 static void
   1485 vgen_port_list_insert(vgen_port_t *portp)
   1486 {
   1487 	vgen_portlist_t *plistp;
   1488 	vgen_t *vgenp;
   1489 
   1490 	vgenp = portp->vgenp;
   1491 	plistp = &(vgenp->vgenports);
   1492 
   1493 	if (plistp->headp == NULL) {
   1494 		plistp->headp = portp;
   1495 	} else {
   1496 		plistp->tailp->nextp = portp;
   1497 	}
   1498 	plistp->tailp = portp;
   1499 	portp->nextp = NULL;
   1500 }
   1501 
   1502 /* remove a port from port list */
   1503 static void
   1504 vgen_port_list_remove(vgen_port_t *portp)
   1505 {
   1506 	vgen_port_t *prevp;
   1507 	vgen_port_t *nextp;
   1508 	vgen_portlist_t *plistp;
   1509 	vgen_t *vgenp;
   1510 
   1511 	vgenp = portp->vgenp;
   1512 
   1513 	plistp = &(vgenp->vgenports);
   1514 
   1515 	if (plistp->headp == NULL)
   1516 		return;
   1517 
   1518 	if (portp == plistp->headp) {
   1519 		plistp->headp = portp->nextp;
   1520 		if (portp == plistp->tailp)
   1521 			plistp->tailp = plistp->headp;
   1522 	} else {
   1523 		for (prevp = plistp->headp;
   1524 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
   1525 		    prevp = nextp)
   1526 			;
   1527 		if (nextp == portp) {
   1528 			prevp->nextp = portp->nextp;
   1529 		}
   1530 		if (portp == plistp->tailp)
   1531 			plistp->tailp = prevp;
   1532 	}
   1533 }
   1534 
   1535 /* lookup a port in the list based on port_num */
   1536 static vgen_port_t *
   1537 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
   1538 {
   1539 	vgen_port_t *portp = NULL;
   1540 
   1541 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
   1542 		if (portp->port_num == port_num) {
   1543 			break;
   1544 		}
   1545 	}
   1546 
   1547 	return (portp);
   1548 }
   1549 
   1550 /* enable ports for transmit/receive */
   1551 static void
   1552 vgen_init_ports(vgen_t *vgenp)
   1553 {
   1554 	vgen_port_t	*portp;
   1555 	vgen_portlist_t	*plistp;
   1556 
   1557 	plistp = &(vgenp->vgenports);
   1558 	READ_ENTER(&plistp->rwlock);
   1559 
   1560 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
   1561 		vgen_port_init(portp);
   1562 	}
   1563 
   1564 	RW_EXIT(&plistp->rwlock);
   1565 }
   1566 
   1567 static void
   1568 vgen_port_init(vgen_port_t *portp)
   1569 {
   1570 	/* Add the port to the specified vlans */
   1571 	vgen_vlan_add_ids(portp);
   1572 
   1573 	/* Bring up the channels of this port */
   1574 	vgen_init_ldcs(portp);
   1575 }
   1576 
   1577 /* disable transmit/receive on ports */
   1578 static void
   1579 vgen_uninit_ports(vgen_t *vgenp)
   1580 {
   1581 	vgen_port_t	*portp;
   1582 	vgen_portlist_t	*plistp;
   1583 
   1584 	plistp = &(vgenp->vgenports);
   1585 	READ_ENTER(&plistp->rwlock);
   1586 
   1587 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
   1588 		vgen_port_uninit(portp);
   1589 	}
   1590 
   1591 	RW_EXIT(&plistp->rwlock);
   1592 }
   1593 
   1594 static void
   1595 vgen_port_uninit(vgen_port_t *portp)
   1596 {
   1597 	vgen_uninit_ldcs(portp);
   1598 
   1599 	/* remove the port from vlans it has been assigned to */
   1600 	vgen_vlan_remove_ids(portp);
   1601 }
   1602 
   1603 /*
   1604  * Scan the machine description for this instance of vnet
   1605  * and read its properties. Called only from vgen_init().
   1606  * Returns: 0 on success, 1 on failure.
   1607  */
   1608 static int
   1609 vgen_read_mdprops(vgen_t *vgenp)
   1610 {
   1611 	vnet_t		*vnetp = vgenp->vnetp;
   1612 	md_t		*mdp = NULL;
   1613 	mde_cookie_t	rootnode;
   1614 	mde_cookie_t	*listp = NULL;
   1615 	uint64_t	cfgh;
   1616 	char		*name;
   1617 	int		rv = 1;
   1618 	int		num_nodes = 0;
   1619 	int		num_devs = 0;
   1620 	int		listsz = 0;
   1621 	int		i;
   1622 
   1623 	if ((mdp = md_get_handle()) == NULL) {
   1624 		return (rv);
   1625 	}
   1626 
   1627 	num_nodes = md_node_count(mdp);
   1628 	ASSERT(num_nodes > 0);
   1629 
   1630 	listsz = num_nodes * sizeof (mde_cookie_t);
   1631 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
   1632 
   1633 	rootnode = md_root_node(mdp);
   1634 
   1635 	/* search for all "virtual_device" nodes */
   1636 	num_devs = md_scan_dag(mdp, rootnode,
   1637 	    md_find_name(mdp, vdev_propname),
   1638 	    md_find_name(mdp, "fwd"), listp);
   1639 	if (num_devs <= 0) {
   1640 		goto vgen_readmd_exit;
   1641 	}
   1642 
   1643 	/*
   1644 	 * Now loop through the list of virtual-devices looking for
   1645 	 * devices with name "network" and for each such device compare
   1646 	 * its instance with what we have from the 'reg' property to
   1647 	 * find the right node in MD and then read all its properties.
   1648 	 */
   1649 	for (i = 0; i < num_devs; i++) {
   1650 
   1651 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
   1652 			goto vgen_readmd_exit;
   1653 		}
   1654 
   1655 		/* is this a "network" device? */
   1656 		if (strcmp(name, vnet_propname) != 0)
   1657 			continue;
   1658 
   1659 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
   1660 			goto vgen_readmd_exit;
   1661 		}
   1662 
   1663 		/* is this the required instance of vnet? */
   1664 		if (vgenp->regprop != cfgh)
   1665 			continue;
   1666 
   1667 		/*
   1668 		 * Read the 'linkprop' property to know if this vnet
   1669 		 * device should get physical link updates from vswitch.
   1670 		 */
   1671 		vgen_linkprop_read(vgenp, mdp, listp[i],
   1672 		    &vnetp->pls_update);
   1673 
   1674 		/*
   1675 		 * Read the mtu. Note that we set the mtu of vnet device within
   1676 		 * this routine itself, after validating the range.
   1677 		 */
   1678 		vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
   1679 		if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
   1680 			vnetp->mtu = ETHERMTU;
   1681 		}
   1682 		vgenp->max_frame_size = vnetp->mtu +
   1683 		    sizeof (struct ether_header) + VLAN_TAGSZ;
   1684 
   1685 		/* read priority ether types */
   1686 		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
   1687 
   1688 		/* read vlan id properties of this vnet instance */
   1689 		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
   1690 		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
   1691 		    &vnetp->default_vlan_id);
   1692 
   1693 		rv = 0;
   1694 		break;
   1695 	}
   1696 
   1697 vgen_readmd_exit:
   1698 
   1699 	kmem_free(listp, listsz);
   1700 	(void) md_fini_handle(mdp);
   1701 	return (rv);
   1702 }
   1703 
   1704 /*
   1705  * Read vlan id properties of the given MD node.
   1706  * Arguments:
   1707  *   arg:          device argument(vnet device or a port)
   1708  *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
   1709  *   mdp:          machine description
   1710  *   node:         md node cookie
   1711  *
   1712  * Returns:
   1713  *   pvidp:        port-vlan-id of the node
   1714  *   vidspp:       list of vlan-ids of the node
   1715  *   nvidsp:       # of vlan-ids in the list
   1716  *   default_idp:  default-vlan-id of the node(if node is vnet device)
   1717  */
   1718 static void
   1719 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
   1720 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
   1721 	uint16_t *default_idp)
   1722 {
   1723 	vgen_t		*vgenp;
   1724 	vnet_t		*vnetp;
   1725 	vgen_port_t	*portp;
   1726 	char		*pvid_propname;
   1727 	char		*vid_propname;
   1728 	uint_t		nvids;
   1729 	uint32_t	vids_size;
   1730 	int		rv;
   1731 	int		i;
   1732 	uint64_t	*data;
   1733 	uint64_t	val;
   1734 	int		size;
   1735 	int		inst;
   1736 
   1737 	if (type == VGEN_LOCAL) {
   1738 
   1739 		vgenp = (vgen_t *)arg;
   1740 		vnetp = vgenp->vnetp;
   1741 		pvid_propname = vgen_pvid_propname;
   1742 		vid_propname = vgen_vid_propname;
   1743 		inst = vnetp->instance;
   1744 
   1745 	} else if (type == VGEN_PEER) {
   1746 
   1747 		portp = (vgen_port_t *)arg;
   1748 		vgenp = portp->vgenp;
   1749 		vnetp = vgenp->vnetp;
   1750 		pvid_propname = port_pvid_propname;
   1751 		vid_propname = port_vid_propname;
   1752 		inst = portp->port_num;
   1753 
   1754 	} else {
   1755 		return;
   1756 	}
   1757 
   1758 	if (type == VGEN_LOCAL && default_idp != NULL) {
   1759 		rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
   1760 		if (rv != 0) {
   1761 			DWARN(vgenp, NULL, "prop(%s) not found",
   1762 			    vgen_dvid_propname);
   1763 
   1764 			*default_idp = vnet_default_vlan_id;
   1765 		} else {
   1766 			*default_idp = val & 0xFFF;
   1767 			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
   1768 			    inst, *default_idp);
   1769 		}
   1770 	}
   1771 
   1772 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
   1773 	if (rv != 0) {
   1774 		DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
   1775 		*pvidp = vnet_default_vlan_id;
   1776 	} else {
   1777 
   1778 		*pvidp = val & 0xFFF;
   1779 		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
   1780 		    pvid_propname, inst, *pvidp);
   1781 	}
   1782 
   1783 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
   1784 	    &size);
   1785 	if (rv != 0) {
   1786 		DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
   1787 		size = 0;
   1788 	} else {
   1789 		size /= sizeof (uint64_t);
   1790 	}
   1791 	nvids = size;
   1792 
   1793 	if (nvids != 0) {
   1794 		DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
   1795 		vids_size = sizeof (uint16_t) * nvids;
   1796 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
   1797 		for (i = 0; i < nvids; i++) {
   1798 			(*vidspp)[i] = data[i] & 0xFFFF;
   1799 			DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
   1800 		}
   1801 		DBG2(vgenp, NULL, "\n");
   1802 	}
   1803 
   1804 	*nvidsp = nvids;
   1805 }
   1806 
   1807 /*
   1808  * Create a vlan id hash table for the given port.
   1809  */
   1810 static void
   1811 vgen_vlan_create_hash(vgen_port_t *portp)
   1812 {
   1813 	char		hashname[MAXNAMELEN];
   1814 
   1815 	(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
   1816 	    portp->port_num);
   1817 
   1818 	portp->vlan_nchains = vgen_vlan_nchains;
   1819 	portp->vlan_hashp = mod_hash_create_idhash(hashname,
   1820 	    portp->vlan_nchains, mod_hash_null_valdtor);
   1821 }
   1822 
   1823 /*
   1824  * Destroy the vlan id hash table in the given port.
   1825  */
   1826 static void
   1827 vgen_vlan_destroy_hash(vgen_port_t *portp)
   1828 {
   1829 	if (portp->vlan_hashp != NULL) {
   1830 		mod_hash_destroy_hash(portp->vlan_hashp);
   1831 		portp->vlan_hashp = NULL;
   1832 		portp->vlan_nchains = 0;
   1833 	}
   1834 }
   1835 
   1836 /*
   1837  * Add a port to the vlans specified in its port properites.
   1838  */
   1839 static void
   1840 vgen_vlan_add_ids(vgen_port_t *portp)
   1841 {
   1842 	int		rv;
   1843 	int		i;
   1844 
   1845 	rv = mod_hash_insert(portp->vlan_hashp,
   1846 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
   1847 	    (mod_hash_val_t)B_TRUE);
   1848 	ASSERT(rv == 0);
   1849 
   1850 	for (i = 0; i < portp->nvids; i++) {
   1851 		rv = mod_hash_insert(portp->vlan_hashp,
   1852 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
   1853 		    (mod_hash_val_t)B_TRUE);
   1854 		ASSERT(rv == 0);
   1855 	}
   1856 }
   1857 
   1858 /*
   1859  * Remove a port from the vlans it has been assigned to.
   1860  */
   1861 static void
   1862 vgen_vlan_remove_ids(vgen_port_t *portp)
   1863 {
   1864 	int		rv;
   1865 	int		i;
   1866 	mod_hash_val_t	vp;
   1867 
   1868 	rv = mod_hash_remove(portp->vlan_hashp,
   1869 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
   1870 	    (mod_hash_val_t *)&vp);
   1871 	ASSERT(rv == 0);
   1872 
   1873 	for (i = 0; i < portp->nvids; i++) {
   1874 		rv = mod_hash_remove(portp->vlan_hashp,
   1875 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
   1876 		    (mod_hash_val_t *)&vp);
   1877 		ASSERT(rv == 0);
   1878 	}
   1879 }
   1880 
   1881 /*
   1882  * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
   1883  * then the vlan-id is available in the tag; otherwise, its vlan id is
   1884  * implicitly obtained from the port-vlan-id of the vnet device.
   1885  * The vlan id determined is returned in vidp.
   1886  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
   1887  */
   1888 static boolean_t
   1889 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
   1890 {
   1891 	struct ether_vlan_header	*evhp;
   1892 
   1893 	/* If it's a tagged frame, get the vlan id from vlan header */
   1894 	if (ehp->ether_type == ETHERTYPE_VLAN) {
   1895 
   1896 		evhp = (struct ether_vlan_header *)ehp;
   1897 		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
   1898 		return (B_TRUE);
   1899 	}
   1900 
   1901 	/* Untagged frame, vlan-id is the pvid of vnet device */
   1902 	*vidp = vnetp->pvid;
   1903 	return (B_FALSE);
   1904 }
   1905 
   1906 /*
   1907  * Find the given vlan id in the hash table.
   1908  * Return: B_TRUE if the id is found; B_FALSE if not found.
   1909  */
   1910 static boolean_t
   1911 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
   1912 {
   1913 	int		rv;
   1914 	mod_hash_val_t	vp;
   1915 
   1916 	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
   1917 
   1918 	if (rv != 0)
   1919 		return (B_FALSE);
   1920 
   1921 	return (B_TRUE);
   1922 }
   1923 
   1924 /*
   1925  * This function reads "priority-ether-types" property from md. This property
   1926  * is used to enable support for priority frames. Applications which need
   1927  * guaranteed and timely delivery of certain high priority frames to/from
   1928  * a vnet or vsw within ldoms, should configure this property by providing
   1929  * the ether type(s) for which the priority facility is needed.
   1930  * Normal data frames are delivered over a ldc channel using the descriptor
   1931  * ring mechanism which is constrained by factors such as descriptor ring size,
   1932  * the rate at which the ring is processed at the peer ldc end point, etc.
   1933  * The priority mechanism provides an Out-Of-Band path to send/receive frames
   1934  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
   1935  * descriptor ring path and enables a more reliable and timely delivery of
   1936  * frames to the peer.
   1937  */
   1938 static void
   1939 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
   1940 {
   1941 	int		rv;
   1942 	uint16_t	*types;
   1943 	uint64_t	*data;
   1944 	int		size;
   1945 	int		i;
   1946 	size_t		mblk_sz;
   1947 
   1948 	rv = md_get_prop_data(mdp, node, pri_types_propname,
   1949 	    (uint8_t **)&data, &size);
   1950 	if (rv != 0) {
   1951 		/*
   1952 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
   1953 		 * Check if 'vgen_pri_eth_type' has been set in that case.
   1954 		 */
   1955 		if (vgen_pri_eth_type != 0) {
   1956 			size = sizeof (vgen_pri_eth_type);
   1957 			data = &vgen_pri_eth_type;
   1958 		} else {
   1959 			DBG2(vgenp, NULL,
   1960 			    "prop(%s) not found", pri_types_propname);
   1961 			size = 0;
   1962 		}
   1963 	}
   1964 
   1965 	if (size == 0) {
   1966 		vgenp->pri_num_types = 0;
   1967 		return;
   1968 	}
   1969 
   1970 	/*
   1971 	 * we have some priority-ether-types defined;
   1972 	 * allocate a table of these types and also
   1973 	 * allocate a pool of mblks to transmit these
   1974 	 * priority packets.
   1975 	 */
   1976 	size /= sizeof (uint64_t);
   1977 	vgenp->pri_num_types = size;
   1978 	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
   1979 	for (i = 0, types = vgenp->pri_types; i < size; i++) {
   1980 		types[i] = data[i] & 0xFFFF;
   1981 	}
   1982 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
   1983 	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz,
   1984 	    &vgenp->pri_tx_vmp);
   1985 }
   1986 
   1987 static void
   1988 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
   1989 {
   1990 	int		rv;
   1991 	uint64_t	val;
   1992 	char		*mtu_propname;
   1993 
   1994 	mtu_propname = vgen_mtu_propname;
   1995 
   1996 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
   1997 	if (rv != 0) {
   1998 		DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
   1999 		*mtu = vnet_ethermtu;
   2000 	} else {
   2001 
   2002 		*mtu = val & 0xFFFF;
   2003 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
   2004 		    vgenp->instance, *mtu);
   2005 	}
   2006 }
   2007 
   2008 static void
   2009 vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
   2010 	boolean_t *pls)
   2011 {
   2012 	int		rv;
   2013 	uint64_t	val;
   2014 	char		*linkpropname;
   2015 
   2016 	linkpropname = vgen_linkprop_propname;
   2017 
   2018 	rv = md_get_prop_val(mdp, node, linkpropname, &val);
   2019 	if (rv != 0) {
   2020 		DWARN(vgenp, NULL, "prop(%s) not found", linkpropname);
   2021 		*pls = B_FALSE;
   2022 	} else {
   2023 
   2024 		*pls = (val & 0x1) ?  B_TRUE : B_FALSE;
   2025 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", linkpropname,
   2026 		    vgenp->instance, *pls);
   2027 	}
   2028 }
   2029 
   2030 /* register with MD event generator */
   2031 static int
   2032 vgen_mdeg_reg(vgen_t *vgenp)
   2033 {
   2034 	mdeg_prop_spec_t	*pspecp;
   2035 	mdeg_node_spec_t	*parentp;
   2036 	uint_t			templatesz;
   2037 	int			rv;
   2038 	mdeg_handle_t		dev_hdl = NULL;
   2039 	mdeg_handle_t		port_hdl = NULL;
   2040 
   2041 	templatesz = sizeof (vgen_prop_template);
   2042 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
   2043 	if (pspecp == NULL) {
   2044 		return (DDI_FAILURE);
   2045 	}
   2046 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
   2047 	if (parentp == NULL) {
   2048 		kmem_free(pspecp, templatesz);
   2049 		return (DDI_FAILURE);
   2050 	}
   2051 
   2052 	bcopy(vgen_prop_template, pspecp, templatesz);
   2053 
   2054 	/*
   2055 	 * NOTE: The instance here refers to the value of "reg" property and
   2056 	 * not the dev_info instance (ddi_get_instance()) of vnet.
   2057 	 */
   2058 	VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
   2059 
   2060 	parentp->namep = "virtual-device";
   2061 	parentp->specp = pspecp;
   2062 
   2063 	/* save parentp in vgen_t */
   2064 	vgenp->mdeg_parentp = parentp;
   2065 
   2066 	/*
   2067 	 * Register an interest in 'virtual-device' nodes with a
   2068 	 * 'name' property of 'network'
   2069 	 */
   2070 	rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
   2071 	if (rv != MDEG_SUCCESS) {
   2072 		DERR(vgenp, NULL, "mdeg_register failed\n");
   2073 		goto mdeg_reg_fail;
   2074 	}
   2075 
   2076 	/* Register an interest in 'port' nodes */
   2077 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
   2078 	    &port_hdl);
   2079 	if (rv != MDEG_SUCCESS) {
   2080 		DERR(vgenp, NULL, "mdeg_register failed\n");
   2081 		goto mdeg_reg_fail;
   2082 	}
   2083 
   2084 	/* save mdeg handle in vgen_t */
   2085 	vgenp->mdeg_dev_hdl = dev_hdl;
   2086 	vgenp->mdeg_port_hdl = port_hdl;
   2087 
   2088 	return (DDI_SUCCESS);
   2089 
   2090 mdeg_reg_fail:
   2091 	if (dev_hdl != NULL) {
   2092 		(void) mdeg_unregister(dev_hdl);
   2093 	}
   2094 	KMEM_FREE(parentp);
   2095 	kmem_free(pspecp, templatesz);
   2096 	vgenp->mdeg_parentp = NULL;
   2097 	return (DDI_FAILURE);
   2098 }
   2099 
   2100 /* unregister with MD event generator */
   2101 static void
   2102 vgen_mdeg_unreg(vgen_t *vgenp)
   2103 {
   2104 	if (vgenp->mdeg_dev_hdl != NULL) {
   2105 		(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
   2106 		vgenp->mdeg_dev_hdl = NULL;
   2107 	}
   2108 	if (vgenp->mdeg_port_hdl != NULL) {
   2109 		(void) mdeg_unregister(vgenp->mdeg_port_hdl);
   2110 		vgenp->mdeg_port_hdl = NULL;
   2111 	}
   2112 
   2113 	if (vgenp->mdeg_parentp != NULL) {
   2114 		kmem_free(vgenp->mdeg_parentp->specp,
   2115 		    sizeof (vgen_prop_template));
   2116 		KMEM_FREE(vgenp->mdeg_parentp);
   2117 		vgenp->mdeg_parentp = NULL;
   2118 	}
   2119 }
   2120 
   2121 /* mdeg callback function for the port node */
   2122 static int
   2123 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
   2124 {
   2125 	int idx;
   2126 	int vsw_idx = -1;
   2127 	uint64_t val;
   2128 	vgen_t *vgenp;
   2129 
   2130 	if ((resp == NULL) || (cb_argp == NULL)) {
   2131 		return (MDEG_FAILURE);
   2132 	}
   2133 
   2134 	vgenp = (vgen_t *)cb_argp;
   2135 	DBG1(vgenp, NULL, "enter\n");
   2136 
   2137 	mutex_enter(&vgenp->lock);
   2138 
   2139 	DBG1(vgenp, NULL, "ports: removed(%x), "
   2140 	"added(%x), updated(%x)\n", resp->removed.nelem,
   2141 	    resp->added.nelem, resp->match_curr.nelem);
   2142 
   2143 	for (idx = 0; idx < resp->removed.nelem; idx++) {
   2144 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
   2145 		    resp->removed.mdep[idx]);
   2146 	}
   2147 
   2148 	if (vgenp->vsw_portp == NULL) {
   2149 		/*
   2150 		 * find vsw_port and add it first, because other ports need
   2151 		 * this when adding fdb entry (see vgen_port_init()).
   2152 		 */
   2153 		for (idx = 0; idx < resp->added.nelem; idx++) {
   2154 			if (!(md_get_prop_val(resp->added.mdp,
   2155 			    resp->added.mdep[idx], swport_propname, &val))) {
   2156 				if (val == 0) {
   2157 					/*
   2158 					 * This port is connected to the
   2159 					 * vsw on service domain.
   2160 					 */
   2161 					vsw_idx = idx;
   2162 					if (vgen_add_port(vgenp,
   2163 					    resp->added.mdp,
   2164 					    resp->added.mdep[idx]) !=
   2165 					    DDI_SUCCESS) {
   2166 						cmn_err(CE_NOTE, "vnet%d Could "
   2167 						    "not initialize virtual "
   2168 						    "switch port.",
   2169 						    vgenp->instance);
   2170 						mutex_exit(&vgenp->lock);
   2171 						return (MDEG_FAILURE);
   2172 					}
   2173 					break;
   2174 				}
   2175 			}
   2176 		}
   2177 		if (vsw_idx == -1) {
   2178 			DWARN(vgenp, NULL, "can't find vsw_port\n");
   2179 			mutex_exit(&vgenp->lock);
   2180 			return (MDEG_FAILURE);
   2181 		}
   2182 	}
   2183 
   2184 	for (idx = 0; idx < resp->added.nelem; idx++) {
   2185 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
   2186 			continue;
   2187 
   2188 		/* If this port can't be added just skip it. */
   2189 		(void) vgen_add_port(vgenp, resp->added.mdp,
   2190 		    resp->added.mdep[idx]);
   2191 	}
   2192 
   2193 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
   2194 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
   2195 		    resp->match_curr.mdep[idx],
   2196 		    resp->match_prev.mdp,
   2197 		    resp->match_prev.mdep[idx]);
   2198 	}
   2199 
   2200 	mutex_exit(&vgenp->lock);
   2201 	DBG1(vgenp, NULL, "exit\n");
   2202 	return (MDEG_SUCCESS);
   2203 }
   2204 
   2205 /* mdeg callback function for the vnet node */
   2206 static int
   2207 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
   2208 {
   2209 	vgen_t		*vgenp;
   2210 	vnet_t		*vnetp;
   2211 	md_t		*mdp;
   2212 	mde_cookie_t	node;
   2213 	uint64_t	inst;
   2214 	char		*node_name = NULL;
   2215 
   2216 	if ((resp == NULL) || (cb_argp == NULL)) {
   2217 		return (MDEG_FAILURE);
   2218 	}
   2219 
   2220 	vgenp = (vgen_t *)cb_argp;
   2221 	vnetp = vgenp->vnetp;
   2222 
   2223 	DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
   2224 	    " : prev matched %d", resp->added.nelem, resp->removed.nelem,
   2225 	    resp->match_curr.nelem, resp->match_prev.nelem);
   2226 
   2227 	mutex_enter(&vgenp->lock);
   2228 
   2229 	/*
   2230 	 * We get an initial callback for this node as 'added' after
   2231 	 * registering with mdeg. Note that we would have already gathered
   2232 	 * information about this vnet node by walking MD earlier during attach
   2233 	 * (in vgen_read_mdprops()). So, there is a window where the properties
   2234 	 * of this node might have changed when we get this initial 'added'
   2235 	 * callback. We handle this as if an update occured and invoke the same
   2236 	 * function which handles updates to the properties of this vnet-node
   2237 	 * if any. A non-zero 'match' value indicates that the MD has been
   2238 	 * updated and that a 'network' node is present which may or may not
   2239 	 * have been updated. It is up to the clients to examine their own
   2240 	 * nodes and determine if they have changed.
   2241 	 */
   2242 	if (resp->added.nelem != 0) {
   2243 
   2244 		if (resp->added.nelem != 1) {
   2245 			cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
   2246 			    "invalid: %d\n", vnetp->instance,
   2247 			    resp->added.nelem);
   2248 			goto vgen_mdeg_cb_err;
   2249 		}
   2250 
   2251 		mdp = resp->added.mdp;
   2252 		node = resp->added.mdep[0];
   2253 
   2254 	} else if (resp->match_curr.nelem != 0) {
   2255 
   2256 		if (resp->match_curr.nelem != 1) {
   2257 			cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
   2258 			    "invalid: %d\n", vnetp->instance,
   2259 			    resp->match_curr.nelem);
   2260 			goto vgen_mdeg_cb_err;
   2261 		}
   2262 
   2263 		mdp = resp->match_curr.mdp;
   2264 		node = resp->match_curr.mdep[0];
   2265 
   2266 	} else {
   2267 		goto vgen_mdeg_cb_err;
   2268 	}
   2269 
   2270 	/* Validate name and instance */
   2271 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
   2272 		DERR(vgenp, NULL, "unable to get node name\n");
   2273 		goto vgen_mdeg_cb_err;
   2274 	}
   2275 
   2276 	/* is this a virtual-network device? */
   2277 	if (strcmp(node_name, vnet_propname) != 0) {
   2278 		DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
   2279 		goto vgen_mdeg_cb_err;
   2280 	}
   2281 
   2282 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
   2283 		DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
   2284 		goto vgen_mdeg_cb_err;
   2285 	}
   2286 
   2287 	/* is this the right instance of vnet? */
   2288 	if (inst != vgenp->regprop) {
   2289 		DERR(vgenp, NULL,  "Invalid cfg-handle: %lx\n", inst);
   2290 		goto vgen_mdeg_cb_err;
   2291 	}
   2292 
   2293 	vgen_update_md_prop(vgenp, mdp, node);
   2294 
   2295 	mutex_exit(&vgenp->lock);
   2296 	return (MDEG_SUCCESS);
   2297 
   2298 vgen_mdeg_cb_err:
   2299 	mutex_exit(&vgenp->lock);
   2300 	return (MDEG_FAILURE);
   2301 }
   2302 
   2303 /*
   2304  * Check to see if the relevant properties in the specified node have
   2305  * changed, and if so take the appropriate action.
   2306  */
   2307 static void
   2308 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
   2309 {
   2310 	uint16_t	pvid;
   2311 	uint16_t	*vids;
   2312 	uint16_t	nvids;
   2313 	vnet_t		*vnetp = vgenp->vnetp;
   2314 	uint32_t	mtu;
   2315 	boolean_t	pls_update;
   2316 	enum		{ MD_init = 0x1,
   2317 			    MD_vlans = 0x2,
   2318 			    MD_mtu = 0x4,
   2319 			    MD_pls = 0x8 } updated;
   2320 	int		rv;
   2321 
   2322 	updated = MD_init;
   2323 
   2324 	/* Read the vlan ids */
   2325 	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
   2326 	    &nvids, NULL);
   2327 
   2328 	/* Determine if there are any vlan id updates */
   2329 	if ((pvid != vnetp->pvid) ||		/* pvid changed? */
   2330 	    (nvids != vnetp->nvids) ||		/* # of vids changed? */
   2331 	    ((nvids != 0) && (vnetp->nvids != 0) &&	/* vids changed? */
   2332 	    bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
   2333 		updated |= MD_vlans;
   2334 	}
   2335 
   2336 	/* Read mtu */
   2337 	vgen_mtu_read(vgenp, mdp, mdex, &mtu);
   2338 	if (mtu != vnetp->mtu) {
   2339 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
   2340 			updated |= MD_mtu;
   2341 		} else {
   2342 			cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
   2343 			    " as the specified value:%d is invalid\n",
   2344 			    vnetp->instance, mtu);
   2345 		}
   2346 	}
   2347 
   2348 	/*
   2349 	 * Read the 'linkprop' property.
   2350 	 */
   2351 	vgen_linkprop_read(vgenp, mdp, mdex, &pls_update);
   2352 	if (pls_update != vnetp->pls_update) {
   2353 		updated |= MD_pls;
   2354 	}
   2355 
   2356 	/* Now process the updated props */
   2357 
   2358 	if (updated & MD_vlans) {
   2359 
   2360 		/* save the new vlan ids */
   2361 		vnetp->pvid = pvid;
   2362 		if (vnetp->nvids != 0) {
   2363 			kmem_free(vnetp->vids,
   2364 			    sizeof (uint16_t) * vnetp->nvids);
   2365 			vnetp->nvids = 0;
   2366 		}
   2367 		if (nvids != 0) {
   2368 			vnetp->nvids = nvids;
   2369 			vnetp->vids = vids;
   2370 		}
   2371 
   2372 		/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
   2373 		vgen_reset_vlan_unaware_ports(vgenp);
   2374 
   2375 	} else {
   2376 
   2377 		if (nvids != 0) {
   2378 			kmem_free(vids, sizeof (uint16_t) * nvids);
   2379 		}
   2380 	}
   2381 
   2382 	if (updated & MD_mtu) {
   2383 
   2384 		DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
   2385 		    vnetp->mtu, mtu);
   2386 
   2387 		rv = vnet_mtu_update(vnetp, mtu);
   2388 		if (rv == 0) {
   2389 			vgenp->max_frame_size = mtu +
   2390 			    sizeof (struct ether_header) + VLAN_TAGSZ;
   2391 		}
   2392 	}
   2393 
   2394 	if (updated & MD_pls) {
   2395 		/* enable/disable physical link state updates */
   2396 		vnetp->pls_update = pls_update;
   2397 		mutex_exit(&vgenp->lock);
   2398 
   2399 		/* reset vsw-port to re-negotiate with the updated prop. */
   2400 		vgen_reset_vsw_port(vgenp);
   2401 
   2402 		mutex_enter(&vgenp->lock);
   2403 	}
   2404 }
   2405 
   2406 /* add a new port to the device */
   2407 static int
   2408 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
   2409 {
   2410 	vgen_port_t	*portp;
   2411 	int		rv;
   2412 
   2413 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
   2414 
   2415 	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
   2416 	if (rv != DDI_SUCCESS) {
   2417 		KMEM_FREE(portp);
   2418 		return (DDI_FAILURE);
   2419 	}
   2420 
   2421 	rv = vgen_port_attach(portp);
   2422 	if (rv != DDI_SUCCESS) {
   2423 		return (DDI_FAILURE);
   2424 	}
   2425 
   2426 	return (DDI_SUCCESS);
   2427 }
   2428 
   2429 /* read properties of the port from its md node */
   2430 static int
   2431 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
   2432 	mde_cookie_t mdex)
   2433 {
   2434 	uint64_t		port_num;
   2435 	uint64_t		*ldc_ids;
   2436 	uint64_t		macaddr;
   2437 	uint64_t		val;
   2438 	int			num_ldcs;
   2439 	int			i;
   2440 	int			addrsz;
   2441 	int			num_nodes = 0;
   2442 	int			listsz = 0;
   2443 	mde_cookie_t		*listp = NULL;
   2444 	uint8_t			*addrp;
   2445 	struct ether_addr	ea;
   2446 
   2447 	/* read "id" property to get the port number */
   2448 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
   2449 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
   2450 		return (DDI_FAILURE);
   2451 	}
   2452 
   2453 	/*
   2454 	 * Find the channel endpoint node(s) under this port node.
   2455 	 */
   2456 	if ((num_nodes = md_node_count(mdp)) <= 0) {
   2457 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
   2458 		    num_nodes);
   2459 		return (DDI_FAILURE);
   2460 	}
   2461 
   2462 	/* allocate space for node list */
   2463 	listsz = num_nodes * sizeof (mde_cookie_t);
   2464 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
   2465 	if (listp == NULL)
   2466 		return (DDI_FAILURE);
   2467 
   2468 	num_ldcs = md_scan_dag(mdp, mdex,
   2469 	    md_find_name(mdp, channel_propname),
   2470 	    md_find_name(mdp, "fwd"), listp);
   2471 
   2472 	if (num_ldcs <= 0) {
   2473 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
   2474 		kmem_free(listp, listsz);
   2475 		return (DDI_FAILURE);
   2476 	}
   2477 
   2478 	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);
   2479 
   2480 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
   2481 	if (ldc_ids == NULL) {
   2482 		kmem_free(listp, listsz);
   2483 		return (DDI_FAILURE);
   2484 	}
   2485 
   2486 	for (i = 0; i < num_ldcs; i++) {
   2487 		/* read channel ids */
   2488 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
   2489 			DWARN(vgenp, NULL, "prop(%s) not found\n",
   2490 			    id_propname);
   2491 			kmem_free(listp, listsz);
   2492 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
   2493 			return (DDI_FAILURE);
   2494 		}
   2495 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
   2496 	}
   2497 
   2498 	kmem_free(listp, listsz);
   2499 
   2500 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
   2501 	    &addrsz)) {
   2502 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
   2503 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
   2504 		return (DDI_FAILURE);
   2505 	}
   2506 
   2507 	if (addrsz < ETHERADDRL) {
   2508 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
   2509 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
   2510 		return (DDI_FAILURE);
   2511 	}
   2512 
   2513 	macaddr = *((uint64_t *)addrp);
   2514 
   2515 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
   2516 
   2517 	for (i = ETHERADDRL - 1; i >= 0; i--) {
   2518 		ea.ether_addr_octet[i] = macaddr & 0xFF;
   2519 		macaddr >>= 8;
   2520 	}
   2521 
   2522 	if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
   2523 		if (val == 0) {
   2524 			/* This port is connected to the vswitch */
   2525 			portp->is_vsw_port = B_TRUE;
   2526 		} else {
   2527 			portp->is_vsw_port = B_FALSE;
   2528 		}
   2529 	}
   2530 
   2531 	/* now update all properties into the port */
   2532 	portp->vgenp = vgenp;
   2533 	portp->port_num = port_num;
   2534 	ether_copy(&ea, &portp->macaddr);
   2535 	portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
   2536 	bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
   2537 	portp->num_ldcs = num_ldcs;
   2538 
   2539 	/* read vlan id properties of this port node */
   2540 	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
   2541 	    &portp->vids, &portp->nvids, NULL);
   2542 
   2543 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
   2544 
   2545 	return (DDI_SUCCESS);
   2546 }
   2547 
   2548 /* remove a port from the device */
   2549 static int
   2550 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
   2551 {
   2552 	uint64_t	port_num;
   2553 	vgen_port_t	*portp;
   2554 	vgen_portlist_t	*plistp;
   2555 
   2556 	/* read "id" property to get the port number */
   2557 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
   2558 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
   2559 		return (DDI_FAILURE);
   2560 	}
   2561 
   2562 	plistp = &(vgenp->vgenports);
   2563 
   2564 	WRITE_ENTER(&plistp->rwlock);
   2565 	portp = vgen_port_lookup(plistp, (int)port_num);
   2566 	if (portp == NULL) {
   2567 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
   2568 		RW_EXIT(&plistp->rwlock);
   2569 		return (DDI_FAILURE);
   2570 	}
   2571 
   2572 	vgen_port_detach_mdeg(portp);
   2573 	RW_EXIT(&plistp->rwlock);
   2574 
   2575 	return (DDI_SUCCESS);
   2576 }
   2577 
   2578 /* attach a port to the device based on mdeg data */
   2579 static int
   2580 vgen_port_attach(vgen_port_t *portp)
   2581 {
   2582 	int			i;
   2583 	vgen_portlist_t		*plistp;
   2584 	vgen_t			*vgenp;
   2585 	uint64_t		*ldcids;
   2586 	uint32_t		num_ldcs;
   2587 	mac_register_t		*macp;
   2588 	vio_net_res_type_t	type;
   2589 	int			rv;
   2590 
   2591 	ASSERT(portp != NULL);
   2592 
   2593 	vgenp = portp->vgenp;
   2594 	ldcids = portp->ldc_ids;
   2595 	num_ldcs = portp->num_ldcs;
   2596 
   2597 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
   2598 
   2599 	mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
   2600 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
   2601 	portp->ldclist.headp = NULL;
   2602 
   2603 	for (i = 0; i < num_ldcs; i++) {
   2604 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
   2605 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
   2606 			vgen_port_detach(portp);
   2607 			return (DDI_FAILURE);
   2608 		}
   2609 	}
   2610 
   2611 	/* create vlan id hash table */
   2612 	vgen_vlan_create_hash(portp);
   2613 
   2614 	if (portp->is_vsw_port == B_TRUE) {
   2615 		/* This port is connected to the switch port */
   2616 		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
   2617 		type = VIO_NET_RES_LDC_SERVICE;
   2618 	} else {
   2619 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
   2620 		type = VIO_NET_RES_LDC_GUEST;
   2621 	}
   2622 
   2623 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
   2624 		vgen_port_detach(portp);
   2625 		return (DDI_FAILURE);
   2626 	}
   2627 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
   2628 	macp->m_driver = portp;
   2629 	macp->m_dip = vgenp->vnetdip;
   2630 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
   2631 	macp->m_callbacks = &vgen_m_callbacks;
   2632 	macp->m_min_sdu = 0;
   2633 	macp->m_max_sdu = ETHERMTU;
   2634 
   2635 	mutex_enter(&portp->lock);
   2636 	rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
   2637 	    portp->macaddr, &portp->vhp, &portp->vcb);
   2638 	mutex_exit(&portp->lock);
   2639 	mac_free(macp);
   2640 
   2641 	if (rv == 0) {
   2642 		/* link it into the list of ports */
   2643 		plistp = &(vgenp->vgenports);
   2644 		WRITE_ENTER(&plistp->rwlock);
   2645 		vgen_port_list_insert(portp);
   2646 		RW_EXIT(&plistp->rwlock);
   2647 
   2648 		if (portp->is_vsw_port == B_TRUE) {
   2649 			/* We now have the vswitch port attached */
   2650 			vgenp->vsw_portp = portp;
   2651 			(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
   2652 		}
   2653 	} else {
   2654 		DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
   2655 		    portp);
   2656 		vgen_port_detach(portp);
   2657 	}
   2658 
   2659 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
   2660 	return (DDI_SUCCESS);
   2661 }
   2662 
   2663 /* detach a port from the device based on mdeg data */
   2664 static void
   2665 vgen_port_detach_mdeg(vgen_port_t *portp)
   2666 {
   2667 	vgen_t *vgenp = portp->vgenp;
   2668 
   2669 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
   2670 
   2671 	mutex_enter(&portp->lock);
   2672 
   2673 	/* stop the port if needed */
   2674 	if (portp->flags & VGEN_STARTED) {
   2675 		vgen_port_uninit(portp);
   2676 	}
   2677 
   2678 	mutex_exit(&portp->lock);
   2679 	vgen_port_detach(portp);
   2680 
   2681 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
   2682 }
   2683 
   2684 static int
   2685 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
   2686 	md_t *prev_mdp, mde_cookie_t prev_mdex)
   2687 {
   2688 	uint64_t	cport_num;
   2689 	uint64_t	pport_num;
   2690 	vgen_portlist_t	*plistp;
   2691 	vgen_port_t	*portp;
   2692 	boolean_t	updated_vlans = B_FALSE;
   2693 	uint16_t	pvid;
   2694 	uint16_t	*vids;
   2695 	uint16_t	nvids;
   2696 
   2697 	/*
   2698 	 * For now, we get port updates only if vlan ids changed.
   2699 	 * We read the port num and do some sanity check.
   2700 	 */
   2701 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
   2702 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
   2703 		return (DDI_FAILURE);
   2704 	}
   2705 
   2706 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
   2707 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
   2708 		return (DDI_FAILURE);
   2709 	}
   2710 	if (cport_num != pport_num)
   2711 		return (DDI_FAILURE);
   2712 
   2713 	plistp = &(vgenp->vgenports);
   2714 
   2715 	READ_ENTER(&plistp->rwlock);
   2716 
   2717 	portp = vgen_port_lookup(plistp, (int)cport_num);
   2718 	if (portp == NULL) {
   2719 		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
   2720 		RW_EXIT(&plistp->rwlock);
   2721 		return (DDI_FAILURE);
   2722 	}
   2723 
   2724 	/* Read the vlan ids */
   2725 	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
   2726 	    &nvids, NULL);
   2727 
   2728 	/* Determine if there are any vlan id updates */
   2729 	if ((pvid != portp->pvid) ||		/* pvid changed? */
   2730 	    (nvids != portp->nvids) ||		/* # of vids changed? */
   2731 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
   2732 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
   2733 		updated_vlans = B_TRUE;
   2734 	}
   2735 
   2736 	if (updated_vlans == B_FALSE) {
   2737 		RW_EXIT(&plistp->rwlock);
   2738 		return (DDI_FAILURE);
   2739 	}
   2740 
   2741 	/* remove the port from vlans it has been assigned to */
   2742 	vgen_vlan_remove_ids(portp);
   2743 
   2744 	/* save the new vlan ids */
   2745 	portp->pvid = pvid;
   2746 	if (portp->nvids != 0) {
   2747 		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
   2748 		portp->nvids = 0;
   2749 	}
   2750 	if (nvids != 0) {
   2751 		portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
   2752 		bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
   2753 		portp->nvids = nvids;
   2754 		kmem_free(vids, sizeof (uint16_t) * nvids);
   2755 	}
   2756 
   2757 	/* add port to the new vlans */
   2758 	vgen_vlan_add_ids(portp);
   2759 
   2760 	/* reset the port if it is vlan unaware (ver < 1.3) */
   2761 	vgen_vlan_unaware_port_reset(portp);
   2762 
   2763 	RW_EXIT(&plistp->rwlock);
   2764 
   2765 	return (DDI_SUCCESS);
   2766 }
   2767 
   2768 static uint64_t
   2769 vgen_port_stat(vgen_port_t *portp, uint_t stat)
   2770 {
   2771 	vgen_ldclist_t	*ldclp;
   2772 	vgen_ldc_t *ldcp;
   2773 	uint64_t	val;
   2774 
   2775 	val = 0;
   2776 	ldclp = &portp->ldclist;
   2777 
   2778 	READ_ENTER(&ldclp->rwlock);
   2779 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
   2780 		val += vgen_ldc_stat(ldcp, stat);
   2781 	}
   2782 	RW_EXIT(&ldclp->rwlock);
   2783 
   2784 	return (val);
   2785 }
   2786 
   2787 /* allocate receive resources */
   2788 static int
   2789 vgen_init_multipools(vgen_ldc_t *ldcp)
   2790 {
   2791 	size_t		data_sz;
   2792 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
   2793 	int		status;
   2794 	uint32_t	sz1 = 0;
   2795 	uint32_t	sz2 = 0;
   2796 	uint32_t	sz3 = 0;
   2797 	uint32_t	sz4 = 0;
   2798 
   2799 	/*
   2800 	 * We round up the mtu specified to be a multiple of 2K.
   2801 	 * We then create rx pools based on the rounded up size.
   2802 	 */
   2803 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
   2804 	data_sz = VNET_ROUNDUP_2K(data_sz);
   2805 
   2806 	/*
   2807 	 * If pool sizes are specified, use them. Note that the presence of
   2808 	 * the first tunable will be used as a hint.
   2809 	 */
   2810 	if (vgen_rbufsz1 != 0) {
   2811 
   2812 		sz1 = vgen_rbufsz1;
   2813 		sz2 = vgen_rbufsz2;
   2814 		sz3 = vgen_rbufsz3;
   2815 		sz4 = vgen_rbufsz4;
   2816 
   2817 		if (sz4 == 0) { /* need 3 pools */
   2818 
   2819 			ldcp->max_rxpool_size = sz3;
   2820 			status = vio_init_multipools(&ldcp->vmp,
   2821 			    VGEN_NUM_VMPOOLS, sz1, sz2, sz3, vgen_nrbufs1,
   2822 			    vgen_nrbufs2, vgen_nrbufs3);
   2823 
   2824 		} else {
   2825 
   2826 			ldcp->max_rxpool_size = sz4;
   2827 			status = vio_init_multipools(&ldcp->vmp,
   2828 			    VGEN_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4,
   2829 			    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3,
   2830 			    vgen_nrbufs4);
   2831 		}
   2832 		return (status);
   2833 	}
   2834 
   2835 	/*
   2836 	 * Pool sizes are not specified. We select the pool sizes based on the
   2837 	 * mtu if vnet_jumbo_rxpools is enabled.
   2838 	 */
   2839 	if (vnet_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) {
   2840 		/*
   2841 		 * Receive buffer pool allocation based on mtu is disabled.
   2842 		 * Use the default mechanism of standard size pool allocation.
   2843 		 */
   2844 		sz1 = VGEN_DBLK_SZ_128;
   2845 		sz2 = VGEN_DBLK_SZ_256;
   2846 		sz3 = VGEN_DBLK_SZ_2048;
   2847 		ldcp->max_rxpool_size = sz3;
   2848 
   2849 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
   2850 		    sz1, sz2, sz3,
   2851 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);
   2852 
   2853 		return (status);
   2854 	}
   2855 
   2856 	switch (data_sz) {
   2857 
   2858 	case VNET_4K:
   2859 
   2860 		sz1 = VGEN_DBLK_SZ_128;
   2861 		sz2 = VGEN_DBLK_SZ_256;
   2862 		sz3 = VGEN_DBLK_SZ_2048;
   2863 		sz4 = sz3 << 1;			/* 4K */
   2864 		ldcp->max_rxpool_size = sz4;
   2865 
   2866 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
   2867 		    sz1, sz2, sz3, sz4,
   2868 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
   2869 		break;
   2870 
   2871 	default:	/* data_sz:  4K+ to 16K */
   2872 
   2873 		sz1 = VGEN_DBLK_SZ_256;
   2874 		sz2 = VGEN_DBLK_SZ_2048;
   2875 		sz3 = data_sz >> 1;	/* Jumbo-size/2 */
   2876 		sz4 = data_sz;		/* Jumbo-size  */
   2877 		ldcp->max_rxpool_size = sz4;
   2878 
   2879 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
   2880 		    sz1, sz2, sz3, sz4,
   2881 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
   2882 		break;
   2883 
   2884 	}
   2885 
   2886 	return (status);
   2887 }
   2888 
   2889 /* attach the channel corresponding to the given ldc_id to the port */
   2890 static int
   2891 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
   2892 {
   2893 	vgen_t 		*vgenp;
   2894 	vgen_ldclist_t	*ldclp;
   2895 	vgen_ldc_t 	*ldcp, **prev_ldcp;
   2896 	ldc_attr_t 	attr;
   2897 	int 		status;
   2898 	ldc_status_t	istatus;
   2899 	char		kname[MAXNAMELEN];
   2900 	int		instance;
   2901 	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
   2902 		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
   2903 		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
   2904 		AST_create_rxmblks = 0x20,
   2905 		AST_create_rcv_thread = 0x40} attach_state;
   2906 
   2907 	attach_state = AST_init;
   2908 	vgenp = portp->vgenp;
   2909 	ldclp = &portp->ldclist;
   2910 
   2911 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
   2912 	if (ldcp == NULL) {
   2913 		goto ldc_attach_failed;
   2914 	}
   2915 	ldcp->ldc_id = ldc_id;
   2916 	ldcp->portp = portp;
   2917 
   2918 	attach_state |= AST_ldc_alloc;
   2919 
   2920 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
   2921 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
   2922 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
   2923 	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
   2924 	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
   2925 	mutex_init(&ldcp->pollq_lock, NULL, MUTEX_DRIVER, NULL);
   2926 
   2927 	attach_state |= AST_mutex_init;
   2928 
   2929 	attr.devclass = LDC_DEV_NT;
   2930 	attr.instance = vgenp->instance;
   2931 	attr.mode = LDC_MODE_UNRELIABLE;
   2932 	attr.mtu = vnet_ldc_mtu;
   2933 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
   2934 	if (status != 0) {
   2935 		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
   2936 		goto ldc_attach_failed;
   2937 	}
   2938 	attach_state |= AST_ldc_init;
   2939 
   2940 	if (vgen_rcv_thread_enabled) {
   2941 		ldcp->rcv_thr_flags = 0;
   2942 
   2943 		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
   2944 		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
   2945 		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
   2946 		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);
   2947 
   2948 		attach_state |= AST_create_rcv_thread;
   2949 		if (ldcp->rcv_thread == NULL) {
   2950 			DWARN(vgenp, ldcp, "Failed to create worker thread");
   2951 			goto ldc_attach_failed;
   2952 		}
   2953 	}
   2954 
   2955 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
   2956 	if (status != 0) {
   2957 		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
   2958 		    status);
   2959 		goto ldc_attach_failed;
   2960 	}
   2961 	/*
   2962 	 * allocate a message for ldc_read()s, big enough to hold ctrl and
   2963 	 * data msgs, including raw data msgs used to recv priority frames.
   2964 	 */
   2965 	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
   2966 	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
   2967 	attach_state |= AST_ldc_reg_cb;
   2968 
   2969 	(void) ldc_status(ldcp->ldc_handle, &istatus);
   2970 	ASSERT(istatus == LDC_INIT);
   2971 	ldcp->ldc_status = istatus;
   2972 
   2973 	/* allocate transmit resources */
   2974 	status = vgen_alloc_tx_ring(ldcp);
   2975 	if (status != 0) {
   2976 		goto ldc_attach_failed;
   2977 	}
   2978 	attach_state |= AST_alloc_tx_ring;
   2979 
   2980 	/* allocate receive resources */
   2981 	status = vgen_init_multipools(ldcp);
   2982 	if (status != 0) {
   2983 		/*
   2984 		 * We do not return failure if receive mblk pools can't be
   2985 		 * allocated; instead allocb(9F) will be used to dynamically
   2986 		 * allocate buffers during receive.
   2987 		 */
   2988 		DWARN(vgenp, ldcp,
   2989 		    "vnet%d: status(%d), failed to allocate rx mblk pools for "
   2990 		    "channel(0x%lx)\n",
   2991 		    vgenp->instance, status, ldcp->ldc_id);
   2992 	} else {
   2993 		attach_state |= AST_create_rxmblks;
   2994 	}
   2995 
   2996 	/* Setup kstats for the channel */
   2997 	instance = vgenp->instance;
   2998 	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
   2999 	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
   3000 	if (ldcp->ksp == NULL) {
   3001 		goto ldc_attach_failed;
   3002 	}
   3003 
   3004 	/* initialize vgen_versions supported */
   3005 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
   3006 	vgen_reset_vnet_proto_ops(ldcp);
   3007 
   3008 	/* link it into the list of channels for this port */
   3009 	WRITE_ENTER(&ldclp->rwlock);
   3010 	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
   3011 	ldcp->nextp = *prev_ldcp;
   3012 	*prev_ldcp = ldcp;
   3013 	RW_EXIT(&ldclp->rwlock);
   3014 
   3015 	ldcp->link_state = LINK_STATE_UNKNOWN;
   3016 #ifdef	VNET_IOC_DEBUG
   3017 	ldcp->link_down_forced = B_FALSE;
   3018 #endif
   3019 	ldcp->flags |= CHANNEL_ATTACHED;
   3020 	return (DDI_SUCCESS);
   3021 
   3022 ldc_attach_failed:
   3023 	if (attach_state & AST_ldc_reg_cb) {
   3024 		(void) ldc_unreg_callback(ldcp->ldc_handle);
   3025 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
   3026 	}
   3027 	if (attach_state & AST_create_rcv_thread) {
   3028 		if (ldcp->rcv_thread != NULL) {
   3029 			vgen_stop_rcv_thread(ldcp);
   3030 		}
   3031 		mutex_destroy(&ldcp->rcv_thr_lock);
   3032 		cv_destroy(&ldcp->rcv_thr_cv);
   3033 	}
   3034 	if (attach_state & AST_create_rxmblks) {
   3035 		vio_mblk_pool_t *fvmp = NULL;
   3036 		vio_destroy_multipools(&ldcp->vmp, &fvmp);
   3037 		ASSERT(fvmp == NULL);
   3038 	}
   3039 	if (attach_state & AST_alloc_tx_ring) {
   3040 		vgen_free_tx_ring(ldcp);
   3041 	}
   3042 	if (attach_state & AST_ldc_init) {
   3043 		(void) ldc_fini(ldcp->ldc_handle);
   3044 	}
   3045 	if (attach_state & AST_mutex_init) {
   3046 		mutex_destroy(&ldcp->tclock);
   3047 		mutex_destroy(&ldcp->txlock);
   3048 		mutex_destroy(&ldcp->cblock);
   3049 		mutex_destroy(&ldcp->wrlock);
   3050 		mutex_destroy(&ldcp->rxlock);
   3051 		mutex_destroy(&ldcp->pollq_lock);
   3052 	}
   3053 	if (attach_state & AST_ldc_alloc) {
   3054 		KMEM_FREE(ldcp);
   3055 	}
   3056 	return (DDI_FAILURE);
   3057 }
   3058 
   3059 /* detach a channel from the port */
   3060 static void
   3061 vgen_ldc_detach(vgen_ldc_t *ldcp)
   3062 {
   3063 	vgen_port_t	*portp;
   3064 	vgen_t 		*vgenp;
   3065 	vgen_ldc_t 	*pldcp;
   3066 	vgen_ldc_t	**prev_ldcp;
   3067 	vgen_ldclist_t	*ldclp;
   3068 
   3069 	portp = ldcp->portp;
   3070 	vgenp = portp->vgenp;
   3071 	ldclp = &portp->ldclist;
   3072 
   3073 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
   3074 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
   3075 		if (pldcp == ldcp) {
   3076 			break;
   3077 		}
   3078 	}
   3079 
   3080 	if (pldcp == NULL) {
   3081 		/* invalid ldcp? */
   3082 		return;
   3083 	}
   3084 
   3085 	if (ldcp->ldc_status != LDC_INIT) {
   3086 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
   3087 	}
   3088 
   3089 	if (ldcp->flags & CHANNEL_ATTACHED) {
   3090 		ldcp->flags &= ~(CHANNEL_ATTACHED);
   3091 
   3092 		(void) ldc_unreg_callback(ldcp->ldc_handle);
   3093 		if (ldcp->rcv_thread != NULL) {
   3094 			/* First stop the receive thread */
   3095 			vgen_stop_rcv_thread(ldcp);
   3096 			mutex_destroy(&ldcp->rcv_thr_lock);
   3097 			cv_destroy(&ldcp->rcv_thr_cv);
   3098 		}
   3099 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
   3100 
   3101 		vgen_destroy_kstats(ldcp->ksp);
   3102 		ldcp->ksp = NULL;
   3103 
   3104 		/*
   3105 		 * if we cannot reclaim all mblks, put this
   3106 		 * on the list of pools(vgenp->rmp) to be reclaimed when the
   3107 		 * device gets detached (see vgen_uninit()).
   3108 		 */
   3109 		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);
   3110 
   3111 		/* free transmit resources */
   3112 		vgen_free_tx_ring(ldcp);
   3113 
   3114 		(void) ldc_fini(ldcp->ldc_handle);
   3115 		mutex_destroy(&ldcp->tclock);
   3116 		mutex_destroy(&ldcp->txlock);
   3117 		mutex_destroy(&ldcp->cblock);
   3118 		mutex_destroy(&ldcp->wrlock);
   3119 		mutex_destroy(&ldcp->rxlock);
   3120 		mutex_destroy(&ldcp->pollq_lock);
   3121 
   3122 		/* unlink it from the list */
   3123 		*prev_ldcp = ldcp->nextp;
   3124 		KMEM_FREE(ldcp);
   3125 	}
   3126 }
   3127 
   3128 /*
   3129  * This function allocates transmit resources for the channel.
   3130  * The resources consist of a transmit descriptor ring and an associated
   3131  * transmit buffer ring.
   3132  */
   3133 static int
   3134 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
   3135 {
   3136 	void *tbufp;
   3137 	ldc_mem_info_t minfo;
   3138 	uint32_t txdsize;
   3139 	uint32_t tbufsize;
   3140 	int status;
   3141 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   3142 
   3143 	ldcp->num_txds = vnet_ntxds;
   3144 	txdsize = sizeof (vnet_public_desc_t);
   3145 	tbufsize = sizeof (vgen_private_desc_t);
   3146 
   3147 	/* allocate transmit buffer ring */
   3148 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
   3149 	if (tbufp == NULL) {
   3150 		return (DDI_FAILURE);
   3151 	}
   3152 
   3153 	/* create transmit descriptor ring */
   3154 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
   3155 	    &ldcp->tx_dhandle);
   3156 	if (status) {
   3157 		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
   3158 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
   3159 		return (DDI_FAILURE);
   3160 	}
   3161 
   3162 	/* get the addr of descripror ring */
   3163 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
   3164 	if (status) {
   3165 		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
   3166 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
   3167 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
   3168 		ldcp->tbufp = NULL;
   3169 		return (DDI_FAILURE);
   3170 	}
   3171 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
   3172 	ldcp->tbufp = tbufp;
   3173 
   3174 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
   3175 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
   3176 
   3177 	return (DDI_SUCCESS);
   3178 }
   3179 
   3180 /* Free transmit resources for the channel */
   3181 static void
   3182 vgen_free_tx_ring(vgen_ldc_t *ldcp)
   3183 {
   3184 	int tbufsize = sizeof (vgen_private_desc_t);
   3185 
   3186 	/* free transmit descriptor ring */
   3187 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
   3188 
   3189 	/* free transmit buffer ring */
   3190 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
   3191 	ldcp->txdp = ldcp->txdendp = NULL;
   3192 	ldcp->tbufp = ldcp->tbufendp = NULL;
   3193 }
   3194 
   3195 /* enable transmit/receive on the channels for the port */
   3196 static void
   3197 vgen_init_ldcs(vgen_port_t *portp)
   3198 {
   3199 	vgen_ldclist_t	*ldclp = &portp->ldclist;
   3200 	vgen_ldc_t	*ldcp;
   3201 
   3202 	READ_ENTER(&ldclp->rwlock);
   3203 	ldcp =  ldclp->headp;
   3204 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
   3205 		(void) vgen_ldc_init(ldcp);
   3206 	}
   3207 	RW_EXIT(&ldclp->rwlock);
   3208 }
   3209 
   3210 /* stop transmit/receive on the channels for the port */
   3211 static void
   3212 vgen_uninit_ldcs(vgen_port_t *portp)
   3213 {
   3214 	vgen_ldclist_t	*ldclp = &portp->ldclist;
   3215 	vgen_ldc_t	*ldcp;
   3216 
   3217 	READ_ENTER(&ldclp->rwlock);
   3218 	ldcp =  ldclp->headp;
   3219 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
   3220 		vgen_ldc_uninit(ldcp);
   3221 	}
   3222 	RW_EXIT(&ldclp->rwlock);
   3223 }
   3224 
   3225 /* enable transmit/receive on the channel */
   3226 static int
   3227 vgen_ldc_init(vgen_ldc_t *ldcp)
   3228 {
   3229 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   3230 	ldc_status_t	istatus;
   3231 	int		rv;
   3232 	uint32_t	retries = 0;
   3233 	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
   3234 		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
   3235 	init_state = ST_init;
   3236 
   3237 	DBG1(vgenp, ldcp, "enter\n");
   3238 	LDC_LOCK(ldcp);
   3239 
   3240 	rv = ldc_open(ldcp->ldc_handle);
   3241 	if (rv != 0) {
   3242 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
   3243 		goto ldcinit_failed;
   3244 	}
   3245 	init_state |= ST_ldc_open;
   3246 
   3247 	(void) ldc_status(ldcp->ldc_handle, &istatus);
   3248 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
   3249 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
   3250 		goto ldcinit_failed;
   3251 	}
   3252 	ldcp->ldc_status = istatus;
   3253 
   3254 	rv = vgen_init_tbufs(ldcp);
   3255 	if (rv != 0) {
   3256 		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
   3257 		goto ldcinit_failed;
   3258 	}
   3259 	init_state |= ST_init_tbufs;
   3260 
   3261 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
   3262 	if (rv != 0) {
   3263 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
   3264 		goto ldcinit_failed;
   3265 	}
   3266 
   3267 	init_state |= ST_cb_enable;
   3268 
   3269 	do {
   3270 		rv = ldc_up(ldcp->ldc_handle);
   3271 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
   3272 			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
   3273 			drv_usecwait(VGEN_LDC_UP_DELAY);
   3274 		}
   3275 		if (retries++ >= vgen_ldcup_retries)
   3276 			break;
   3277 	} while (rv == EWOULDBLOCK);
   3278 
   3279 	(void) ldc_status(ldcp->ldc_handle, &istatus);
   3280 	if (istatus == LDC_UP) {
   3281 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
   3282 	}
   3283 
   3284 	ldcp->ldc_status = istatus;
   3285 
   3286 	/* initialize transmit watchdog timeout */
   3287 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
   3288 	    drv_usectohz(vnet_ldcwd_interval * 1000));
   3289 
   3290 	ldcp->hphase = -1;
   3291 	ldcp->flags |= CHANNEL_STARTED;
   3292 
   3293 	/* if channel is already UP - start handshake */
   3294 	if (istatus == LDC_UP) {
   3295 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   3296 		if (ldcp->portp != vgenp->vsw_portp) {
   3297 			/*
   3298 			 * As the channel is up, use this port from now on.
   3299 			 */
   3300 			(void) atomic_swap_32(
   3301 			    &ldcp->portp->use_vsw_port, B_FALSE);
   3302 		}
   3303 
   3304 		/* Initialize local session id */
   3305 		ldcp->local_sid = ddi_get_lbolt();
   3306 
   3307 		/* clear peer session id */
   3308 		ldcp->peer_sid = 0;
   3309 		ldcp->hretries = 0;
   3310 
   3311 		/* Initiate Handshake process with peer ldc endpoint */
   3312 		vgen_reset_hphase(ldcp);
   3313 
   3314 		mutex_exit(&ldcp->tclock);
   3315 		mutex_exit(&ldcp->txlock);
   3316 		mutex_exit(&ldcp->wrlock);
   3317 		mutex_exit(&ldcp->rxlock);
   3318 		vgen_handshake(vh_nextphase(ldcp));
   3319 		mutex_exit(&ldcp->cblock);
   3320 	} else {
   3321 		LDC_UNLOCK(ldcp);
   3322 	}
   3323 
   3324 	return (DDI_SUCCESS);
   3325 
   3326 ldcinit_failed:
   3327 	if (init_state & ST_cb_enable) {
   3328 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
   3329 	}
   3330 	if (init_state & ST_init_tbufs) {
   3331 		vgen_uninit_tbufs(ldcp);
   3332 	}
   3333 	if (init_state & ST_ldc_open) {
   3334 		(void) ldc_close(ldcp->ldc_handle);
   3335 	}
   3336 	LDC_UNLOCK(ldcp);
   3337 	DBG1(vgenp, ldcp, "exit\n");
   3338 	return (DDI_FAILURE);
   3339 }
   3340 
   3341 /* stop transmit/receive on the channel */
   3342 static void
   3343 vgen_ldc_uninit(vgen_ldc_t *ldcp)
   3344 {
   3345 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   3346 	int	rv;
   3347 	uint_t	retries = 0;
   3348 
   3349 	DBG1(vgenp, ldcp, "enter\n");
   3350 	LDC_LOCK(ldcp);
   3351 
   3352 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
   3353 		LDC_UNLOCK(ldcp);
   3354 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
   3355 		return;
   3356 	}
   3357 
   3358 	/* disable further callbacks */
   3359 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
   3360 	if (rv != 0) {
   3361 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
   3362 	}
   3363 
   3364 	/*
   3365 	 * clear handshake done bit and wait for pending tx and cb to finish.
   3366 	 * release locks before untimeout(9F) is invoked to cancel timeouts.
   3367 	 */
   3368 	ldcp->hphase &= ~(VH_DONE);
   3369 	LDC_UNLOCK(ldcp);
   3370 
   3371 	if (vgenp->vsw_portp == ldcp->portp) {
   3372 		vio_net_report_err_t rep_err =
   3373 		    ldcp->portp->vcb.vio_net_report_err;
   3374 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
   3375 	}
   3376 
   3377 	/* cancel handshake watchdog timeout */
   3378 	if (ldcp->htid) {
   3379 		(void) untimeout(ldcp->htid);
   3380 		ldcp->htid = 0;
   3381 	}
   3382 
   3383 	if (ldcp->cancel_htid) {
   3384 		(void) untimeout(ldcp->cancel_htid);
   3385 		ldcp->cancel_htid = 0;
   3386 	}
   3387 
   3388 	/* cancel transmit watchdog timeout */
   3389 	if (ldcp->wd_tid) {
   3390 		(void) untimeout(ldcp->wd_tid);
   3391 		ldcp->wd_tid = 0;
   3392 	}
   3393 
   3394 	drv_usecwait(1000);
   3395 
   3396 	if (ldcp->rcv_thread != NULL) {
   3397 		/*
   3398 		 * Note that callbacks have been disabled already(above). The
   3399 		 * drain function takes care of the condition when an already
   3400 		 * executing callback signals the worker to start processing or
   3401 		 * the worker has already been signalled and is in the middle of
   3402 		 * processing.
   3403 		 */
   3404 		vgen_drain_rcv_thread(ldcp);
   3405 	}
   3406 
   3407 	/* acquire locks again; any pending transmits and callbacks are done */
   3408 	LDC_LOCK(ldcp);
   3409 
   3410 	vgen_reset_hphase(ldcp);
   3411 
   3412 	vgen_uninit_tbufs(ldcp);
   3413 
   3414 	/* close the channel - retry on EAGAIN */
   3415 	while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
   3416 		if (++retries > vgen_ldccl_retries) {
   3417 			break;
   3418 		}
   3419 		drv_usecwait(VGEN_LDC_CLOSE_DELAY);
   3420 	}
   3421 	if (rv != 0) {
   3422 		cmn_err(CE_NOTE,
   3423 		    "!vnet%d: Error(%d) closing the channel(0x%lx)\n",
   3424 		    vgenp->instance, rv, ldcp->ldc_id);
   3425 	}
   3426 
   3427 	ldcp->ldc_status = LDC_INIT;
   3428 	ldcp->flags &= ~(CHANNEL_STARTED);
   3429 
   3430 	LDC_UNLOCK(ldcp);
   3431 
   3432 	DBG1(vgenp, ldcp, "exit\n");
   3433 }
   3434 
   3435 /* Initialize the transmit buffer ring for the channel */
   3436 static int
   3437 vgen_init_tbufs(vgen_ldc_t *ldcp)
   3438 {
   3439 	vgen_private_desc_t	*tbufp;
   3440 	vnet_public_desc_t	*txdp;
   3441 	vio_dring_entry_hdr_t		*hdrp;
   3442 	int 			i;
   3443 	int 			rv;
   3444 	caddr_t			datap = NULL;
   3445 	int			ci;
   3446 	uint32_t		ncookies;
   3447 	size_t			data_sz;
   3448 	vgen_t			*vgenp;
   3449 
   3450 	vgenp = LDC_TO_VGEN(ldcp);
   3451 
   3452 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
   3453 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
   3454 
   3455 	/*
   3456 	 * In order to ensure that the number of ldc cookies per descriptor is
   3457 	 * limited to be within the default MAX_COOKIES (2), we take the steps
   3458 	 * outlined below:
   3459 	 *
   3460 	 * Align the entire data buffer area to 8K and carve out per descriptor
   3461 	 * data buffers starting from this 8K aligned base address.
   3462 	 *
   3463 	 * We round up the mtu specified to be a multiple of 2K or 4K.
   3464 	 * For sizes up to 12K we round up the size to the next 2K.
   3465 	 * For sizes > 12K we round up to the next 4K (otherwise sizes such as
   3466 	 * 14K could end up needing 3 cookies, with the buffer spread across
   3467 	 * 3 8K pages:  8K+6K, 2K+8K+2K, 6K+8K, ...).
   3468 	 */
   3469 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
   3470 	if (data_sz <= VNET_12K) {
   3471 		data_sz = VNET_ROUNDUP_2K(data_sz);
   3472 	} else {
   3473 		data_sz = VNET_ROUNDUP_4K(data_sz);
   3474 	}
   3475 
   3476 	/* allocate extra 8K bytes for alignment */
   3477 	ldcp->tx_data_sz = (data_sz * ldcp->num_txds) + VNET_8K;
   3478 	datap = kmem_zalloc(ldcp->tx_data_sz, KM_SLEEP);
   3479 	ldcp->tx_datap = datap;
   3480 
   3481 
   3482 	/* align the starting address of the data area to 8K */
   3483 	datap = (caddr_t)VNET_ROUNDUP_8K((uintptr_t)datap);
   3484 
   3485 	/*
   3486 	 * for each private descriptor, allocate a ldc mem_handle which is
   3487 	 * required to map the data during transmit, set the flags
   3488 	 * to free (available for use by transmit routine).
   3489 	 */
   3490 
   3491 	for (i = 0; i < ldcp->num_txds; i++) {
   3492 
   3493 		tbufp = &(ldcp->tbufp[i]);
   3494 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
   3495 		    &(tbufp->memhandle));
   3496 		if (rv) {
   3497 			tbufp->memhandle = 0;
   3498 			goto init_tbufs_failed;
   3499 		}
   3500 
   3501 		/*
   3502 		 * bind ldc memhandle to the corresponding transmit buffer.
   3503 		 */
   3504 		ci = ncookies = 0;
   3505 		rv = ldc_mem_bind_handle(tbufp->memhandle,
   3506 		    (caddr_t)datap, data_sz, LDC_SHADOW_MAP,
   3507 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
   3508 		if (rv != 0) {
   3509 			goto init_tbufs_failed;
   3510 		}
   3511 
   3512 		/*
   3513 		 * successful in binding the handle to tx data buffer.
   3514 		 * set datap in the private descr to this buffer.
   3515 		 */
   3516 		tbufp->datap = datap;
   3517 
   3518 		if ((ncookies == 0) ||
   3519 		    (ncookies > MAX_COOKIES)) {
   3520 			goto init_tbufs_failed;
   3521 		}
   3522 
   3523 		for (ci = 1; ci < ncookies; ci++) {
   3524 			rv = ldc_mem_nextcookie(tbufp->memhandle,
   3525 			    &(tbufp->memcookie[ci]));
   3526 			if (rv != 0) {
   3527 				goto init_tbufs_failed;
   3528 			}
   3529 		}
   3530 
   3531 		tbufp->ncookies = ncookies;
   3532 		datap += data_sz;
   3533 
   3534 		tbufp->flags = VGEN_PRIV_DESC_FREE;
   3535 		txdp = &(ldcp->txdp[i]);
   3536 		hdrp = &txdp->hdr;
   3537 		hdrp->dstate = VIO_DESC_FREE;
   3538 		hdrp->ack = B_FALSE;
   3539 		tbufp->descp = txdp;
   3540 
   3541 	}
   3542 
   3543 	/* reset tbuf walking pointers */
   3544 	ldcp->next_tbufp = ldcp->tbufp;
   3545 	ldcp->cur_tbufp = ldcp->tbufp;
   3546 
   3547 	/* initialize tx seqnum and index */
   3548 	ldcp->next_txseq = VNET_ISS;
   3549 	ldcp->next_txi = 0;
   3550 
   3551 	ldcp->resched_peer = B_TRUE;
   3552 	ldcp->resched_peer_txi = 0;
   3553 
   3554 	return (DDI_SUCCESS);
   3555 
   3556 init_tbufs_failed:;
   3557 	vgen_uninit_tbufs(ldcp);
   3558 	return (DDI_FAILURE);
   3559 }
   3560 
   3561 /* Uninitialize transmit buffer ring for the channel */
   3562 static void
   3563 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
   3564 {
   3565 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
   3566 	int 			i;
   3567 
   3568 	/* for each tbuf (priv_desc), free ldc mem_handle */
   3569 	for (i = 0; i < ldcp->num_txds; i++) {
   3570 
   3571 		tbufp = &(ldcp->tbufp[i]);
   3572 
   3573 		if (tbufp->datap) { /* if bound to a ldc memhandle */
   3574 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
   3575 			tbufp->datap = NULL;
   3576 		}
   3577 		if (tbufp->memhandle) {
   3578 			(void) ldc_mem_free_handle(tbufp->memhandle);
   3579 			tbufp->memhandle = 0;
   3580 		}
   3581 	}
   3582 
   3583 	if (ldcp->tx_datap) {
   3584 		/* prealloc'd tx data buffer */
   3585 		kmem_free(ldcp->tx_datap, ldcp->tx_data_sz);
   3586 		ldcp->tx_datap = NULL;
   3587 		ldcp->tx_data_sz = 0;
   3588 	}
   3589 
   3590 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
   3591 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
   3592 }
   3593 
   3594 /* clobber tx descriptor ring */
   3595 static void
   3596 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
   3597 {
   3598 	vnet_public_desc_t	*txdp;
   3599 	vgen_private_desc_t	*tbufp;
   3600 	vio_dring_entry_hdr_t	*hdrp;
   3601 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   3602 	int i;
   3603 #ifdef DEBUG
   3604 	int ndone = 0;
   3605 #endif
   3606 
   3607 	for (i = 0; i < ldcp->num_txds; i++) {
   3608 
   3609 		tbufp = &(ldcp->tbufp[i]);
   3610 		txdp = tbufp->descp;
   3611 		hdrp = &txdp->hdr;
   3612 
   3613 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
   3614 			tbufp->flags = VGEN_PRIV_DESC_FREE;
   3615 #ifdef DEBUG
   3616 			if (hdrp->dstate == VIO_DESC_DONE)
   3617 				ndone++;
   3618 #endif
   3619 			hdrp->dstate = VIO_DESC_FREE;
   3620 			hdrp->ack = B_FALSE;
   3621 		}
   3622 	}
   3623 	/* reset tbuf walking pointers */
   3624 	ldcp->next_tbufp = ldcp->tbufp;
   3625 	ldcp->cur_tbufp = ldcp->tbufp;
   3626 
   3627 	/* reset tx seqnum and index */
   3628 	ldcp->next_txseq = VNET_ISS;
   3629 	ldcp->next_txi = 0;
   3630 
   3631 	ldcp->resched_peer = B_TRUE;
   3632 	ldcp->resched_peer_txi = 0;
   3633 
   3634 	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
   3635 }
   3636 
   3637 /* clobber receive descriptor ring */
   3638 static void
   3639 vgen_clobber_rxds(vgen_ldc_t *ldcp)
   3640 {
   3641 	ldcp->rx_dhandle = 0;
   3642 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
   3643 	ldcp->rxdp = NULL;
   3644 	ldcp->next_rxi = 0;
   3645 	ldcp->num_rxds = 0;
   3646 	ldcp->next_rxseq = VNET_ISS;
   3647 }
   3648 
   3649 /* initialize receive descriptor ring */
   3650 static int
   3651 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
   3652 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
   3653 {
   3654 	int rv;
   3655 	ldc_mem_info_t minfo;
   3656 
   3657 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
   3658 	    desc_size, LDC_DIRECT_MAP, &(ldcp->rx_dhandle));
   3659 	if (rv != 0) {
   3660 		return (DDI_FAILURE);
   3661 	}
   3662 
   3663 	/*
   3664 	 * sucessfully mapped, now try to
   3665 	 * get info about the mapped dring
   3666 	 */
   3667 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
   3668 	if (rv != 0) {
   3669 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
   3670 		return (DDI_FAILURE);
   3671 	}
   3672 
   3673 	/*
   3674 	 * save ring address, number of descriptors.
   3675 	 */
   3676 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
   3677 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
   3678 	ldcp->num_rxdcookies = ncookies;
   3679 	ldcp->num_rxds = num_desc;
   3680 	ldcp->next_rxi = 0;
   3681 	ldcp->next_rxseq = VNET_ISS;
   3682 	ldcp->dring_mtype = minfo.mtype;
   3683 
   3684 	return (DDI_SUCCESS);
   3685 }
   3686 
   3687 /* get channel statistics */
   3688 static uint64_t
   3689 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
   3690 {
   3691 	vgen_stats_t *statsp;
   3692 	uint64_t val;
   3693 
   3694 	val = 0;
   3695 	statsp = &ldcp->stats;
   3696 	switch (stat) {
   3697 
   3698 	case MAC_STAT_MULTIRCV:
   3699 		val = statsp->multircv;
   3700 		break;
   3701 
   3702 	case MAC_STAT_BRDCSTRCV:
   3703 		val = statsp->brdcstrcv;
   3704 		break;
   3705 
   3706 	case MAC_STAT_MULTIXMT:
   3707 		val = statsp->multixmt;
   3708 		break;
   3709 
   3710 	case MAC_STAT_BRDCSTXMT:
   3711 		val = statsp->brdcstxmt;
   3712 		break;
   3713 
   3714 	case MAC_STAT_NORCVBUF:
   3715 		val = statsp->norcvbuf;
   3716 		break;
   3717 
   3718 	case MAC_STAT_IERRORS:
   3719 		val = statsp->ierrors;
   3720 		break;
   3721 
   3722 	case MAC_STAT_NOXMTBUF:
   3723 		val = statsp->noxmtbuf;
   3724 		break;
   3725 
   3726 	case MAC_STAT_OERRORS:
   3727 		val = statsp->oerrors;
   3728 		break;
   3729 
   3730 	case MAC_STAT_COLLISIONS:
   3731 		break;
   3732 
   3733 	case MAC_STAT_RBYTES:
   3734 		val = statsp->rbytes;
   3735 		break;
   3736 
   3737 	case MAC_STAT_IPACKETS:
   3738 		val = statsp->ipackets;
   3739 		break;
   3740 
   3741 	case MAC_STAT_OBYTES:
   3742 		val = statsp->obytes;
   3743 		break;
   3744 
   3745 	case MAC_STAT_OPACKETS:
   3746 		val = statsp->opackets;
   3747 		break;
   3748 
   3749 	/* stats not relevant to ldc, return 0 */
   3750 	case MAC_STAT_IFSPEED:
   3751 	case ETHER_STAT_ALIGN_ERRORS:
   3752 	case ETHER_STAT_FCS_ERRORS:
   3753 	case ETHER_STAT_FIRST_COLLISIONS:
   3754 	case ETHER_STAT_MULTI_COLLISIONS:
   3755 	case ETHER_STAT_DEFER_XMTS:
   3756 	case ETHER_STAT_TX_LATE_COLLISIONS:
   3757 	case ETHER_STAT_EX_COLLISIONS:
   3758 	case ETHER_STAT_MACXMT_ERRORS:
   3759 	case ETHER_STAT_CARRIER_ERRORS:
   3760 	case ETHER_STAT_TOOLONG_ERRORS:
   3761 	case ETHER_STAT_XCVR_ADDR:
   3762 	case ETHER_STAT_XCVR_ID:
   3763 	case ETHER_STAT_XCVR_INUSE:
   3764 	case ETHER_STAT_CAP_1000FDX:
   3765 	case ETHER_STAT_CAP_1000HDX:
   3766 	case ETHER_STAT_CAP_100FDX:
   3767 	case ETHER_STAT_CAP_100HDX:
   3768 	case ETHER_STAT_CAP_10FDX:
   3769 	case ETHER_STAT_CAP_10HDX:
   3770 	case ETHER_STAT_CAP_ASMPAUSE:
   3771 	case ETHER_STAT_CAP_PAUSE:
   3772 	case ETHER_STAT_CAP_AUTONEG:
   3773 	case ETHER_STAT_ADV_CAP_1000FDX:
   3774 	case ETHER_STAT_ADV_CAP_1000HDX:
   3775 	case ETHER_STAT_ADV_CAP_100FDX:
   3776 	case ETHER_STAT_ADV_CAP_100HDX:
   3777 	case ETHER_STAT_ADV_CAP_10FDX:
   3778 	case ETHER_STAT_ADV_CAP_10HDX:
   3779 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
   3780 	case ETHER_STAT_ADV_CAP_PAUSE:
   3781 	case ETHER_STAT_ADV_CAP_AUTONEG:
   3782 	case ETHER_STAT_LP_CAP_1000FDX:
   3783 	case ETHER_STAT_LP_CAP_1000HDX:
   3784 	case ETHER_STAT_LP_CAP_100FDX:
   3785 	case ETHER_STAT_LP_CAP_100HDX:
   3786 	case ETHER_STAT_LP_CAP_10FDX:
   3787 	case ETHER_STAT_LP_CAP_10HDX:
   3788 	case ETHER_STAT_LP_CAP_ASMPAUSE:
   3789 	case ETHER_STAT_LP_CAP_PAUSE:
   3790 	case ETHER_STAT_LP_CAP_AUTONEG:
   3791 	case ETHER_STAT_LINK_ASMPAUSE:
   3792 	case ETHER_STAT_LINK_PAUSE:
   3793 	case ETHER_STAT_LINK_AUTONEG:
   3794 	case ETHER_STAT_LINK_DUPLEX:
   3795 	default:
   3796 		val = 0;
   3797 		break;
   3798 
   3799 	}
   3800 	return (val);
   3801 }
   3802 
   3803 /*
   3804  * LDC channel is UP, start handshake process with peer.
   3805  */
   3806 static void
   3807 vgen_handle_evt_up(vgen_ldc_t *ldcp)
   3808 {
   3809 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
   3810 
   3811 	DBG1(vgenp, ldcp, "enter\n");
   3812 
   3813 	ASSERT(MUTEX_HELD(&ldcp->cblock));
   3814 
   3815 	if (ldcp->portp != vgenp->vsw_portp) {
   3816 		/*
   3817 		 * As the channel is up, use this port from now on.
   3818 		 */
   3819 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
   3820 	}
   3821 
   3822 	/* Initialize local session id */
   3823 	ldcp->local_sid = ddi_get_lbolt();
   3824 
   3825 	/* clear peer session id */
   3826 	ldcp->peer_sid = 0;
   3827 	ldcp->hretries = 0;
   3828 
   3829 	if (ldcp->hphase != VH_PHASE0) {
   3830 		vgen_handshake_reset(ldcp);
   3831 	}
   3832 
   3833 	/* Initiate Handshake process with peer ldc endpoint */
   3834 	vgen_handshake(vh_nextphase(ldcp));
   3835 
   3836 	DBG1(vgenp, ldcp, "exit\n");
   3837 }
   3838 
   3839 /*
   3840  * LDC channel is Reset, terminate connection with peer and try to
   3841  * bring the channel up again.
   3842  */
   3843 static void
   3844 vgen_handle_evt_reset(vgen_ldc_t *ldcp)
   3845 {
   3846 	ldc_status_t istatus;
   3847 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
   3848 	int	rv;
   3849 
   3850 	DBG1(vgenp, ldcp, "enter\n");
   3851 
   3852 	ASSERT(MUTEX_HELD(&ldcp->cblock));
   3853 
   3854 	if ((ldcp->portp != vgenp->vsw_portp) &&
   3855 	    (vgenp->vsw_portp != NULL)) {
   3856 		/*
   3857 		 * As the channel is down, use the switch port until
   3858 		 * the channel becomes ready to be used.
   3859 		 */
   3860 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_TRUE);
   3861 	}
   3862 
   3863 	if (vgenp->vsw_portp == ldcp->portp) {
   3864 		vio_net_report_err_t rep_err =
   3865 		    ldcp->portp->vcb.vio_net_report_err;
   3866 
   3867 		/* Post a reset message */
   3868 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
   3869 	}
   3870 
   3871 	if (ldcp->hphase != VH_PHASE0) {
   3872 		vgen_handshake_reset(ldcp);
   3873 	}
   3874 
   3875 	/* try to bring the channel up */
   3876 #ifdef	VNET_IOC_DEBUG
   3877 	if (ldcp->link_down_forced == B_FALSE) {
   3878 		rv = ldc_up(ldcp->ldc_handle);
   3879 		if (rv != 0) {
   3880 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
   3881 		}
   3882 	}
   3883 #else
   3884 	rv = ldc_up(ldcp->ldc_handle);
   3885 	if (rv != 0) {
   3886 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
   3887 	}
   3888 #endif
   3889 
   3890 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
   3891 		DWARN(vgenp, ldcp, "ldc_status err\n");
   3892 	} else {
   3893 		ldcp->ldc_status = istatus;
   3894 	}
   3895 
   3896 	/* if channel is already UP - restart handshake */
   3897 	if (ldcp->ldc_status == LDC_UP) {
   3898 		vgen_handle_evt_up(ldcp);
   3899 	}
   3900 
   3901 	DBG1(vgenp, ldcp, "exit\n");
   3902 }
   3903 
   3904 /* Interrupt handler for the channel */
   3905 static uint_t
   3906 vgen_ldc_cb(uint64_t event, caddr_t arg)
   3907 {
   3908 	_NOTE(ARGUNUSED(event))
   3909 	vgen_ldc_t	*ldcp;
   3910 	vgen_t		*vgenp;
   3911 	ldc_status_t 	istatus;
   3912 	vgen_stats_t	*statsp;
   3913 	timeout_id_t	cancel_htid = 0;
   3914 	uint_t		ret = LDC_SUCCESS;
   3915 
   3916 	ldcp = (vgen_ldc_t *)arg;
   3917 	vgenp = LDC_TO_VGEN(ldcp);
   3918 	statsp = &ldcp->stats;
   3919 
   3920 	DBG1(vgenp, ldcp, "enter\n");
   3921 
   3922 	mutex_enter(&ldcp->cblock);
   3923 	statsp->callbacks++;
   3924 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
   3925 		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
   3926 		    ldcp->ldc_status);
   3927 		mutex_exit(&ldcp->cblock);
   3928 		return (LDC_SUCCESS);
   3929 	}
   3930 
   3931 	/*
   3932 	 * cache cancel_htid before the events specific
   3933 	 * code may overwrite it. Do not clear ldcp->cancel_htid
   3934 	 * as it is also used to indicate the timer to quit immediately.
   3935 	 */
   3936 	cancel_htid = ldcp->cancel_htid;
   3937 
   3938 	/*
   3939 	 * NOTE: not using switch() as event could be triggered by
   3940 	 * a state change and a read request. Also the ordering	of the
   3941 	 * check for the event types is deliberate.
   3942 	 */
   3943 	if (event & LDC_EVT_UP) {
   3944 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
   3945 			DWARN(vgenp, ldcp, "ldc_status err\n");
   3946 			/* status couldn't be determined */
   3947 			ret = LDC_FAILURE;
   3948 			goto ldc_cb_ret;
   3949 		}
   3950 		ldcp->ldc_status = istatus;
   3951 		if (ldcp->ldc_status != LDC_UP) {
   3952 			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
   3953 			    " but ldc status is not UP(0x%x)\n",
   3954 			    ldcp->ldc_status);
   3955 			/* spurious interrupt, return success */
   3956 			goto ldc_cb_ret;
   3957 		}
   3958 		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
   3959 		    event, ldcp->ldc_status);
   3960 
   3961 		vgen_handle_evt_up(ldcp);
   3962 
   3963 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
   3964 	}
   3965 
   3966 	/* Handle RESET/DOWN before READ event */
   3967 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
   3968 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
   3969 			DWARN(vgenp, ldcp, "ldc_status error\n");
   3970 			/* status couldn't be determined */
   3971 			ret = LDC_FAILURE;
   3972 			goto ldc_cb_ret;
   3973 		}
   3974 		ldcp->ldc_status = istatus;
   3975 		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
   3976 		    event, ldcp->ldc_status);
   3977 
   3978 		vgen_handle_evt_reset(ldcp);
   3979 
   3980 		/*
   3981 		 * As the channel is down/reset, ignore READ event
   3982 		 * but print a debug warning message.
   3983 		 */
   3984 		if (event & LDC_EVT_READ) {
   3985 			DWARN(vgenp, ldcp,
   3986 			    "LDC_EVT_READ set along with RESET/DOWN\n");
   3987 			event &= ~LDC_EVT_READ;
   3988 		}
   3989 	}
   3990 
   3991 	if (event & LDC_EVT_READ) {
   3992 		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
   3993 		    event, ldcp->ldc_status);
   3994 
   3995 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
   3996 
   3997 		if (ldcp->rcv_thread != NULL) {
   3998 			/*
   3999 			 * If the receive thread is enabled, then
   4000 			 * wakeup the receive thread to process the
   4001 			 * LDC messages.
   4002 			 */
   4003 			mutex_exit(&ldcp->cblock);
   4004 			mutex_enter(&ldcp->rcv_thr_lock);
   4005 			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
   4006 				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
   4007 				cv_signal(&ldcp->rcv_thr_cv);
   4008 			}
   4009 			mutex_exit(&ldcp->rcv_thr_lock);
   4010 			mutex_enter(&ldcp->cblock);
   4011 		} else  {
   4012 			vgen_handle_evt_read(ldcp);
   4013 		}
   4014 	}
   4015 
   4016 ldc_cb_ret:
   4017 	/*
   4018 	 * Check to see if the status of cancel_htid has
   4019 	 * changed. If another timer needs to be cancelled,
   4020 	 * then let the next callback to clear it.
   4021 	 */
   4022 	if (cancel_htid == 0) {
   4023 		cancel_htid = ldcp->cancel_htid;
   4024 	}
   4025 	mutex_exit(&ldcp->cblock);
   4026 
   4027 	if (cancel_htid) {
   4028 		/*
   4029 		 * Cancel handshake timer.
   4030 		 * untimeout(9F) will not return until the pending callback is
   4031 		 * cancelled or has run. No problems will result from calling
   4032 		 * untimeout if the handler has already completed.
   4033 		 * If the timeout handler did run, then it would just
   4034 		 * return as cancel_htid is set.
   4035 		 */
   4036 		DBG2(vgenp, ldcp, "cancel_htid =0x%X \n", cancel_htid);
   4037 		(void) untimeout(cancel_htid);
   4038 		mutex_enter(&ldcp->cblock);
   4039 		/* clear it only if its the same as the one we cancelled */
   4040 		if (ldcp->cancel_htid == cancel_htid) {
   4041 			ldcp->cancel_htid = 0;
   4042 		}
   4043 		mutex_exit(&ldcp->cblock);
   4044 	}
   4045 	DBG1(vgenp, ldcp, "exit\n");
   4046 	return (ret);
   4047 }
   4048 
   4049 static void
   4050 vgen_handle_evt_read(vgen_ldc_t *ldcp)
   4051 {
   4052 	int		rv;
   4053 	uint64_t	*ldcmsg;
   4054 	size_t		msglen;
   4055 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
   4056 	vio_msg_tag_t	*tagp;
   4057 	ldc_status_t 	istatus;
   4058 	boolean_t 	has_data;
   4059 
   4060 	DBG1(vgenp, ldcp, "enter\n");
   4061 
   4062 	ldcmsg = ldcp->ldcmsg;
   4063 	/*
   4064 	 * If the receive thread is enabled, then the cblock
   4065 	 * need to be acquired here. If not, the vgen_ldc_cb()
   4066 	 * calls this function with cblock held already.
   4067 	 */
   4068 	if (ldcp->rcv_thread != NULL) {
   4069 		mutex_enter(&ldcp->cblock);
   4070 	} else {
   4071 		ASSERT(MUTEX_HELD(&ldcp->cblock));
   4072 	}
   4073 
   4074 vgen_evt_read:
   4075 	do {
   4076 		msglen = ldcp->msglen;
   4077 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
   4078 
   4079 		if (rv != 0) {
   4080 			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
   4081 			    rv, msglen);
   4082 			if (rv == ECONNRESET)
   4083 				goto vgen_evtread_error;
   4084 			break;
   4085 		}
   4086 		if (msglen == 0) {
   4087 			DBG2(vgenp, ldcp, "ldc_read NODATA");
   4088 			break;
   4089 		}
   4090 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
   4091 
   4092 		tagp = (vio_msg_tag_t *)ldcmsg;
   4093 
   4094 		if (ldcp->peer_sid) {
   4095 			/*
   4096 			 * check sid only after we have received peer's sid
   4097 			 * in the version negotiate msg.
   4098 			 */
   4099 #ifdef DEBUG
   4100 			if (vgen_hdbg & HDBG_BAD_SID) {
   4101 				/* simulate bad sid condition */
   4102 				tagp->vio_sid = 0;
   4103 				vgen_hdbg &= ~(HDBG_BAD_SID);
   4104 			}
   4105 #endif
   4106 			rv = vgen_check_sid(ldcp, tagp);
   4107 			if (rv != VGEN_SUCCESS) {
   4108 				/*
   4109 				 * If sid mismatch is detected,
   4110 				 * reset the channel.
   4111 				 */
   4112 				goto vgen_evtread_error;
   4113 			}
   4114 		}
   4115 
   4116 		switch (tagp->vio_msgtype) {
   4117 		case VIO_TYPE_CTRL:
   4118 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
   4119 			break;
   4120 
   4121 		case VIO_TYPE_DATA:
   4122 			rv = vgen_handle_datamsg(ldcp, tagp, msglen);
   4123 			break;
   4124 
   4125 		case VIO_TYPE_ERR:
   4126 			vgen_handle_errmsg(ldcp, tagp);
   4127 			break;
   4128 
   4129 		default:
   4130 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
   4131 			    tagp->vio_msgtype);
   4132 			break;
   4133 		}
   4134 
   4135 		/*
   4136 		 * If an error is encountered, stop processing and
   4137 		 * handle the error.
   4138 		 */
   4139 		if (rv != 0) {
   4140 			goto vgen_evtread_error;
   4141 		}
   4142 
   4143 	} while (msglen);
   4144 
   4145 	/* check once more before exiting */
   4146 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
   4147 	if ((rv == 0) && (has_data == B_TRUE)) {
   4148 		DTRACE_PROBE(vgen_chkq);
   4149 		goto vgen_evt_read;
   4150 	}
   4151 
   4152 vgen_evtread_error:
   4153 	if (rv == ECONNRESET) {
   4154 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
   4155 			DWARN(vgenp, ldcp, "ldc_status err\n");
   4156 		} else {
   4157 			ldcp->ldc_status = istatus;
   4158 		}
   4159 		vgen_handle_evt_reset(ldcp);
   4160 	} else if (rv) {
   4161 		vgen_ldc_reset(ldcp);
   4162 	}
   4163 
   4164 	/*
   4165 	 * If the receive thread is enabled, then cancel the
   4166 	 * handshake timeout here.
   4167 	 */
   4168 	if (ldcp->rcv_thread != NULL) {
   4169 		timeout_id_t cancel_htid = ldcp->cancel_htid;
   4170 
   4171 		mutex_exit(&ldcp->cblock);
   4172 		if (cancel_htid) {
   4173 			/*
   4174 			 * Cancel handshake timer. untimeout(9F) will
   4175 			 * not return until the pending callback is cancelled
   4176 			 * or has run. No problems will result from calling
   4177 			 * untimeout if the handler has already completed.
   4178 			 * If the timeout handler did run, then it would just
   4179 			 * return as cancel_htid is set.
   4180 			 */
   4181 			DBG2(vgenp, ldcp, "cancel_htid =0x%X \n", cancel_htid);
   4182 			(void) untimeout(cancel_htid);
   4183 
   4184 			/*
   4185 			 * clear it only if its the same as the one we
   4186 			 * cancelled
   4187 			 */
   4188 			mutex_enter(&ldcp->cblock);
   4189 			if (ldcp->cancel_htid == cancel_htid) {
   4190 				ldcp->cancel_htid = 0;
   4191 			}
   4192 			mutex_exit(&ldcp->cblock);
   4193 		}
   4194 	}
   4195 
   4196 	DBG1(vgenp, ldcp, "exit\n");
   4197 }
   4198 
   4199 /* vgen handshake functions */
   4200 
   4201 /* change the hphase for the channel to the next phase */
   4202 static vgen_ldc_t *
   4203 vh_nextphase(vgen_ldc_t *ldcp)
   4204 {
   4205 	if (ldcp->hphase == VH_PHASE3) {
   4206 		ldcp->hphase = VH_DONE;
   4207 	} else {
   4208 		ldcp->hphase++;
   4209 	}
   4210 	return (ldcp);
   4211 }
   4212 
   4213 /*
   4214  * wrapper routine to send the given message over ldc using ldc_write().
   4215  */
   4216 static int
   4217 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
   4218     boolean_t caller_holds_lock)
   4219 {
   4220 	int			rv;
   4221 	size_t			len;
   4222 	uint32_t		retries = 0;
   4223 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
   4224 	vio_msg_tag_t		*tagp = (vio_msg_tag_t *)msg;
   4225 	vio_dring_msg_t		*dmsg;
   4226 	vio_raw_data_msg_t	*rmsg;
   4227 	boolean_t		data_msg = B_FALSE;
   4228 
   4229 	len = msglen;
   4230 	if ((len == 0) || (msg == NULL))
   4231 		return (VGEN_FAILURE);
   4232 
   4233 	if (!caller_holds_lock) {
   4234 		mutex_enter(&ldcp->wrlock);
   4235 	}
   4236 
   4237 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
   4238 		if (tagp->vio_subtype_env == VIO_DRING_DATA) {
   4239 			dmsg = (vio_dring_msg_t *)tagp;
   4240 			dmsg->seq_num = ldcp->next_txseq;
   4241 			data_msg = B_TRUE;
   4242 		} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
   4243 			rmsg = (vio_raw_data_msg_t *)tagp;
   4244 			rmsg->seq_num = ldcp->next_txseq;
   4245 			data_msg = B_TRUE;
   4246 		}
   4247 	}
   4248 
   4249 	do {
   4250 		len = msglen;
   4251 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
   4252 		if (retries++ >= vgen_ldcwr_retries)
   4253 			break;
   4254 	} while (rv == EWOULDBLOCK);
   4255 
   4256 	if (rv == 0 && data_msg == B_TRUE) {
   4257 		ldcp->next_txseq++;
   4258 	}
   4259 
   4260 	if (!caller_holds_lock) {
   4261 		mutex_exit(&ldcp->wrlock);
   4262 	}
   4263 
   4264 	if (rv != 0) {
   4265 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n",
   4266 		    rv, msglen);
   4267 		return (rv);
   4268 	}
   4269 
   4270 	if (len != msglen) {
   4271 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
   4272 		    rv, msglen);
   4273 		return (VGEN_FAILURE);
   4274 	}
   4275 
   4276 	return (VGEN_SUCCESS);
   4277 }
   4278 
   4279 /* send version negotiate message to the peer over ldc */
   4280 static int
   4281 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
   4282 {
   4283 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
   4284 	vio_ver_msg_t	vermsg;
   4285 	vio_msg_tag_t	*tagp = &vermsg.tag;
   4286 	int		rv;
   4287 
   4288 	bzero(&vermsg, sizeof (vermsg));
   4289 
   4290 	tagp->vio_msgtype = VIO_TYPE_CTRL;
   4291 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
   4292 	tagp->vio_subtype_env = VIO_VER_INFO;
   4293 	tagp->vio_sid = ldcp->local_sid;
   4294 
   4295 	/* get version msg payload from ldcp->local */
   4296 	vermsg.ver_major = ldcp->local_hparams.ver_major;
   4297 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
   4298 	vermsg.dev_class = ldcp->local_hparams.dev_class;
   4299 
   4300 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
   4301 	if (rv != VGEN_SUCCESS) {
   4302 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
   4303 		return (rv);
   4304 	}
   4305 
   4306 	ldcp->hstate |= VER_INFO_SENT;
   4307 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
   4308 	    vermsg.ver_major, vermsg.ver_minor);
   4309 
   4310 	return (VGEN_SUCCESS);
   4311 }
   4312 
   4313 /* send attr info message to the peer over ldc */
   4314 static int
   4315 vgen_send_attr_info(vgen_ldc_t *ldcp)
   4316 {
   4317 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
   4318 	vnet_attr_msg_t	attrmsg;
   4319 	vio_msg_tag_t	*tagp = &attrmsg.tag;
   4320 	int		rv;
   4321 
   4322 	bzero(&attrmsg, sizeof (attrmsg));
   4323 
   4324 	tagp->vio_msgtype = VIO_TYPE_CTRL;
   4325 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
   4326 	tagp->vio_subtype_env = VIO_ATTR_INFO;
   4327 	tagp->vio_sid = ldcp->local_sid;
   4328 
   4329 	/* get attr msg payload from ldcp->local */
   4330 	attrmsg.mtu = ldcp->local_hparams.mtu;
   4331 	attrmsg.addr = ldcp->local_hparams.addr;
   4332 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
   4333 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
   4334 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
   4335 	attrmsg.physlink_update = ldcp->local_hparams.physlink_update;
   4336 
   4337 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
   4338 	if (rv != VGEN_SUCCESS) {
   4339 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
   4340 		return (rv);
   4341 	}
   4342 
   4343 	ldcp->hstate |= ATTR_INFO_SENT;
   4344 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
   4345 
   4346 	return (VGEN_SUCCESS);
   4347 }
   4348 
   4349 /* send descriptor ring register message to the peer over ldc */
   4350 static int
   4351 vgen_send_dring_reg(vgen_ldc_t *ldcp)
   4352 {
   4353 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
   4354 	vio_dring_reg_msg_t	msg;
   4355 	vio_msg_tag_t		*tagp = &msg.tag;
   4356 	int		rv;
   4357 
   4358 	bzero(&msg, sizeof (msg));
   4359 
   4360 	tagp->vio_msgtype = VIO_TYPE_CTRL;
   4361 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
   4362 	tagp->vio_subtype_env = VIO_DRING_REG;
   4363 	tagp->vio_sid = ldcp->local_sid;
   4364 
   4365 	/* get dring info msg payload from ldcp->local */
   4366 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
   4367 	    sizeof (ldc_mem_cookie_t));
   4368 	msg.ncookies = ldcp->local_hparams.num_dcookies;
   4369 	msg.num_descriptors = ldcp->local_hparams.num_desc;
   4370 	msg.descriptor_size = ldcp->local_hparams.desc_size;
   4371 
   4372 	/*
   4373 	 * dring_ident is set to 0. After mapping the dring, peer sets this
   4374 	 * value and sends it in the ack, which is saved in
   4375 	 * vgen_handle_dring_reg().
   4376 	 */
   4377 	msg.dring_ident = 0;
   4378 
   4379 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
   4380 	if (rv != VGEN_SUCCESS) {
   4381 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
   4382 		return (rv);
   4383 	}
   4384 
   4385 	ldcp->hstate |= DRING_INFO_SENT;
   4386 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
   4387 
   4388 	return (VGEN_SUCCESS);
   4389 }
   4390 
   4391 static int
   4392 vgen_send_rdx_info(vgen_ldc_t *ldcp)
   4393 {
   4394 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
   4395 	vio_rdx_msg_t	rdxmsg;
   4396 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
   4397 	int		rv;
   4398 
   4399 	bzero(&rdxmsg, sizeof (rdxmsg));
   4400 
   4401 	tagp->vio_msgtype = VIO_TYPE_CTRL;
   4402 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
   4403 	tagp->vio_subtype_env = VIO_RDX;
   4404 	tagp->vio_sid = ldcp->local_sid;
   4405 
   4406 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
   4407 	if (rv != VGEN_SUCCESS) {
   4408 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
   4409 		return (rv);
   4410 	}
   4411 
   4412 	ldcp->hstate |= RDX_INFO_SENT;
   4413 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
   4414 
   4415 	return (VGEN_SUCCESS);
   4416 }
   4417 
   4418 /* send descriptor ring data message to the peer over ldc */
   4419 static int
   4420 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
   4421 {
   4422 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
   4423 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
   4424 	vio_msg_tag_t	*tagp = &msgp->tag;
   4425 	vgen_stats_t	*statsp = &ldcp->stats;
   4426 	int		rv;
   4427 
   4428 	bzero(msgp, sizeof (*msgp));
   4429 
   4430 	tagp->vio_msgtype = VIO_TYPE_DATA;
   4431 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
   4432 	tagp->vio_subtype_env = VIO_DRING_DATA;
   4433 	tagp->vio_sid = ldcp->local_sid;
   4434 
   4435 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
   4436 	msgp->start_idx = start;
   4437 	msgp->end_idx = end;
   4438 
   4439 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
   4440 	if (rv != VGEN_SUCCESS) {
   4441 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
   4442 		return (rv);
   4443 	}
   4444 
   4445 	statsp->dring_data_msgs++;
   4446 
   4447 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
   4448 
   4449 	return (VGEN_SUCCESS);
   4450 }
   4451 
   4452 /* send multicast addr info message to vsw */
   4453 static int
   4454 vgen_send_mcast_info(vgen_ldc_t *ldcp)
   4455 {
   4456 	vnet_mcast_msg_t	mcastmsg;
   4457 	vnet_mcast_msg_t	*msgp;
   4458 	vio_msg_tag_t		*tagp;
   4459 	vgen_t			*vgenp;
   4460 	struct ether_addr	*mca;
   4461 	int			rv;
   4462 	int			i;
   4463 	uint32_t		size;
   4464 	uint32_t		mccount;
   4465 	uint32_t		n;
   4466 
   4467 	msgp = &mcastmsg;
   4468 	tagp = &msgp->tag;
   4469 	vgenp = LDC_TO_VGEN(ldcp);
   4470 
   4471 	mccount = vgenp->mccount;
   4472 	i = 0;
   4473 
   4474 	do {
   4475 		tagp->vio_msgtype = VIO_TYPE_CTRL;
   4476 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
   4477 		tagp->vio_subtype_env = VNET_MCAST_INFO;
   4478 		tagp->vio_sid = ldcp->local_sid;
   4479 
   4480 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
   4481 		size = n * sizeof (struct ether_addr);
   4482 
   4483 		mca = &(vgenp->mctab[i]);
   4484 		bcopy(mca, (msgp->mca), size);
   4485 		msgp->set = B_TRUE;
   4486 		msgp->count = n;
   4487 
   4488 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
   4489 		    B_FALSE);
   4490 		if (rv != VGEN_SUCCESS) {
   4491 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
   4492 			return (rv);
   4493 		}
   4494 
   4495 		mccount -= n;
   4496 		i += n;
   4497 
   4498 	} while (mccount);
   4499 
   4500 	return (VGEN_SUCCESS);
   4501 }
   4502 
   4503 /* Initiate Phase 2 of handshake */
   4504 static int
   4505 vgen_handshake_phase2(vgen_ldc_t *ldcp)
   4506 {
   4507 	int rv;
   4508 	uint32_t ncookies = 0;
   4509 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   4510 
   4511 #ifdef DEBUG
   4512 	if (vgen_hdbg & HDBG_OUT_STATE) {
   4513 		/* simulate out of state condition */
   4514 		vgen_hdbg &= ~(HDBG_OUT_STATE);
   4515 		rv = vgen_send_rdx_info(ldcp);
   4516 		return (rv);
   4517 	}
   4518 	if (vgen_hdbg & HDBG_TIMEOUT) {
   4519 		/* simulate timeout condition */
   4520 		vgen_hdbg &= ~(HDBG_TIMEOUT);
   4521 		return (VGEN_SUCCESS);
   4522 	}
   4523 #endif
   4524 	rv = vgen_send_attr_info(ldcp);
   4525 	if (rv != VGEN_SUCCESS) {
   4526 		return (rv);
   4527 	}
   4528 
   4529 	/* Bind descriptor ring to the channel */
   4530 	if (ldcp->num_txdcookies == 0) {
   4531 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
   4532 		    LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
   4533 		    &ldcp->tx_dcookie, &ncookies);
   4534 		if (rv != 0) {
   4535 			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
   4536 			    "rv(%x)\n", rv);
   4537 			return (rv);
   4538 		}
   4539 		ASSERT(ncookies == 1);
   4540 		ldcp->num_txdcookies = ncookies;
   4541 	}
   4542 
   4543 	/* update local dring_info params */
   4544 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
   4545 	    sizeof (ldc_mem_cookie_t));
   4546 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
   4547 	ldcp->local_hparams.num_desc = ldcp->num_txds;
   4548 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
   4549 
   4550 	rv = vgen_send_dring_reg(ldcp);
   4551 	if (rv != VGEN_SUCCESS) {
   4552 		return (rv);
   4553 	}
   4554 
   4555 	return (VGEN_SUCCESS);
   4556 }
   4557 
   4558 /*
   4559  * Set vnet-protocol-version dependent functions based on version.
   4560  */
   4561 static void
   4562 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
   4563 {
   4564 	vgen_hparams_t	*lp = &ldcp->local_hparams;
   4565 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
   4566 
   4567 	if (VGEN_VER_GTEQ(ldcp, 1, 5)) {
   4568 		vgen_port_t	*portp = ldcp->portp;
   4569 		vnet_t		*vnetp = vgenp->vnetp;
   4570 		/*
   4571 		 * If the version negotiated with vswitch is >= 1.5 (link
   4572 		 * status update support), set the required bits in our
   4573 		 * attributes if this vnet device has been configured to get
   4574 		 * physical link state updates.
   4575 		 */
   4576 		if (portp == vgenp->vsw_portp && vnetp->pls_update == B_TRUE) {
   4577 			lp->physlink_update = PHYSLINK_UPDATE_STATE;
   4578 		} else {
   4579 			lp->physlink_update = PHYSLINK_UPDATE_NONE;
   4580 		}
   4581 	}
   4582 
   4583 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
   4584 		/*
   4585 		 * If the version negotiated with peer is >= 1.4(Jumbo Frame
   4586 		 * Support), set the mtu in our attributes to max_frame_size.
   4587 		 */
   4588 		lp->mtu = vgenp->max_frame_size;
   4589 	} else  if (VGEN_VER_EQ(ldcp, 1, 3)) {
   4590 		/*
   4591 		 * If the version negotiated with peer is == 1.3 (Vlan Tag
   4592 		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
   4593 		 */
   4594 		lp->mtu = ETHERMAX + VLAN_TAGSZ;
   4595 	} else {
   4596 		vgen_port_t	*portp = ldcp->portp;
   4597 		vnet_t		*vnetp = vgenp->vnetp;
   4598 		/*
   4599 		 * Pre-1.3 peers expect max frame size of ETHERMAX.
   4600 		 * We can negotiate that size with those peers provided the
   4601 		 * following conditions are true:
   4602 		 * - Only pvid is defined for our peer and there are no vids.
   4603 		 * - pvids are equal.
   4604 		 * If the above conditions are true, then we can send/recv only
   4605 		 * untagged frames of max size ETHERMAX.
   4606 		 */
   4607 		if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
   4608 			lp->mtu = ETHERMAX;
   4609 		}
   4610 	}
   4611 
   4612 	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {
   4613 		/* Versions >= 1.2 */
   4614 
   4615 		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
   4616 			/*
   4617 			 * enable priority routines and pkt mode only if
   4618 			 * at least one pri-eth-type is specified in MD.
   4619 			 */
   4620 
   4621 			ldcp->tx = vgen_ldcsend;
   4622 			ldcp->rx_pktdata = vgen_handle_pkt_data;
   4623 
   4624 			/* set xfer mode for vgen_send_attr_info() */
   4625 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
   4626 
   4627 		} else {
   4628 			/* no priority eth types defined in MD */
   4629 
   4630 			ldcp->tx = vgen_ldcsend_dring;
   4631 			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
   4632 
   4633 			/* set xfer mode for vgen_send_attr_info() */
   4634 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
   4635 
   4636 		}
   4637 	} else {
   4638 		/* Versions prior to 1.2  */
   4639 
   4640 		vgen_reset_vnet_proto_ops(ldcp);
   4641 	}
   4642 }
   4643 
   4644 /*
   4645  * Reset vnet-protocol-version dependent functions to pre-v1.2.
   4646  */
   4647 static void
   4648 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
   4649 {
   4650 	vgen_hparams_t	*lp = &ldcp->local_hparams;
   4651 
   4652 	ldcp->tx = vgen_ldcsend_dring;
   4653 	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
   4654 
   4655 	/* set xfer mode for vgen_send_attr_info() */
   4656 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
   4657 }
   4658 
   4659 static void
   4660 vgen_vlan_unaware_port_reset(vgen_port_t *portp)
   4661 {
   4662 	vgen_ldclist_t	*ldclp;
   4663 	vgen_ldc_t	*ldcp;
   4664 	vgen_t		*vgenp = portp->vgenp;
   4665 	vnet_t		*vnetp = vgenp->vnetp;
   4666 
   4667 	ldclp = &portp->ldclist;
   4668 
   4669 	READ_ENTER(&ldclp->rwlock);
   4670 
   4671 	/*
   4672 	 * NOTE: for now, we will assume we have a single channel.
   4673 	 */
   4674 	if (ldclp->headp == NULL) {
   4675 		RW_EXIT(&ldclp->rwlock);
   4676 		return;
   4677 	}
   4678 	ldcp = ldclp->headp;
   4679 
   4680 	mutex_enter(&ldcp->cblock);
   4681 
   4682 	/*
   4683 	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
   4684 	 * the connection. See comments in vgen_set_vnet_proto_ops().
   4685 	 */
   4686 	if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
   4687 	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
   4688 		vgen_ldc_reset(ldcp);
   4689 	}
   4690 
   4691 	mutex_exit(&ldcp->cblock);
   4692 
   4693 	RW_EXIT(&ldclp->rwlock);
   4694 }
   4695 
   4696 static void
   4697 vgen_port_reset(vgen_port_t *portp)
   4698 {
   4699 	vgen_ldclist_t	*ldclp;
   4700 	vgen_ldc_t	*ldcp;
   4701 
   4702 	ldclp = &portp->ldclist;
   4703 
   4704 	READ_ENTER(&ldclp->rwlock);
   4705 
   4706 	/*
   4707 	 * NOTE: for now, we will assume we have a single channel.
   4708 	 */
   4709 	if (ldclp->headp == NULL) {
   4710 		RW_EXIT(&ldclp->rwlock);
   4711 		return;
   4712 	}
   4713 	ldcp = ldclp->headp;
   4714 
   4715 	mutex_enter(&ldcp->cblock);
   4716 
   4717 	vgen_ldc_reset(ldcp);
   4718 
   4719 	mutex_exit(&ldcp->cblock);
   4720 
   4721 	RW_EXIT(&ldclp->rwlock);
   4722 }
   4723 
   4724 static void
   4725 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
   4726 {
   4727 	vgen_port_t	*portp;
   4728 	vgen_portlist_t	*plistp;
   4729 
   4730 	plistp = &(vgenp->vgenports);
   4731 	READ_ENTER(&plistp->rwlock);
   4732 
   4733 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
   4734 
   4735 		vgen_vlan_unaware_port_reset(portp);
   4736 
   4737 	}
   4738 
   4739 	RW_EXIT(&plistp->rwlock);
   4740 }
   4741 
   4742 static void
   4743 vgen_reset_vsw_port(vgen_t *vgenp)
   4744 {
   4745 	vgen_port_t	*portp;
   4746 
   4747 	if ((portp = vgenp->vsw_portp) != NULL) {
   4748 		vgen_port_reset(portp);
   4749 	}
   4750 }
   4751 
   4752 /*
   4753  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
   4754  * This can happen after a channel comes up (status: LDC_UP) or
   4755  * when handshake gets terminated due to various conditions.
   4756  */
   4757 static void
   4758 vgen_reset_hphase(vgen_ldc_t *ldcp)
   4759 {
   4760 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   4761 	ldc_status_t istatus;
   4762 	int rv;
   4763 
   4764 	DBG1(vgenp, ldcp, "enter\n");
   4765 	/* reset hstate and hphase */
   4766 	ldcp->hstate = 0;
   4767 	ldcp->hphase = VH_PHASE0;
   4768 
   4769 	vgen_reset_vnet_proto_ops(ldcp);
   4770 
   4771 	/*
   4772 	 * Save the id of pending handshake timer in cancel_htid.
   4773 	 * This will be checked in vgen_ldc_cb() and the handshake timer will
   4774 	 * be cancelled after releasing cblock.
   4775 	 */
   4776 	if (ldcp->htid) {
   4777 		ldcp->cancel_htid = ldcp->htid;
   4778 		ldcp->htid = 0;
   4779 	}
   4780 
   4781 	if (ldcp->local_hparams.dring_ready) {
   4782 		ldcp->local_hparams.dring_ready = B_FALSE;
   4783 	}
   4784 
   4785 	/* Unbind tx descriptor ring from the channel */
   4786 	if (ldcp->num_txdcookies) {
   4787 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
   4788 		if (rv != 0) {
   4789 			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
   4790 		}
   4791 		ldcp->num_txdcookies = 0;
   4792 	}
   4793 
   4794 	if (ldcp->peer_hparams.dring_ready) {
   4795 		ldcp->peer_hparams.dring_ready = B_FALSE;
   4796 		/* Unmap peer's dring */
   4797 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
   4798 		vgen_clobber_rxds(ldcp);
   4799 	}
   4800 
   4801 	vgen_clobber_tbufs(ldcp);
   4802 
   4803 	/*
   4804 	 * clear local handshake params and initialize.
   4805 	 */
   4806 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
   4807 
   4808 	/* set version to the highest version supported */
   4809 	ldcp->local_hparams.ver_major =
   4810 	    ldcp->vgen_versions[0].ver_major;
   4811 	ldcp->local_hparams.ver_minor =
   4812 	    ldcp->vgen_versions[0].ver_minor;
   4813 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
   4814 
   4815 	/* set attr_info params */
   4816 	ldcp->local_hparams.mtu = vgenp->max_frame_size;
   4817 	ldcp->local_hparams.addr =
   4818 	    vnet_macaddr_strtoul(vgenp->macaddr);
   4819 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
   4820 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
   4821 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
   4822 	ldcp->local_hparams.physlink_update = PHYSLINK_UPDATE_NONE;
   4823 
   4824 	/*
   4825 	 * Note: dring is created, but not bound yet.
   4826 	 * local dring_info params will be updated when we bind the dring in
   4827 	 * vgen_handshake_phase2().
   4828 	 * dring_ident is set to 0. After mapping the dring, peer sets this
   4829 	 * value and sends it in the ack, which is saved in
   4830 	 * vgen_handle_dring_reg().
   4831 	 */
   4832 	ldcp->local_hparams.dring_ident = 0;
   4833 
   4834 	/* clear peer_hparams */
   4835 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
   4836 
   4837 	/* reset the channel if required */
   4838 #ifdef	VNET_IOC_DEBUG
   4839 	if (ldcp->need_ldc_reset && !ldcp->link_down_forced) {
   4840 #else
   4841 	if (ldcp->need_ldc_reset) {
   4842 #endif
   4843 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
   4844 		ldcp->need_ldc_reset = B_FALSE;
   4845 		(void) ldc_down(ldcp->ldc_handle);
   4846 		(void) ldc_status(ldcp->ldc_handle, &istatus);
   4847 		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
   4848 		ldcp->ldc_status = istatus;
   4849 
   4850 		/* clear sids */
   4851 		ldcp->local_sid = 0;
   4852 		ldcp->peer_sid = 0;
   4853 
   4854 		/* try to bring the channel up */
   4855 		rv = ldc_up(ldcp->ldc_handle);
   4856 		if (rv != 0) {
   4857 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
   4858 		}
   4859 
   4860 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
   4861 			DWARN(vgenp, ldcp, "ldc_status err\n");
   4862 		} else {
   4863 			ldcp->ldc_status = istatus;
   4864 		}
   4865 	}
   4866 }
   4867 
   4868 /* wrapper function for vgen_reset_hphase */
   4869 static void
   4870 vgen_handshake_reset(vgen_ldc_t *ldcp)
   4871 {
   4872 	vgen_t  *vgenp = LDC_TO_VGEN(ldcp);
   4873 
   4874 	ASSERT(MUTEX_HELD(&ldcp->cblock));
   4875 	mutex_enter(&ldcp->rxlock);
   4876 	mutex_enter(&ldcp->wrlock);
   4877 	mutex_enter(&ldcp->txlock);
   4878 	mutex_enter(&ldcp->tclock);
   4879 
   4880 	vgen_reset_hphase(ldcp);
   4881 
   4882 	mutex_exit(&ldcp->tclock);
   4883 	mutex_exit(&ldcp->txlock);
   4884 	mutex_exit(&ldcp->wrlock);
   4885 	mutex_exit(&ldcp->rxlock);
   4886 
   4887 	/*
   4888 	 * As the connection is now reset, mark the channel
   4889 	 * link_state as 'down' and notify the stack if needed.
   4890 	 */
   4891 	if (ldcp->link_state != LINK_STATE_DOWN) {
   4892 		ldcp->link_state = LINK_STATE_DOWN;
   4893 
   4894 		if (ldcp->portp == vgenp->vsw_portp) { /* vswitch port ? */
   4895 			/*
   4896 			 * As the channel link is down, mark physical link also
   4897 			 * as down. After the channel comes back up and
   4898 			 * handshake completes, we will get an update on the
   4899 			 * physlink state from vswitch (if this device has been
   4900 			 * configured to get phys link updates).
   4901 			 */
   4902 			vgenp->phys_link_state = LINK_STATE_DOWN;
   4903 
   4904 			/* Now update the stack */
   4905 			mutex_exit(&ldcp->cblock);
   4906 			vgen_link_update(vgenp, ldcp->link_state);
   4907 			mutex_enter(&ldcp->cblock);
   4908 		}
   4909 	}
   4910 }
   4911 
   4912 /*
   4913  * Initiate handshake with the peer by sending various messages
   4914  * based on the handshake-phase that the channel is currently in.
   4915  */
   4916 static void
   4917 vgen_handshake(vgen_ldc_t *ldcp)
   4918 {
   4919 	uint32_t	hphase = ldcp->hphase;
   4920 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
   4921 	ldc_status_t	istatus;
   4922 	int		rv = 0;
   4923 
   4924 	switch (hphase) {
   4925 
   4926 	case VH_PHASE1:
   4927 
   4928 		/*
   4929 		 * start timer, for entire handshake process, turn this timer
   4930 		 * off if all phases of handshake complete successfully and
   4931 		 * hphase goes to VH_DONE(below) or
   4932 		 * vgen_reset_hphase() gets called or
   4933 		 * channel is reset due to errors or
   4934 		 * vgen_ldc_uninit() is invoked(vgen_stop).
   4935 		 */
   4936 		ASSERT(ldcp->htid == 0);
   4937 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
   4938 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
   4939 
   4940 		/* Phase 1 involves negotiating the version */
   4941 		rv = vgen_send_version_negotiate(ldcp);
   4942 		break;
   4943 
   4944 	case VH_PHASE2:
   4945 		rv = vgen_handshake_phase2(ldcp);
   4946 		break;
   4947 
   4948 	case VH_PHASE3:
   4949 		rv = vgen_send_rdx_info(ldcp);
   4950 		break;
   4951 
   4952 	case VH_DONE:
   4953 		/*
   4954 		 * Save the id of pending handshake timer in cancel_htid.
   4955 		 * This will be checked in vgen_ldc_cb() and the handshake
   4956 		 * timer will be cancelled after releasing cblock.
   4957 		 */
   4958 		if (ldcp->htid) {
   4959 			ldcp->cancel_htid = ldcp->htid;
   4960 			ldcp->htid = 0;
   4961 		}
   4962 		ldcp->hretries = 0;
   4963 		DBG1(vgenp, ldcp, "Handshake Done\n");
   4964 
   4965 		/*
   4966 		 * The channel is up and handshake is done successfully. Now we
   4967 		 * can mark the channel link_state as 'up'. We also notify the
   4968 		 * stack if the channel is connected to vswitch.
   4969 		 */
   4970 		ldcp->link_state = LINK_STATE_UP;
   4971 
   4972 		if (ldcp->portp == vgenp->vsw_portp) {
   4973 			/*
   4974 			 * If this channel(port) is connected to vsw,
   4975 			 * need to sync multicast table with vsw.
   4976 			 */
   4977 			mutex_exit(&ldcp->cblock);
   4978 
   4979 			mutex_enter(&vgenp->lock);
   4980 			rv = vgen_send_mcast_info(ldcp);
   4981 			mutex_exit(&vgenp->lock);
   4982 
   4983 			if (vgenp->pls_negotiated == B_FALSE) {
   4984 				/*
   4985 				 * We haven't negotiated with vswitch to get
   4986 				 * physical link state updates. We can update
   4987 				 * update the stack at this point as the
   4988 				 * channel to vswitch is up and the handshake
   4989 				 * is done successfully.
   4990 				 *
   4991 				 * If we have negotiated to get physical link
   4992 				 * state updates, then we won't notify the
   4993 				 * the stack here; we do that as soon as
   4994 				 * vswitch sends us the initial phys link state
   4995 				 * (see vgen_handle_physlink_info()).
   4996 				 */
   4997 				vgen_link_update(vgenp, ldcp->link_state);
   4998 			}
   4999 
   5000 			mutex_enter(&ldcp->cblock);
   5001 			if (rv != VGEN_SUCCESS)
   5002 				break;
   5003 		}
   5004 
   5005 		/*
   5006 		 * Check if mac layer should be notified to restart
   5007 		 * transmissions. This can happen if the channel got
   5008 		 * reset and vgen_clobber_tbufs() is called, while
   5009 		 * need_resched is set.
   5010 		 */
   5011 		mutex_enter(&ldcp->tclock);
   5012 		if (ldcp->need_resched) {
   5013 			vio_net_tx_update_t vtx_update =
   5014 			    ldcp->portp->vcb.vio_net_tx_update;
   5015 
   5016 			ldcp->need_resched = B_FALSE;
   5017 			vtx_update(ldcp->portp->vhp);
   5018 		}
   5019 		mutex_exit(&ldcp->tclock);
   5020 
   5021 		break;
   5022 
   5023 	default:
   5024 		break;
   5025 	}
   5026 
   5027 	if (rv == ECONNRESET) {
   5028 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
   5029 			DWARN(vgenp, ldcp, "ldc_status err\n");
   5030 		} else {
   5031 			ldcp->ldc_status = istatus;
   5032 		}
   5033 		vgen_handle_evt_reset(ldcp);
   5034 	} else if (rv) {
   5035 		vgen_handshake_reset(ldcp);
   5036 	}
   5037 }
   5038 
   5039 /*
   5040  * Check if the current handshake phase has completed successfully and
   5041  * return the status.
   5042  */
   5043 static int
   5044 vgen_handshake_done(vgen_ldc_t *ldcp)
   5045 {
   5046 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
   5047 	uint32_t	hphase = ldcp->hphase;
   5048 	int 		status = 0;
   5049 
   5050 	switch (hphase) {
   5051 
   5052 	case VH_PHASE1:
   5053 		/*
   5054 		 * Phase1 is done, if version negotiation
   5055 		 * completed successfully.
   5056 		 */
   5057 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
   5058 		    VER_NEGOTIATED);
   5059 		break;
   5060 
   5061 	case VH_PHASE2:
   5062 		/*
   5063 		 * Phase 2 is done, if attr info and dring info
   5064 		 * have been exchanged successfully.
   5065 		 */
   5066 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
   5067 		    ATTR_INFO_EXCHANGED) &&
   5068 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
   5069 		    DRING_INFO_EXCHANGED));
   5070 		break;
   5071 
   5072 	case VH_PHASE3:
   5073 		/* Phase 3 is done, if rdx msg has been exchanged */
   5074 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
   5075 		    RDX_EXCHANGED);
   5076 		break;
   5077 
   5078 	default:
   5079 		break;
   5080 	}
   5081 
   5082 	if (status == 0) {
   5083 		return (VGEN_FAILURE);
   5084 	}
   5085 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
   5086 	return (VGEN_SUCCESS);
   5087 }
   5088 
   5089 /* retry handshake on failure */
   5090 static void
   5091 vgen_handshake_retry(vgen_ldc_t *ldcp)
   5092 {
   5093 	/* reset handshake phase */
   5094 	vgen_handshake_reset(ldcp);
   5095 
   5096 	/* handshake retry is specified and the channel is UP */
   5097 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
   5098 		if (ldcp->hretries++ < vgen_max_hretries) {
   5099 			ldcp->local_sid = ddi_get_lbolt();
   5100 			vgen_handshake(vh_nextphase(ldcp));
   5101 		}
   5102 	}
   5103 }
   5104 
   5105 
   5106 /*
   5107  * Link State Update Notes:
   5108  * The link state of the channel connected to vswitch is reported as the link
   5109  * state of the vnet device, by default. If the channel is down or reset, then
   5110  * the link state is marked 'down'. If the channel is 'up' *and* handshake
   5111  * between the vnet and vswitch is successful, then the link state is marked
   5112  * 'up'. If physical network link state is desired, then the vnet device must
   5113  * be configured to get physical link updates and the 'linkprop' property
   5114  * in the virtual-device MD node indicates this. As part of attribute exchange
   5115  * the vnet device negotiates with the vswitch to obtain physical link state
   5116  * updates. If it successfully negotiates, vswitch sends an initial physlink
   5117  * msg once the handshake is done and further whenever the physical link state
   5118  * changes. Currently we don't have mac layer interfaces to report two distinct
   5119  * link states - virtual and physical. Thus, if the vnet has been configured to
   5120  * get physical link updates, then the link status will be reported as 'up'
   5121  * only when both the virtual and physical links are up.
   5122  */
   5123 static void
   5124 vgen_link_update(vgen_t *vgenp, link_state_t link_state)
   5125 {
   5126 	vnet_link_update(vgenp->vnetp, link_state);
   5127 }
   5128 
   5129 /*
   5130  * Handle a version info msg from the peer or an ACK/NACK from the peer
   5131  * to a version info msg that we sent.
   5132  */
   5133 static int
   5134 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
   5135 {
   5136 	vgen_t		*vgenp;
   5137 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
   5138 	int		ack = 0;
   5139 	int		failed = 0;
   5140 	int		idx;
   5141 	vgen_ver_t	*versions = ldcp->vgen_versions;
   5142 	int		rv = 0;
   5143 
   5144 	vgenp = LDC_TO_VGEN(ldcp);
   5145 	DBG1(vgenp, ldcp, "enter\n");
   5146 	switch (tagp->vio_subtype) {
   5147 	case VIO_SUBTYPE_INFO:
   5148 
   5149 		/*  Cache sid of peer if this is the first time */
   5150 		if (ldcp->peer_sid == 0) {
   5151 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
   5152 			    tagp->vio_sid);
   5153 			ldcp->peer_sid = tagp->vio_sid;
   5154 		}
   5155 
   5156 		if (ldcp->hphase != VH_PHASE1) {
   5157 			/*
   5158 			 * If we are not already in VH_PHASE1, reset to
   5159 			 * pre-handshake state, and initiate handshake
   5160 			 * to the peer too.
   5161 			 */
   5162 			vgen_handshake_reset(ldcp);
   5163 			vgen_handshake(vh_nextphase(ldcp));
   5164 		}
   5165 		ldcp->hstate |= VER_INFO_RCVD;
   5166 
   5167 		/* save peer's requested values */
   5168 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
   5169 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
   5170 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
   5171 
   5172 		if ((vermsg->dev_class != VDEV_NETWORK) &&
   5173 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
   5174 			/* unsupported dev_class, send NACK */
   5175 
   5176 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
   5177 
   5178 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
   5179 			tagp->vio_sid = ldcp->local_sid;
   5180 			/* send reply msg back to peer */
   5181 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
   5182 			    sizeof (*vermsg), B_FALSE);
   5183 			if (rv != VGEN_SUCCESS) {
   5184 				return (rv);
   5185 			}
   5186 			return (VGEN_FAILURE);
   5187 		}
   5188 
   5189 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
   5190 		    vermsg->ver_major,  vermsg->ver_minor);
   5191 
   5192 		idx = 0;
   5193 
   5194 		for (;;) {
   5195 
   5196 			if (vermsg->ver_major > versions[idx].ver_major) {
   5197 
   5198 				/* nack with next lower version */
   5199 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
   5200 				vermsg->ver_major = versions[idx].ver_major;
   5201 				vermsg->ver_minor = versions[idx].ver_minor;
   5202 				break;
   5203 			}
   5204 
   5205 			if (vermsg->ver_major == versions[idx].ver_major) {
   5206 
   5207 				/* major version match - ACK version */
   5208 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
   5209 				ack = 1;
   5210 
   5211 				/*
   5212 				 * lower minor version to the one this endpt
   5213 				 * supports, if necessary
   5214 				 */
   5215 				if (vermsg->ver_minor >
   5216 				    versions[idx].ver_minor) {
   5217 					vermsg->ver_minor =
   5218 					    versions[idx].ver_minor;
   5219 					ldcp->peer_hparams.ver_minor =
   5220 					    versions[idx].ver_minor;
   5221 				}
   5222 				break;
   5223 			}
   5224 
   5225 			idx++;
   5226 
   5227 			if (idx == VGEN_NUM_VER) {
   5228 
   5229 				/* no version match - send NACK */
   5230 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
   5231 				vermsg->ver_major = 0;
   5232 				vermsg->ver_minor = 0;
   5233 				failed = 1;
   5234 				break;
   5235 			}
   5236 
   5237 		}
   5238 
   5239 		tagp->vio_sid = ldcp->local_sid;
   5240 
   5241 		/* send reply msg back to peer */
   5242 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
   5243 		    B_FALSE);
   5244 		if (rv != VGEN_SUCCESS) {
   5245 			return (rv);
   5246 		}
   5247 
   5248 		if (ack) {
   5249 			ldcp->hstate |= VER_ACK_SENT;
   5250 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
   5251 			    vermsg->ver_major, vermsg->ver_minor);
   5252 		}
   5253 		if (failed) {
   5254 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
   5255 			return (VGEN_FAILURE);
   5256 		}
   5257 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
   5258 
   5259 			/*  VER_ACK_SENT and VER_ACK_RCVD */
   5260 
   5261 			/* local and peer versions match? */
   5262 			ASSERT((ldcp->local_hparams.ver_major ==
   5263 			    ldcp->peer_hparams.ver_major) &&
   5264 			    (ldcp->local_hparams.ver_minor ==
   5265 			    ldcp->peer_hparams.ver_minor));
   5266 
   5267 			vgen_set_vnet_proto_ops(ldcp);
   5268 
   5269 			/* move to the next phase */
   5270 			vgen_handshake(vh_nextphase(ldcp));
   5271 		}
   5272 
   5273 		break;
   5274 
   5275 	case VIO_SUBTYPE_ACK:
   5276 
   5277 		if (ldcp->hphase != VH_PHASE1) {
   5278 			/*  This should not happen. */
   5279 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
   5280 			return (VGEN_FAILURE);
   5281 		}
   5282 
   5283 		/* SUCCESS - we have agreed on a version */
   5284 		ldcp->local_hparams.ver_major = vermsg->ver_major;
   5285 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
   5286 		ldcp->hstate |= VER_ACK_RCVD;
   5287 
   5288 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
   5289 		    vermsg->ver_major,  vermsg->ver_minor);
   5290 
   5291 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
   5292 
   5293 			/*  VER_ACK_SENT and VER_ACK_RCVD */
   5294 
   5295 			/* local and peer versions match? */
   5296 			ASSERT((ldcp->local_hparams.ver_major ==
   5297 			    ldcp->peer_hparams.ver_major) &&
   5298 			    (ldcp->local_hparams.ver_minor ==
   5299 			    ldcp->peer_hparams.ver_minor));
   5300 
   5301 			vgen_set_vnet_proto_ops(ldcp);
   5302 
   5303 			/* move to the next phase */
   5304 			vgen_handshake(vh_nextphase(ldcp));
   5305 		}
   5306 		break;
   5307 
   5308 	case VIO_SUBTYPE_NACK:
   5309 
   5310 		if (ldcp->hphase != VH_PHASE1) {
   5311 			/*  This should not happen.  */
   5312 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
   5313 			"Phase(%u)\n", ldcp->hphase);
   5314 			return (VGEN_FAILURE);
   5315 		}
   5316 
   5317 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
   5318 		    vermsg->ver_major, vermsg->ver_minor);
   5319 
   5320 		/* check if version in NACK is zero */
   5321 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
   5322 			/*
   5323 			 * Version Negotiation has failed.
   5324 			 */
   5325 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
   5326 			return (VGEN_FAILURE);
   5327 		}
   5328 
   5329 		idx = 0;
   5330 
   5331 		for (;;) {
   5332 
   5333 			if (vermsg->ver_major > versions[idx].ver_major) {
   5334 				/* select next lower version */
   5335 
   5336 				ldcp->local_hparams.ver_major =
   5337 				    versions[idx].ver_major;
   5338 				ldcp->local_hparams.ver_minor =
   5339 				    versions[idx].ver_minor;
   5340 				break;
   5341 			}
   5342 
   5343 			if (vermsg->ver_major == versions[idx].ver_major) {
   5344 				/* major version match */
   5345 
   5346 				ldcp->local_hparams.ver_major =
   5347 				    versions[idx].ver_major;
   5348 
   5349 				ldcp->local_hparams.ver_minor =
   5350 				    versions[idx].ver_minor;
   5351 				break;
   5352 			}
   5353 
   5354 			idx++;
   5355 
   5356 			if (idx == VGEN_NUM_VER) {
   5357 				/*
   5358 				 * no version match.
   5359 				 * Version Negotiation has failed.
   5360 				 */
   5361 				DWARN(vgenp, ldcp,
   5362 				    "Version Negotiation Failed\n");
   5363 				return (VGEN_FAILURE);
   5364 			}
   5365 
   5366 		}
   5367 
   5368 		rv = vgen_send_version_negotiate(ldcp);
   5369 		if (rv != VGEN_SUCCESS) {
   5370 			return (rv);
   5371 		}
   5372 
   5373 		break;
   5374 	}
   5375 
   5376 	DBG1(vgenp, ldcp, "exit\n");
   5377 	return (VGEN_SUCCESS);
   5378 }
   5379 
   5380 /* Check if the attributes are supported */
   5381 static int
   5382 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
   5383 {
   5384 	vgen_hparams_t	*lp = &ldcp->local_hparams;
   5385 
   5386 	if ((msg->addr_type != ADDR_TYPE_MAC) ||
   5387 	    (msg->ack_freq > 64) ||
   5388 	    (msg->xfer_mode != lp->xfer_mode)) {
   5389 		return (VGEN_FAILURE);
   5390 	}
   5391 
   5392 	if (VGEN_VER_LT(ldcp, 1, 4)) {
   5393 		/* versions < 1.4, mtu must match */
   5394 		if (msg->mtu != lp->mtu) {
   5395 			return (VGEN_FAILURE);
   5396 		}
   5397 	} else {
   5398 		/* Ver >= 1.4, validate mtu of the peer is at least ETHERMAX */
   5399 		if (msg->mtu < ETHERMAX) {
   5400 			return (VGEN_FAILURE);
   5401 		}
   5402 	}
   5403 
   5404 	return (VGEN_SUCCESS);
   5405 }
   5406 
   5407 /*
   5408  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
   5409  * to an attr info msg that we sent.
   5410  */
   5411 static int
   5412 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
   5413 {
   5414 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
   5415 	vnet_attr_msg_t	*msg = (vnet_attr_msg_t *)tagp;
   5416 	vgen_hparams_t	*lp = &ldcp->local_hparams;
   5417 	vgen_hparams_t	*rp = &ldcp->peer_hparams;
   5418 	int		ack = 1;
   5419 	int		rv = 0;
   5420 	uint32_t	mtu;
   5421 
   5422 	DBG1(vgenp, ldcp, "enter\n");
   5423 	if (ldcp->hphase != VH_PHASE2) {
   5424 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
   5425 		" Invalid Phase(%u)\n",
   5426 		    tagp->vio_subtype, ldcp->hphase);
   5427 		return (VGEN_FAILURE);
   5428 	}
   5429 	switch (tagp->vio_subtype) {
   5430 	case VIO_SUBTYPE_INFO:
   5431 
   5432 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
   5433 		ldcp->hstate |= ATTR_INFO_RCVD;
   5434 
   5435 		/* save peer's values */
   5436 		rp->mtu = msg->mtu;
   5437 		rp->addr = msg->addr;
   5438 		rp->addr_type = msg->addr_type;
   5439 		rp->xfer_mode = msg->xfer_mode;
   5440 		rp->ack_freq = msg->ack_freq;
   5441 
   5442 		rv = vgen_check_attr_info(ldcp, msg);
   5443 		if (rv == VGEN_FAILURE) {
   5444 			/* unsupported attr, send NACK */
   5445 			ack = 0;
   5446 		} else {
   5447 
   5448 			if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
   5449 
   5450 				/*
   5451 				 * Versions >= 1.4:
   5452 				 * The mtu is negotiated down to the
   5453 				 * minimum of our mtu and peer's mtu.
   5454 				 */
   5455 				mtu = MIN(msg->mtu, vgenp->max_frame_size);
   5456 
   5457 				/*
   5458 				 * If we have received an ack for the attr info
   5459 				 * that we sent, then check if the mtu computed
   5460 				 * above matches the mtu that the peer had ack'd
   5461 				 * (saved in local hparams). If they don't
   5462 				 * match, we fail the handshake.
   5463 				 */
   5464 				if (ldcp->hstate & ATTR_ACK_RCVD) {
   5465 					if (mtu != lp->mtu) {
   5466 						/* send NACK */
   5467 						ack = 0;
   5468 					}
   5469 				} else {
   5470 					/*
   5471 					 * Save the mtu computed above in our
   5472 					 * attr parameters, so it gets sent in
   5473 					 * the attr info from us to the peer.
   5474 					 */
   5475 					lp->mtu = mtu;
   5476 				}
   5477 
   5478 				/* save the MIN mtu in the msg to be replied */
   5479 				msg->mtu = mtu;
   5480 
   5481 			}
   5482 		}
   5483 
   5484 
   5485 		if (ack) {
   5486 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
   5487 		} else {
   5488 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
   5489 		}
   5490 		tagp->vio_sid = ldcp->local_sid;
   5491 
   5492 		/* send reply msg back to peer */
   5493 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
   5494 		    B_FALSE);
   5495 		if (rv != VGEN_SUCCESS) {
   5496 			return (rv);
   5497 		}
   5498 
   5499 		if (ack) {
   5500 			ldcp->hstate |= ATTR_ACK_SENT;
   5501 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
   5502 		} else {
   5503 			/* failed */
   5504 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
   5505 			return (VGEN_FAILURE);
   5506 		}
   5507 
   5508 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
   5509 			vgen_handshake(vh_nextphase(ldcp));
   5510 		}
   5511 
   5512 		break;
   5513 
   5514 	case VIO_SUBTYPE_ACK:
   5515 
   5516 		if (VGEN_VER_GTEQ(ldcp, 1, 5) &&
   5517 		    ldcp->portp == vgenp->vsw_portp) {
   5518 			/*
   5519 			 * Versions >= 1.5:
   5520 			 * If the vnet device has been configured to get
   5521 			 * physical link state updates, check the corresponding
   5522 			 * bits in the ack msg, if the peer is vswitch.
   5523 			 */
   5524 			if (((lp->physlink_update &
   5525 			    PHYSLINK_UPDATE_STATE_MASK) ==
   5526 			    PHYSLINK_UPDATE_STATE) &&
   5527 
   5528 			    ((msg->physlink_update &
   5529 			    PHYSLINK_UPDATE_STATE_MASK) ==
   5530 			    PHYSLINK_UPDATE_STATE_ACK)) {
   5531 				vgenp->pls_negotiated = B_TRUE;
   5532 			} else {
   5533 				vgenp->pls_negotiated = B_FALSE;
   5534 			}
   5535 		}
   5536 
   5537 		if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
   5538 			/*
   5539 			 * Versions >= 1.4:
   5540 			 * The ack msg sent by the peer contains the minimum of
   5541 			 * our mtu (that we had sent in our attr info) and the
   5542 			 * peer's mtu.
   5543 			 *
   5544 			 * If we have sent an ack for the attr info msg from
   5545 			 * the peer, check if the mtu that was computed then
   5546 			 * (saved in local hparams) matches the mtu that the
   5547 			 * peer has ack'd. If they don't match, we fail the
   5548 			 * handshake.
   5549 			 */
   5550 			if (ldcp->hstate & ATTR_ACK_SENT) {
   5551 				if (lp->mtu != msg->mtu) {
   5552 					return (VGEN_FAILURE);
   5553 				}
   5554 			} else {
   5555 				/*
   5556 				 * If the mtu ack'd by the peer is > our mtu
   5557 				 * fail handshake. Otherwise, save the mtu, so
   5558 				 * we can validate it when we receive attr info
   5559 				 * from our peer.
   5560 				 */
   5561 				if (msg->mtu > lp->mtu) {
   5562 					return (VGEN_FAILURE);
   5563 				}
   5564 				if (msg->mtu <= lp->mtu) {
   5565 					lp->mtu = msg->mtu;
   5566 				}
   5567 			}
   5568 		}
   5569 
   5570 		ldcp->hstate |= ATTR_ACK_RCVD;
   5571 
   5572 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
   5573 
   5574 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
   5575 			vgen_handshake(vh_nextphase(ldcp));
   5576 		}
   5577 		break;
   5578 
   5579 	case VIO_SUBTYPE_NACK:
   5580 
   5581 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
   5582 		return (VGEN_FAILURE);
   5583 	}
   5584 	DBG1(vgenp, ldcp, "exit\n");
   5585 	return (VGEN_SUCCESS);
   5586 }
   5587 
   5588 /* Check if the dring info msg is ok */
   5589 static int
   5590 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
   5591 {
   5592 	/* check if msg contents are ok */
   5593 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
   5594 	    sizeof (vnet_public_desc_t))) {
   5595 		return (VGEN_FAILURE);
   5596 	}
   5597 	return (VGEN_SUCCESS);
   5598 }
   5599 
   5600 /*
   5601  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
   5602  * the peer to a dring register msg that we sent.
   5603  */
   5604 static int
   5605 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
   5606 {
   5607 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
   5608 	ldc_mem_cookie_t dcookie;
   5609 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   5610 	int ack = 0;
   5611 	int rv = 0;
   5612 
   5613 	DBG1(vgenp, ldcp, "enter\n");
   5614 	if (ldcp->hphase < VH_PHASE2) {
   5615 		/* dring_info can be rcvd in any of the phases after Phase1 */
   5616 		DWARN(vgenp, ldcp,
   5617 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
   5618 		    tagp->vio_subtype, ldcp->hphase);
   5619 		return (VGEN_FAILURE);
   5620 	}
   5621 	switch (tagp->vio_subtype) {
   5622 	case VIO_SUBTYPE_INFO:
   5623 
   5624 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
   5625 		ldcp->hstate |= DRING_INFO_RCVD;
   5626 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
   5627 
   5628 		ASSERT(msg->ncookies == 1);
   5629 
   5630 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
   5631 			/*
   5632 			 * verified dring info msg to be ok,
   5633 			 * now try to map the remote dring.
   5634 			 */
   5635 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
   5636 			    msg->descriptor_size, &dcookie,
   5637 			    msg->ncookies);
   5638 			if (rv == DDI_SUCCESS) {
   5639 				/* now we can ack the peer */
   5640 				ack = 1;
   5641 			}
   5642 		}
   5643 		if (ack == 0) {
   5644 			/* failed, send NACK */
   5645 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
   5646 		} else {
   5647 			if (!(ldcp->peer_hparams.dring_ready)) {
   5648 
   5649 				/* save peer's dring_info values */
   5650 				bcopy(&dcookie,
   5651 				    &(ldcp->peer_hparams.dring_cookie),
   5652 				    sizeof (dcookie));
   5653 				ldcp->peer_hparams.num_desc =
   5654 				    msg->num_descriptors;
   5655 				ldcp->peer_hparams.desc_size =
   5656 				    msg->descriptor_size;
   5657 				ldcp->peer_hparams.num_dcookies =
   5658 				    msg->ncookies;
   5659 
   5660 				/* set dring_ident for the peer */
   5661 				ldcp->peer_hparams.dring_ident =
   5662 				    (uint64_t)ldcp->rxdp;
   5663 				/* return the dring_ident in ack msg */
   5664 				msg->dring_ident =
   5665 				    (uint64_t)ldcp->rxdp;
   5666 
   5667 				ldcp->peer_hparams.dring_ready = B_TRUE;
   5668 			}
   5669 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
   5670 		}
   5671 		tagp->vio_sid = ldcp->local_sid;
   5672 		/* send reply msg back to peer */
   5673 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
   5674 		    B_FALSE);
   5675 		if (rv != VGEN_SUCCESS) {
   5676 			return (rv);
   5677 		}
   5678 
   5679 		if (ack) {
   5680 			ldcp->hstate |= DRING_ACK_SENT;
   5681 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
   5682 		} else {
   5683 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
   5684 			return (VGEN_FAILURE);
   5685 		}
   5686 
   5687 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
   5688 			vgen_handshake(vh_nextphase(ldcp));
   5689 		}
   5690 
   5691 		break;
   5692 
   5693 	case VIO_SUBTYPE_ACK:
   5694 
   5695 		ldcp->hstate |= DRING_ACK_RCVD;
   5696 
   5697 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
   5698 
   5699 		if (!(ldcp->local_hparams.dring_ready)) {
   5700 			/* local dring is now ready */
   5701 			ldcp->local_hparams.dring_ready = B_TRUE;
   5702 
   5703 			/* save dring_ident acked by peer */
   5704 			ldcp->local_hparams.dring_ident =
   5705 			    msg->dring_ident;
   5706 		}
   5707 
   5708 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
   5709 			vgen_handshake(vh_nextphase(ldcp));
   5710 		}
   5711 
   5712 		break;
   5713 
   5714 	case VIO_SUBTYPE_NACK:
   5715 
   5716 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
   5717 		return (VGEN_FAILURE);
   5718 	}
   5719 	DBG1(vgenp, ldcp, "exit\n");
   5720 	return (VGEN_SUCCESS);
   5721 }
   5722 
   5723 /*
   5724  * Handle a rdx info msg from the peer or an ACK/NACK
   5725  * from the peer to a rdx info msg that we sent.
   5726  */
   5727 static int
   5728 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
   5729 {
   5730 	int rv = 0;
   5731 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
   5732 
   5733 	DBG1(vgenp, ldcp, "enter\n");
   5734 	if (ldcp->hphase != VH_PHASE3) {
   5735 		DWARN(vgenp, ldcp,
   5736 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
   5737 		    tagp->vio_subtype, ldcp->hphase);
   5738 		return (VGEN_FAILURE);
   5739 	}
   5740 	switch (tagp->vio_subtype) {
   5741 	case VIO_SUBTYPE_INFO:
   5742 
   5743 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
   5744 		ldcp->hstate |= RDX_INFO_RCVD;
   5745 
   5746 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
   5747 		tagp->vio_sid = ldcp->local_sid;
   5748 		/* send reply msg back to peer */
   5749 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
   5750 		    B_FALSE);
   5751 		if (rv != VGEN_SUCCESS) {
   5752 			return (rv);
   5753 		}
   5754 
   5755 		ldcp->hstate |= RDX_ACK_SENT;
   5756 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
   5757 
   5758 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
   5759 			vgen_handshake(vh_nextphase(ldcp));
   5760 		}
   5761 
   5762 		break;
   5763 
   5764 	case VIO_SUBTYPE_ACK:
   5765 
   5766 		ldcp->hstate |= RDX_ACK_RCVD;
   5767 
   5768 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
   5769 
   5770 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
   5771 			vgen_handshake(vh_nextphase(ldcp));
   5772 		}
   5773 		break;
   5774 
   5775 	case VIO_SUBTYPE_NACK:
   5776 
   5777 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
   5778 		return (VGEN_FAILURE);
   5779 	}
   5780 	DBG1(vgenp, ldcp, "exit\n");
   5781 	return (VGEN_SUCCESS);
   5782 }
   5783 
   5784 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
   5785 static int
   5786 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
   5787 {
   5788 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   5789 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
   5790 	struct ether_addr *addrp;
   5791 	int count;
   5792 	int i;
   5793 
   5794 	DBG1(vgenp, ldcp, "enter\n");
   5795 	switch (tagp->vio_subtype) {
   5796 
   5797 	case VIO_SUBTYPE_INFO:
   5798 
   5799 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
   5800 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
   5801 		break;
   5802 
   5803 	case VIO_SUBTYPE_ACK:
   5804 
   5805 		/* success adding/removing multicast addr */
   5806 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
   5807 		break;
   5808 
   5809 	case VIO_SUBTYPE_NACK:
   5810 
   5811 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
   5812 		if (!(msgp->set)) {
   5813 			/* multicast remove request failed */
   5814 			break;
   5815 		}
   5816 
   5817 		/* multicast add request failed */
   5818 		for (count = 0; count < msgp->count; count++) {
   5819 			addrp = &(msgp->mca[count]);
   5820 
   5821 			/* delete address from the table */
   5822 			for (i = 0; i < vgenp->mccount; i++) {
   5823 				if (ether_cmp(addrp,
   5824 				    &(vgenp->mctab[i])) == 0) {
   5825 					if (vgenp->mccount > 1) {
   5826 						int t = vgenp->mccount - 1;
   5827 						vgenp->mctab[i] =
   5828 						    vgenp->mctab[t];
   5829 					}
   5830 					vgenp->mccount--;
   5831 					break;
   5832 				}
   5833 			}
   5834 		}
   5835 		break;
   5836 
   5837 	}
   5838 	DBG1(vgenp, ldcp, "exit\n");
   5839 
   5840 	return (VGEN_SUCCESS);
   5841 }
   5842 
   5843 /*
   5844  * Physical link information message from the peer. Only vswitch should send
   5845  * us this message; if the vnet device has been configured to get physical link
   5846  * state updates. Note that we must have already negotiated this with the
   5847  * vswitch during attribute exchange phase of handshake.
   5848  */
   5849 static int
   5850 vgen_handle_physlink_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
   5851 {
   5852 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
   5853 	vnet_physlink_msg_t	*msgp = (vnet_physlink_msg_t *)tagp;
   5854 	link_state_t		link_state;
   5855 	int			rv;
   5856 
   5857 	if (ldcp->portp != vgenp->vsw_portp) {
   5858 		/*
   5859 		 * drop the message and don't process; as we should
   5860 		 * receive physlink_info message from only vswitch.
   5861 		 */
   5862 		return (VGEN_SUCCESS);
   5863 	}
   5864 
   5865 	if (vgenp->pls_negotiated == B_FALSE) {
   5866 		/*
   5867 		 * drop the message and don't process; as we should receive
   5868 		 * physlink_info message only if physlink update is enabled for
   5869 		 * the device and negotiated with vswitch.
   5870 		 */
   5871 		return (VGEN_SUCCESS);
   5872 	}
   5873 
   5874 	switch (tagp->vio_subtype) {
   5875 
   5876 	case VIO_SUBTYPE_INFO:
   5877 
   5878 		if ((msgp->physlink_info & VNET_PHYSLINK_STATE_MASK) ==
   5879 		    VNET_PHYSLINK_STATE_UP) {
   5880 			link_state = LINK_STATE_UP;
   5881 		} else {
   5882 			link_state = LINK_STATE_DOWN;
   5883 		}
   5884 
   5885 		if (vgenp->phys_link_state != link_state) {
   5886 			vgenp->phys_link_state = link_state;
   5887 			mutex_exit(&ldcp->cblock);
   5888 
   5889 			/* Now update the stack */
   5890 			vgen_link_update(vgenp, link_state);
   5891 
   5892 			mutex_enter(&ldcp->cblock);
   5893 		}
   5894 
   5895 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
   5896 		tagp->vio_sid = ldcp->local_sid;
   5897 
   5898 		/* send reply msg back to peer */
   5899 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
   5900 		    sizeof (vnet_physlink_msg_t), B_FALSE);
   5901 		if (rv != VGEN_SUCCESS) {
   5902 			return (rv);
   5903 		}
   5904 		break;
   5905 
   5906 	case VIO_SUBTYPE_ACK:
   5907 
   5908 		/* vnet shouldn't recv physlink acks */
   5909 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_ACK \n");
   5910 		break;
   5911 
   5912 	case VIO_SUBTYPE_NACK:
   5913 
   5914 		/* vnet shouldn't recv physlink nacks */
   5915 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_NACK \n");
   5916 		break;
   5917 
   5918 	}
   5919 	DBG1(vgenp, ldcp, "exit\n");
   5920 
   5921 	return (VGEN_SUCCESS);
   5922 }
   5923 
   5924 /* handler for control messages received from the peer ldc end-point */
   5925 static int
   5926 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
   5927 {
   5928 	int rv = 0;
   5929 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   5930 
   5931 	DBG1(vgenp, ldcp, "enter\n");
   5932 	switch (tagp->vio_subtype_env) {
   5933 
   5934 	case VIO_VER_INFO:
   5935 		rv = vgen_handle_version_negotiate(ldcp, tagp);
   5936 		break;
   5937 
   5938 	case VIO_ATTR_INFO:
   5939 		rv = vgen_handle_attr_info(ldcp, tagp);
   5940 		break;
   5941 
   5942 	case VIO_DRING_REG:
   5943 		rv = vgen_handle_dring_reg(ldcp, tagp);
   5944 		break;
   5945 
   5946 	case VIO_RDX:
   5947 		rv = vgen_handle_rdx_info(ldcp, tagp);
   5948 		break;
   5949 
   5950 	case VNET_MCAST_INFO:
   5951 		rv = vgen_handle_mcast_info(ldcp, tagp);
   5952 		break;
   5953 
   5954 	case VIO_DDS_INFO:
   5955 		/*
   5956 		 * If we are in the process of resetting the vswitch channel,
   5957 		 * drop the dds message. A new handshake will be initiated
   5958 		 * when the channel comes back up after the reset and dds
   5959 		 * negotiation can then continue.
   5960 		 */
   5961 		if (ldcp->need_ldc_reset == B_TRUE) {
   5962 			break;
   5963 		}
   5964 		rv = vgen_dds_rx(ldcp, tagp);
   5965 		break;
   5966 
   5967 	case VNET_PHYSLINK_INFO:
   5968 		rv = vgen_handle_physlink_info(ldcp, tagp);
   5969 		break;
   5970 	}
   5971 
   5972 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
   5973 	return (rv);
   5974 }
   5975 
   5976 /* handler for data messages received from the peer ldc end-point */
   5977 static int
   5978 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
   5979 {
   5980 	int rv = 0;
   5981 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   5982 
   5983 	DBG1(vgenp, ldcp, "enter\n");
   5984 
   5985 	if (ldcp->hphase != VH_DONE)
   5986 		return (rv);
   5987 
   5988 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
   5989 		rv = vgen_check_datamsg_seq(ldcp, tagp);
   5990 		if (rv != 0) {
   5991 			return (rv);
   5992 		}
   5993 	}
   5994 
   5995 	switch (tagp->vio_subtype_env) {
   5996 	case VIO_DRING_DATA:
   5997 		rv = vgen_handle_dring_data(ldcp, tagp);
   5998 		break;
   5999 
   6000 	case VIO_PKT_DATA:
   6001 		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
   6002 		break;
   6003 	default:
   6004 		break;
   6005 	}
   6006 
   6007 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
   6008 	return (rv);
   6009 }
   6010 
   6011 /*
   6012  * dummy pkt data handler function for vnet protocol version 1.0
   6013  */
   6014 static void
   6015 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
   6016 {
   6017 	_NOTE(ARGUNUSED(arg1, arg2, msglen))
   6018 }
   6019 
   6020 /*
   6021  * This function handles raw pkt data messages received over the channel.
   6022  * Currently, only priority-eth-type frames are received through this mechanism.
   6023  * In this case, the frame(data) is present within the message itself which
   6024  * is copied into an mblk before sending it up the stack.
   6025  */
   6026 static void
   6027 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
   6028 {
   6029 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
   6030 	vio_raw_data_msg_t	*pkt	= (vio_raw_data_msg_t *)arg2;
   6031 	uint32_t		size;
   6032 	mblk_t			*mp;
   6033 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
   6034 	vgen_stats_t		*statsp = &ldcp->stats;
   6035 	vgen_hparams_t		*lp = &ldcp->local_hparams;
   6036 	vio_net_rx_cb_t		vrx_cb;
   6037 
   6038 	ASSERT(MUTEX_HELD(&ldcp->cblock));
   6039 
   6040 	mutex_exit(&ldcp->cblock);
   6041 
   6042 	size = msglen - VIO_PKT_DATA_HDRSIZE;
   6043 	if (size < ETHERMIN || size > lp->mtu) {
   6044 		(void) atomic_inc_32(&statsp->rx_pri_fail);
   6045 		goto exit;
   6046 	}
   6047 
   6048 	mp = vio_multipool_allocb(&ldcp->vmp, size);
   6049 	if (mp == NULL) {
   6050 		mp = allocb(size, BPRI_MED);
   6051 		if (mp == NULL) {
   6052 			(void) atomic_inc_32(&statsp->rx_pri_fail);
   6053 			DWARN(vgenp, ldcp, "allocb failure, "
   6054 			    "unable to process priority frame\n");
   6055 			goto exit;
   6056 		}
   6057 	}
   6058 
   6059 	/* copy the frame from the payload of raw data msg into the mblk */
   6060 	bcopy(pkt->data, mp->b_rptr, size);
   6061 	mp->b_wptr = mp->b_rptr + size;
   6062 
   6063 	/* update stats */
   6064 	(void) atomic_inc_64(&statsp->rx_pri_packets);
   6065 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
   6066 
   6067 	/* send up; call vrx_cb() as cblock is already released */
   6068 	vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
   6069 	vrx_cb(ldcp->portp->vhp, mp);
   6070 
   6071 exit:
   6072 	mutex_enter(&ldcp->cblock);
   6073 }
   6074 
   6075 static int
   6076 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
   6077     int32_t end, uint8_t pstate)
   6078 {
   6079 	int rv = 0;
   6080 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   6081 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
   6082 
   6083 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
   6084 	tagp->vio_sid = ldcp->local_sid;
   6085 	msgp->start_idx = start;
   6086 	msgp->end_idx = end;
   6087 	msgp->dring_process_state = pstate;
   6088 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
   6089 	if (rv != VGEN_SUCCESS) {
   6090 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
   6091 	}
   6092 	return (rv);
   6093 }
   6094 
   6095 static int
   6096 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
   6097 {
   6098 	int rv = 0;
   6099 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   6100 
   6101 
   6102 	DBG1(vgenp, ldcp, "enter\n");
   6103 	switch (tagp->vio_subtype) {
   6104 
   6105 	case VIO_SUBTYPE_INFO:
   6106 		/*
   6107 		 * To reduce the locking contention, release the
   6108 		 * cblock here and re-acquire it once we are done
   6109 		 * receiving packets.
   6110 		 */
   6111 		mutex_exit(&ldcp->cblock);
   6112 		mutex_enter(&ldcp->rxlock);
   6113 		rv = vgen_handle_dring_data_info(ldcp, tagp);
   6114 		mutex_exit(&ldcp->rxlock);
   6115 		mutex_enter(&ldcp->cblock);
   6116 		break;
   6117 
   6118 	case VIO_SUBTYPE_ACK:
   6119 		rv = vgen_handle_dring_data_ack(ldcp, tagp);
   6120 		break;
   6121 
   6122 	case VIO_SUBTYPE_NACK:
   6123 		rv = vgen_handle_dring_data_nack(ldcp, tagp);
   6124 		break;
   6125 	}
   6126 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
   6127 	return (rv);
   6128 }
   6129 
   6130 static int
   6131 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
   6132 {
   6133 	uint32_t start;
   6134 	int32_t end;
   6135 	int rv = 0;
   6136 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
   6137 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   6138 #ifdef VGEN_HANDLE_LOST_PKTS
   6139 	vgen_stats_t *statsp = &ldcp->stats;
   6140 	uint32_t rxi;
   6141 	int n;
   6142 #endif
   6143 
   6144 	DBG1(vgenp, ldcp, "enter\n");
   6145 
   6146 	start = dringmsg->start_idx;
   6147 	end = dringmsg->end_idx;
   6148 	/*
   6149 	 * received a data msg, which contains the start and end
   6150 	 * indices of the descriptors within the rx ring holding data,
   6151 	 * the seq_num of data packet corresponding to the start index,
   6152 	 * and the dring_ident.
   6153 	 * We can now read the contents of each of these descriptors
   6154 	 * and gather data from it.
   6155 	 */
   6156 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
   6157 	    start, end);
   6158 
   6159 	/* validate rx start and end indeces */
   6160 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
   6161 	    !(CHECK_RXI(end, ldcp)))) {
   6162 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
   6163 		    start, end);
   6164 		/* drop the message if invalid index */
   6165 		return (rv);
   6166 	}
   6167 
   6168 	/* validate dring_ident */
   6169 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
   6170 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
   6171 		    dringmsg->dring_ident);
   6172 		/* invalid dring_ident, drop the msg */
   6173 		return (rv);
   6174 	}
   6175 #ifdef DEBUG
   6176 	if (vgen_trigger_rxlost) {
   6177 		/* drop this msg to simulate lost pkts for debugging */
   6178 		vgen_trigger_rxlost = 0;
   6179 		return (rv);
   6180 	}
   6181 #endif
   6182 
   6183 #ifdef	VGEN_HANDLE_LOST_PKTS
   6184 
   6185 	/* receive start index doesn't match expected index */
   6186 	if (ldcp->next_rxi != start) {
   6187 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
   6188 		    ldcp->next_rxi, start);
   6189 
   6190 		/* calculate the number of pkts lost */
   6191 		if (start >= ldcp->next_rxi) {
   6192 			n = start - ldcp->next_rxi;
   6193 		} else  {
   6194 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
   6195 		}
   6196 
   6197 		statsp->rx_lost_pkts += n;
   6198 		tagp->vio_subtype = VIO_SUBTYPE_NACK;
   6199 		tagp->vio_sid = ldcp->local_sid;
   6200 		/* indicate the range of lost descriptors */
   6201 		dringmsg->start_idx = ldcp->next_rxi;
   6202 		rxi = start;
   6203 		DECR_RXI(rxi, ldcp);
   6204 		dringmsg->end_idx = rxi;
   6205 		/* dring ident is left unchanged */
   6206 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
   6207 		    sizeof (*dringmsg), B_FALSE);
   6208 		if (rv != VGEN_SUCCESS) {
   6209 			DWARN(vgenp, ldcp,
   6210 			    "vgen_sendmsg failed, stype:NACK\n");
   6211 			return (rv);
   6212 		}
   6213 		/*
   6214 		 * treat this range of descrs/pkts as dropped
   6215 		 * and set the new expected value of next_rxi
   6216 		 * and continue(below) to process from the new
   6217 		 * start index.
   6218 		 */
   6219 		ldcp->next_rxi = start;
   6220 	}
   6221 
   6222 #endif	/* VGEN_HANDLE_LOST_PKTS */
   6223 
   6224 	/* Now receive messages */
   6225 	rv = vgen_process_dring_data(ldcp, tagp);
   6226 
   6227 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
   6228 	return (rv);
   6229 }
   6230 
   6231 static int
   6232 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
   6233 {
   6234 	boolean_t set_ack_start = B_FALSE;
   6235 	uint32_t start;
   6236 	uint32_t ack_end;
   6237 	uint32_t next_rxi;
   6238 	uint32_t rxi;
   6239 	int count = 0;
   6240 	int rv = 0;
   6241 	uint32_t retries = 0;
   6242 	vgen_stats_t *statsp;
   6243 	vnet_public_desc_t rxd;
   6244 	vio_dring_entry_hdr_t *hdrp;
   6245 	mblk_t *bp = NULL;
   6246 	mblk_t *bpt = NULL;
   6247 	uint32_t ack_start;
   6248 	boolean_t rxd_err = B_FALSE;
   6249 	mblk_t *mp = NULL;
   6250 	size_t nbytes;
   6251 	boolean_t ack_needed = B_FALSE;
   6252 	size_t nread;
   6253 	uint64_t off = 0;
   6254 	struct ether_header *ehp;
   6255 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
   6256 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   6257 	vgen_hparams_t	*lp = &ldcp->local_hparams;
   6258 
   6259 	DBG1(vgenp, ldcp, "enter\n");
   6260 
   6261 	statsp = &ldcp->stats;
   6262 	start = dringmsg->start_idx;
   6263 
   6264 	/*
   6265 	 * start processing the descriptors from the specified
   6266 	 * start index, up to the index a descriptor is not ready
   6267 	 * to be processed or we process the entire descriptor ring
   6268 	 * and wrap around upto the start index.
   6269 	 */
   6270 
   6271 	/* need to set the start index of descriptors to be ack'd */
   6272 	set_ack_start = B_TRUE;
   6273 
   6274 	/* index upto which we have ack'd */
   6275 	ack_end = start;
   6276 	DECR_RXI(ack_end, ldcp);
   6277 
   6278 	next_rxi = rxi =  start;
   6279 	do {
   6280 vgen_recv_retry:
   6281 		rv = vnet_dring_entry_copy(&(ldcp->rxdp[rxi]), &rxd,
   6282 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi);
   6283 		if (rv != 0) {
   6284 			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
   6285 			    " rv(%d)\n", rv);
   6286 			statsp->ierrors++;
   6287 			return (rv);
   6288 		}
   6289 
   6290 		hdrp = &rxd.hdr;
   6291 
   6292 		if (hdrp->dstate != VIO_DESC_READY) {
   6293 			/*
   6294 			 * Before waiting and retry here, send up
   6295 			 * the packets that are received already
   6296 			 */
   6297 			if (bp != NULL) {
   6298 				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
   6299 				vgen_rx(ldcp, bp, bpt);
   6300 				count = 0;
   6301 				bp = bpt = NULL;
   6302 			}
   6303 			/*
   6304 			 * descriptor is not ready.
   6305 			 * retry descriptor acquire, stop processing
   6306 			 * after max # retries.
   6307 			 */
   6308 			if (retries == vgen_recv_retries)
   6309 				break;
   6310 			retries++;
   6311 			drv_usecwait(vgen_recv_delay);
   6312 			goto vgen_recv_retry;
   6313 		}
   6314 		retries = 0;
   6315 
   6316 		if (set_ack_start) {
   6317 			/*
   6318 			 * initialize the start index of the range
   6319 			 * of descriptors to be ack'd.
   6320 			 */
   6321 			ack_start = rxi;
   6322 			set_ack_start = B_FALSE;
   6323 		}
   6324 
   6325 		if ((rxd.nbytes < ETHERMIN) ||
   6326 		    (rxd.nbytes > lp->mtu) ||
   6327 		    (rxd.ncookies == 0) ||
   6328 		    (rxd.ncookies > MAX_COOKIES)) {
   6329 			rxd_err = B_TRUE;
   6330 		} else {
   6331 			/*
   6332 			 * Try to allocate an mblk from the free pool
   6333 			 * of recv mblks for the channel.
   6334 			 * If this fails, use allocb().
   6335 			 */
   6336 			nbytes = (VNET_IPALIGN + rxd.nbytes + 7) & ~7;
   6337 			if (nbytes > ldcp->max_rxpool_size) {
   6338 				mp = allocb(VNET_IPALIGN + rxd.nbytes + 8,
   6339 				    BPRI_MED);
   6340 			} else {
   6341 				mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
   6342 				if (mp == NULL) {
   6343 					statsp->rx_vio_allocb_fail++;
   6344 					/*
   6345 					 * Data buffer returned by allocb(9F)
   6346 					 * is 8byte aligned. We allocate extra
   6347 					 * 8 bytes to ensure size is multiple
   6348 					 * of 8 bytes for ldc_mem_copy().
   6349 					 */
   6350 					mp = allocb(VNET_IPALIGN +
   6351 					    rxd.nbytes + 8, BPRI_MED);
   6352 				}
   6353 			}
   6354 		}
   6355 		if ((rxd_err) || (mp == NULL)) {
   6356 			/*
   6357 			 * rxd_err or allocb() failure,
   6358 			 * drop this packet, get next.
   6359 			 */
   6360 			if (rxd_err) {
   6361 				statsp->ierrors++;
   6362 				rxd_err = B_FALSE;
   6363 			} else {
   6364 				statsp->rx_allocb_fail++;
   6365 			}
   6366 
   6367 			ack_needed = hdrp->ack;
   6368 
   6369 			/* set descriptor done bit */
   6370 			rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
   6371 			    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
   6372 			    VIO_DESC_DONE);
   6373 			if (rv != 0) {
   6374 				DWARN(vgenp, ldcp,
   6375 				    "vnet_dring_entry_set_dstate err rv(%d)\n",
   6376 				    rv);
   6377 				return (rv);
   6378 			}
   6379 
   6380 			if (ack_needed) {
   6381 				ack_needed = B_FALSE;
   6382 				/*
   6383 				 * sender needs ack for this packet,
   6384 				 * ack pkts upto this index.
   6385 				 */
   6386 				ack_end = rxi;
   6387 
   6388 				rv = vgen_send_dring_ack(ldcp, tagp,
   6389 				    ack_start, ack_end,
   6390 				    VIO_DP_ACTIVE);
   6391 				if (rv != VGEN_SUCCESS) {
   6392 					goto error_ret;
   6393 				}
   6394 
   6395 				/* need to set new ack start index */
   6396 				set_ack_start = B_TRUE;
   6397 			}
   6398 			goto vgen_next_rxi;
   6399 		}
   6400 
   6401 		nread = nbytes;
   6402 		rv = ldc_mem_copy(ldcp->ldc_handle,
   6403 		    (caddr_t)mp->b_rptr, off, &nread,
   6404 		    rxd.memcookie, rxd.ncookies, LDC_COPY_IN);
   6405 
   6406 		/* if ldc_mem_copy() failed */
   6407 		if (rv) {
   6408 			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
   6409 			statsp->ierrors++;
   6410 			freemsg(mp);
   6411 			goto error_ret;
   6412 		}
   6413 
   6414 		ack_needed = hdrp->ack;
   6415 
   6416 		rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
   6417 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
   6418 		    VIO_DESC_DONE);
   6419 		if (rv != 0) {
   6420 			DWARN(vgenp, ldcp,
   6421 			    "vnet_dring_entry_set_dstate err rv(%d)\n", rv);
   6422 			goto error_ret;
   6423 		}
   6424 
   6425 		mp->b_rptr += VNET_IPALIGN;
   6426 
   6427 		if (ack_needed) {
   6428 			ack_needed = B_FALSE;
   6429 			/*
   6430 			 * sender needs ack for this packet,
   6431 			 * ack pkts upto this index.
   6432 			 */
   6433 			ack_end = rxi;
   6434 
   6435 			rv = vgen_send_dring_ack(ldcp, tagp,
   6436 			    ack_start, ack_end, VIO_DP_ACTIVE);
   6437 			if (rv != VGEN_SUCCESS) {
   6438 				goto error_ret;
   6439 			}
   6440 
   6441 			/* need to set new ack start index */
   6442 			set_ack_start = B_TRUE;
   6443 		}
   6444 
   6445 		if (nread != nbytes) {
   6446 			DWARN(vgenp, ldcp,
   6447 			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
   6448 			    nread, nbytes);
   6449 			statsp->ierrors++;
   6450 			freemsg(mp);
   6451 			goto vgen_next_rxi;
   6452 		}
   6453 
   6454 		/* point to the actual end of data */
   6455 		mp->b_wptr = mp->b_rptr + rxd.nbytes;
   6456 
   6457 		/* update stats */
   6458 		statsp->ipackets++;
   6459 		statsp->rbytes += rxd.nbytes;
   6460 		ehp = (struct ether_header *)mp->b_rptr;
   6461 		if (IS_BROADCAST(ehp))
   6462 			statsp->brdcstrcv++;
   6463 		else if (IS_MULTICAST(ehp))
   6464 			statsp->multircv++;
   6465 
   6466 		/* build a chain of received packets */
   6467 		if (bp == NULL) {
   6468 			/* first pkt */
   6469 			bp = mp;
   6470 			bpt = bp;
   6471 			bpt->b_next = NULL;
   6472 		} else {
   6473 			mp->b_next = NULL;
   6474 			bpt->b_next = mp;
   6475 			bpt = mp;
   6476 		}
   6477 
   6478 		if (count++ > vgen_chain_len) {
   6479 			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
   6480 			vgen_rx(ldcp, bp, bpt);
   6481 			count = 0;
   6482 			bp = bpt = NULL;
   6483 		}
   6484 
   6485 vgen_next_rxi:
   6486 		/* update end index of range of descrs to be ack'd */
   6487 		ack_end = rxi;
   6488 
   6489 		/* update the next index to be processed */
   6490 		INCR_RXI(next_rxi, ldcp);
   6491 		if (next_rxi == start) {
   6492 			/*
   6493 			 * processed the entire descriptor ring upto
   6494 			 * the index at which we started.
   6495 			 */
   6496 			break;
   6497 		}
   6498 
   6499 		rxi = next_rxi;
   6500 
   6501 	_NOTE(CONSTCOND)
   6502 	} while (1);
   6503 
   6504 	/*
   6505 	 * send an ack message to peer indicating that we have stopped
   6506 	 * processing descriptors.
   6507 	 */
   6508 	if (set_ack_start) {
   6509 		/*
   6510 		 * We have ack'd upto some index and we have not
   6511 		 * processed any descriptors beyond that index.
   6512 		 * Use the last ack'd index as both the start and
   6513 		 * end of range of descrs being ack'd.
   6514 		 * Note: This results in acking the last index twice
   6515 		 * and should be harmless.
   6516 		 */
   6517 		ack_start = ack_end;
   6518 	}
   6519 
   6520 	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
   6521 	    VIO_DP_STOPPED);
   6522 	if (rv != VGEN_SUCCESS) {
   6523 		goto error_ret;
   6524 	}
   6525 
   6526 	/* save new recv index of next dring msg */
   6527 	ldcp->next_rxi = next_rxi;
   6528 
   6529 error_ret:
   6530 	/* send up packets received so far */
   6531 	if (bp != NULL) {
   6532 		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
   6533 		vgen_rx(ldcp, bp, bpt);
   6534 		bp = bpt = NULL;
   6535 	}
   6536 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
   6537 	return (rv);
   6538 
   6539 }
   6540 
   6541 static int
   6542 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
   6543 {
   6544 	int rv = 0;
   6545 	uint32_t start;
   6546 	int32_t end;
   6547 	uint32_t txi;
   6548 	boolean_t ready_txd = B_FALSE;
   6549 	vgen_stats_t *statsp;
   6550 	vgen_private_desc_t *tbufp;
   6551 	vnet_public_desc_t *txdp;
   6552 	vio_dring_entry_hdr_t *hdrp;
   6553 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   6554 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
   6555 
   6556 	DBG1(vgenp, ldcp, "enter\n");
   6557 	start = dringmsg->start_idx;
   6558 	end = dringmsg->end_idx;
   6559 	statsp = &ldcp->stats;
   6560 
   6561 	/*
   6562 	 * received an ack corresponding to a specific descriptor for
   6563 	 * which we had set the ACK bit in the descriptor (during
   6564 	 * transmit). This enables us to reclaim descriptors.
   6565 	 */
   6566 
   6567 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
   6568 
   6569 	/* validate start and end indeces in the tx ack msg */
   6570 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
   6571 		/* drop the message if invalid index */
   6572 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
   6573 		    start, end);
   6574 		return (rv);
   6575 	}
   6576 	/* validate dring_ident */
   6577 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
   6578 		/* invalid dring_ident, drop the msg */
   6579 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
   6580 		    dringmsg->dring_ident);
   6581 		return (rv);
   6582 	}
   6583 	statsp->dring_data_acks++;
   6584 
   6585 	/* reclaim descriptors that are done */
   6586 	vgen_reclaim(ldcp);
   6587 
   6588 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
   6589 		/*
   6590 		 * receiver continued processing descriptors after
   6591 		 * sending us the ack.
   6592 		 */
   6593 		return (rv);
   6594 	}
   6595 
   6596 	statsp->dring_stopped_acks++;
   6597 
   6598 	/* receiver stopped processing descriptors */
   6599 	mutex_enter(&ldcp->wrlock);
   6600 	mutex_enter(&ldcp->tclock);
   6601 
   6602 	/*
   6603 	 * determine if there are any pending tx descriptors
   6604 	 * ready to be processed by the receiver(peer) and if so,
   6605 	 * send a message to the peer to restart receiving.
   6606 	 */
   6607 	ready_txd = B_FALSE;
   6608 
   6609 	/*
   6610 	 * using the end index of the descriptor range for which
   6611 	 * we received the ack, check if the next descriptor is
   6612 	 * ready.
   6613 	 */
   6614 	txi = end;
   6615 	INCR_TXI(txi, ldcp);
   6616 	tbufp = &ldcp->tbufp[txi];
   6617 	txdp = tbufp->descp;
   6618 	hdrp = &txdp->hdr;
   6619 	if (hdrp->dstate == VIO_DESC_READY) {
   6620 		ready_txd = B_TRUE;
   6621 	} else {
   6622 		/*
   6623 		 * descr next to the end of ack'd descr range is not
   6624 		 * ready.
   6625 		 * starting from the current reclaim index, check
   6626 		 * if any descriptor is ready.
   6627 		 */
   6628 
   6629 		txi = ldcp->cur_tbufp - ldcp->tbufp;
   6630 		tbufp = &ldcp->tbufp[txi];
   6631 
   6632 		txdp = tbufp->descp;
   6633 		hdrp = &txdp->hdr;
   6634 		if (hdrp->dstate == VIO_DESC_READY) {
   6635 			ready_txd = B_TRUE;
   6636 		}
   6637 
   6638 	}
   6639 
   6640 	if (ready_txd) {
   6641 		/*
   6642 		 * we have tx descriptor(s) ready to be
   6643 		 * processed by the receiver.
   6644 		 * send a message to the peer with the start index
   6645 		 * of ready descriptors.
   6646 		 */
   6647 		rv = vgen_send_dring_data(ldcp, txi, -1);
   6648 		if (rv != VGEN_SUCCESS) {
   6649 			ldcp->resched_peer = B_TRUE;
   6650 			ldcp->resched_peer_txi = txi;
   6651 			mutex_exit(&ldcp->tclock);
   6652 			mutex_exit(&ldcp->wrlock);
   6653 			return (rv);
   6654 		}
   6655 	} else {
   6656 		/*
   6657 		 * no ready tx descriptors. set the flag to send a
   6658 		 * message to peer when tx descriptors are ready in
   6659 		 * transmit routine.
   6660 		 */
   6661 		ldcp->resched_peer = B_TRUE;
   6662 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
   6663 	}
   6664 
   6665 	mutex_exit(&ldcp->tclock);
   6666 	mutex_exit(&ldcp->wrlock);
   6667 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
   6668 	return (rv);
   6669 }
   6670 
   6671 static int
   6672 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
   6673 {
   6674 	int rv = 0;
   6675 	uint32_t start;
   6676 	int32_t end;
   6677 	uint32_t txi;
   6678 	vnet_public_desc_t *txdp;
   6679 	vio_dring_entry_hdr_t *hdrp;
   6680 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   6681 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
   6682 
   6683 	DBG1(vgenp, ldcp, "enter\n");
   6684 	start = dringmsg->start_idx;
   6685 	end = dringmsg->end_idx;
   6686 
   6687 	/*
   6688 	 * peer sent a NACK msg to indicate lost packets.
   6689 	 * The start and end correspond to the range of descriptors
   6690 	 * for which the peer didn't receive a dring data msg and so
   6691 	 * didn't receive the corresponding data.
   6692 	 */
   6693 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
   6694 
   6695 	/* validate start and end indeces in the tx nack msg */
   6696 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
   6697 		/* drop the message if invalid index */
   6698 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
   6699 		    start, end);
   6700 		return (rv);
   6701 	}
   6702 	/* validate dring_ident */
   6703 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
   6704 		/* invalid dring_ident, drop the msg */
   6705 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
   6706 		    dringmsg->dring_ident);
   6707 		return (rv);
   6708 	}
   6709 	mutex_enter(&ldcp->txlock);
   6710 	mutex_enter(&ldcp->tclock);
   6711 
   6712 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
   6713 		/* no busy descriptors, bogus nack ? */
   6714 		mutex_exit(&ldcp->tclock);
   6715 		mutex_exit(&ldcp->txlock);
   6716 		return (rv);
   6717 	}
   6718 
   6719 	/* we just mark the descrs as done so they can be reclaimed */
   6720 	for (txi = start; txi <= end; ) {
   6721 		txdp = &(ldcp->txdp[txi]);
   6722 		hdrp = &txdp->hdr;
   6723 		if (hdrp->dstate == VIO_DESC_READY)
   6724 			hdrp->dstate = VIO_DESC_DONE;
   6725 		INCR_TXI(txi, ldcp);
   6726 	}
   6727 	mutex_exit(&ldcp->tclock);
   6728 	mutex_exit(&ldcp->txlock);
   6729 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
   6730 	return (rv);
   6731 }
   6732 
   6733 static void
   6734 vgen_reclaim(vgen_ldc_t *ldcp)
   6735 {
   6736 	mutex_enter(&ldcp->tclock);
   6737 
   6738 	vgen_reclaim_dring(ldcp);
   6739 	ldcp->reclaim_lbolt = ddi_get_lbolt();
   6740 
   6741 	mutex_exit(&ldcp->tclock);
   6742 }
   6743 
   6744 /*
   6745  * transmit reclaim function. starting from the current reclaim index
   6746  * look for descriptors marked DONE and reclaim the descriptor and the
   6747  * corresponding buffers (tbuf).
   6748  */
   6749 static void
   6750 vgen_reclaim_dring(vgen_ldc_t *ldcp)
   6751 {
   6752 	int count = 0;
   6753 	vnet_public_desc_t *txdp;
   6754 	vgen_private_desc_t *tbufp;
   6755 	vio_dring_entry_hdr_t	*hdrp;
   6756 
   6757 #ifdef DEBUG
   6758 	if (vgen_trigger_txtimeout)
   6759 		return;
   6760 #endif
   6761 
   6762 	tbufp = ldcp->cur_tbufp;
   6763 	txdp = tbufp->descp;
   6764 	hdrp = &txdp->hdr;
   6765 
   6766 	while ((hdrp->dstate == VIO_DESC_DONE) &&
   6767 	    (tbufp != ldcp->next_tbufp)) {
   6768 		tbufp->flags = VGEN_PRIV_DESC_FREE;
   6769 		hdrp->dstate = VIO_DESC_FREE;
   6770 		hdrp->ack = B_FALSE;
   6771 
   6772 		tbufp = NEXTTBUF(ldcp, tbufp);
   6773 		txdp = tbufp->descp;
   6774 		hdrp = &txdp->hdr;
   6775 		count++;
   6776 	}
   6777 
   6778 	ldcp->cur_tbufp = tbufp;
   6779 
   6780 	/*
   6781 	 * Check if mac layer should be notified to restart transmissions
   6782 	 */
   6783 	if ((ldcp->need_resched) && (count > 0)) {
   6784 		vio_net_tx_update_t vtx_update =
   6785 		    ldcp->portp->vcb.vio_net_tx_update;
   6786 
   6787 		ldcp->need_resched = B_FALSE;
   6788 		vtx_update(ldcp->portp->vhp);
   6789 	}
   6790 }
   6791 
   6792 /* return the number of pending transmits for the channel */
   6793 static int
   6794 vgen_num_txpending(vgen_ldc_t *ldcp)
   6795 {
   6796 	int n;
   6797 
   6798 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
   6799 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
   6800 	} else  {
   6801 		/* cur_tbufp > next_tbufp */
   6802 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
   6803 	}
   6804 
   6805 	return (n);
   6806 }
   6807 
   6808 /* determine if the transmit descriptor ring is full */
   6809 static int
   6810 vgen_tx_dring_full(vgen_ldc_t *ldcp)
   6811 {
   6812 	vgen_private_desc_t	*tbufp;
   6813 	vgen_private_desc_t	*ntbufp;
   6814 
   6815 	tbufp = ldcp->next_tbufp;
   6816 	ntbufp = NEXTTBUF(ldcp, tbufp);
   6817 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
   6818 		return (VGEN_SUCCESS);
   6819 	}
   6820 	return (VGEN_FAILURE);
   6821 }
   6822 
   6823 /* determine if timeout condition has occured */
   6824 static int
   6825 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
   6826 {
   6827 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
   6828 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
   6829 	    (vnet_ldcwd_txtimeout) &&
   6830 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
   6831 		return (VGEN_SUCCESS);
   6832 	} else {
   6833 		return (VGEN_FAILURE);
   6834 	}
   6835 }
   6836 
   6837 /* transmit watchdog timeout handler */
   6838 static void
   6839 vgen_ldc_watchdog(void *arg)
   6840 {
   6841 	vgen_ldc_t *ldcp;
   6842 	vgen_t *vgenp;
   6843 	int rv;
   6844 
   6845 	ldcp = (vgen_ldc_t *)arg;
   6846 	vgenp = LDC_TO_VGEN(ldcp);
   6847 
   6848 	rv = vgen_ldc_txtimeout(ldcp);
   6849 	if (rv == VGEN_SUCCESS) {
   6850 		DWARN(vgenp, ldcp, "transmit timeout\n");
   6851 #ifdef DEBUG
   6852 		if (vgen_trigger_txtimeout) {
   6853 			/* tx timeout triggered for debugging */
   6854 			vgen_trigger_txtimeout = 0;
   6855 		}
   6856 #endif
   6857 		mutex_enter(&ldcp->cblock);
   6858 		vgen_ldc_reset(ldcp);
   6859 		mutex_exit(&ldcp->cblock);
   6860 		if (ldcp->need_resched) {
   6861 			vio_net_tx_update_t vtx_update =
   6862 			    ldcp->portp->vcb.vio_net_tx_update;
   6863 
   6864 			ldcp->need_resched = B_FALSE;
   6865 			vtx_update(ldcp->portp->vhp);
   6866 		}
   6867 	}
   6868 
   6869 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
   6870 	    drv_usectohz(vnet_ldcwd_interval * 1000));
   6871 }
   6872 
   6873 /* handler for error messages received from the peer ldc end-point */
   6874 static void
   6875 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
   6876 {
   6877 	_NOTE(ARGUNUSED(ldcp, tagp))
   6878 }
   6879 
   6880 static int
   6881 vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
   6882 {
   6883 	vio_raw_data_msg_t	*rmsg;
   6884 	vio_dring_msg_t		*dmsg;
   6885 	uint64_t		seq_num;
   6886 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
   6887 
   6888 	if (tagp->vio_subtype_env == VIO_DRING_DATA) {
   6889 		dmsg = (vio_dring_msg_t *)tagp;
   6890 		seq_num = dmsg->seq_num;
   6891 	} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
   6892 		rmsg = (vio_raw_data_msg_t *)tagp;
   6893 		seq_num = rmsg->seq_num;
   6894 	} else {
   6895 		return (EINVAL);
   6896 	}
   6897 
   6898 	if (seq_num != ldcp->next_rxseq) {
   6899 
   6900 		/* seqnums don't match */
   6901 		DWARN(vgenp, ldcp,
   6902 		    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
   6903 		    ldcp->next_rxseq, seq_num);
   6904 
   6905 		return (EINVAL);
   6906 
   6907 	}
   6908 
   6909 	ldcp->next_rxseq++;
   6910 
   6911 	return (0);
   6912 }
   6913 
   6914 /* Check if the session id in the received message is valid */
   6915 static int
   6916 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
   6917 {
   6918 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   6919 
   6920 	if (tagp->vio_sid != ldcp->peer_sid) {
   6921 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
   6922 		    ldcp->peer_sid, tagp->vio_sid);
   6923 		return (VGEN_FAILURE);
   6924 	}
   6925 	else
   6926 		return (VGEN_SUCCESS);
   6927 }
   6928 
   6929 static caddr_t
   6930 vgen_print_ethaddr(uint8_t *a, char *ebuf)
   6931 {
   6932 	(void) sprintf(ebuf,
   6933 	    "%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
   6934 	return (ebuf);
   6935 }
   6936 
   6937 /* Handshake watchdog timeout handler */
   6938 static void
   6939 vgen_hwatchdog(void *arg)
   6940 {
   6941 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
   6942 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   6943 
   6944 	DWARN(vgenp, ldcp, "handshake timeout phase(%x) state(%x)\n",
   6945 	    ldcp->hphase, ldcp->hstate);
   6946 
   6947 	mutex_enter(&ldcp->cblock);
   6948 	if (ldcp->cancel_htid) {
   6949 		ldcp->cancel_htid = 0;
   6950 		mutex_exit(&ldcp->cblock);
   6951 		return;
   6952 	}
   6953 	ldcp->htid = 0;
   6954 	vgen_ldc_reset(ldcp);
   6955 	mutex_exit(&ldcp->cblock);
   6956 }
   6957 
   6958 static void
   6959 vgen_print_hparams(vgen_hparams_t *hp)
   6960 {
   6961 	uint8_t	addr[6];
   6962 	char	ea[6];
   6963 	ldc_mem_cookie_t *dc;
   6964 
   6965 	cmn_err(CE_CONT, "version_info:\n");
   6966 	cmn_err(CE_CONT,
   6967 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
   6968 	    hp->ver_major, hp->ver_minor, hp->dev_class);
   6969 
   6970 	vnet_macaddr_ultostr(hp->addr, addr);
   6971 	cmn_err(CE_CONT, "attr_info:\n");
   6972 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
   6973 	    vgen_print_ethaddr(addr, ea));
   6974 	cmn_err(CE_CONT,
   6975 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
   6976 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
   6977 
   6978 	dc = &hp->dring_cookie;
   6979 	cmn_err(CE_CONT, "dring_info:\n");
   6980 	cmn_err(CE_CONT,
   6981 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
   6982 	cmn_err(CE_CONT,
   6983 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
   6984 	    dc->addr, dc->size);
   6985 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
   6986 }
   6987 
   6988 static void
   6989 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
   6990 {
   6991 	vgen_hparams_t *hp;
   6992 
   6993 	cmn_err(CE_CONT, "Channel Information:\n");
   6994 	cmn_err(CE_CONT,
   6995 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
   6996 	    ldcp->ldc_id, ldcp->ldc_status);
   6997 	cmn_err(CE_CONT,
   6998 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
   6999 	    ldcp->local_sid, ldcp->peer_sid);
   7000 	cmn_err(CE_CONT,
   7001 	    "\thphase: 0x%x, hstate: 0x%x\n",
   7002 	    ldcp->hphase, ldcp->hstate);
   7003 
   7004 	cmn_err(CE_CONT, "Local handshake params:\n");
   7005 	hp = &ldcp->local_hparams;
   7006 	vgen_print_hparams(hp);
   7007 
   7008 	cmn_err(CE_CONT, "Peer handshake params:\n");
   7009 	hp = &ldcp->peer_hparams;
   7010 	vgen_print_hparams(hp);
   7011 }
   7012 
   7013 /*
   7014  * Send received packets up the stack.
   7015  */
   7016 static void
   7017 vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp, mblk_t *bpt)
   7018 {
   7019 	vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
   7020 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
   7021 
   7022 	if (ldcp->rcv_thread != NULL) {
   7023 		ASSERT(MUTEX_HELD(&ldcp->rxlock));
   7024 	} else {
   7025 		ASSERT(MUTEX_HELD(&ldcp->cblock));
   7026 	}
   7027 
   7028 	mutex_enter(&ldcp->pollq_lock);
   7029 
   7030 	if (ldcp->polling_on == B_TRUE) {
   7031 		/*
   7032 		 * If we are in polling mode, simply queue
   7033 		 * the packets onto the poll queue and return.
   7034 		 */
   7035 		if (ldcp->pollq_headp == NULL) {
   7036 			ldcp->pollq_headp = bp;
   7037 			ldcp->pollq_tailp = bpt;
   7038 		} else {
   7039 			ldcp->pollq_tailp->b_next = bp;
   7040 			ldcp->pollq_tailp = bpt;
   7041 		}
   7042 
   7043 		mutex_exit(&ldcp->pollq_lock);
   7044 		return;
   7045 	}
   7046 
   7047 	/*
   7048 	 * Prepend any pending mblks in the poll queue, now that we
   7049 	 * are in interrupt mode, before sending up the chain of pkts.
   7050 	 */
   7051 	if (ldcp->pollq_headp != NULL) {
   7052 		DBG2(vgenp, ldcp, "vgen_rx(%lx), pending pollq_headp\n",
   7053 		    (uintptr_t)ldcp);
   7054 		ldcp->pollq_tailp->b_next = bp;
   7055 		bp = ldcp->pollq_headp;
   7056 		ldcp->pollq_headp = ldcp->pollq_tailp = NULL;
   7057 	}
   7058 
   7059 	mutex_exit(&ldcp->pollq_lock);
   7060 
   7061 	if (ldcp->rcv_thread != NULL) {
   7062 		mutex_exit(&ldcp->rxlock);
   7063 	} else {
   7064 		mutex_exit(&ldcp->cblock);
   7065 	}
   7066 
   7067 	/* Send up the packets */
   7068 	vrx_cb(ldcp->portp->vhp, bp);
   7069 
   7070 	if (ldcp->rcv_thread != NULL) {
   7071 		mutex_enter(&ldcp->rxlock);
   7072 	} else {
   7073 		mutex_enter(&ldcp->cblock);
   7074 	}
   7075 }
   7076 
   7077 /*
   7078  * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
   7079  * This thread is woken up by the LDC interrupt handler to process
   7080  * LDC packets and receive data.
   7081  */
   7082 static void
   7083 vgen_ldc_rcv_worker(void *arg)
   7084 {
   7085 	callb_cpr_t	cprinfo;
   7086 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
   7087 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   7088 
   7089 	DBG1(vgenp, ldcp, "enter\n");
   7090 	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
   7091 	    "vnet_rcv_thread");
   7092 	mutex_enter(&ldcp->rcv_thr_lock);
   7093 	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {
   7094 
   7095 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
   7096 		/*
   7097 		 * Wait until the data is received or a stop
   7098 		 * request is received.
   7099 		 */
   7100 		while (!(ldcp->rcv_thr_flags &
   7101 		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
   7102 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
   7103 		}
   7104 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)
   7105 
   7106 		/*
   7107 		 * First process the stop request.
   7108 		 */
   7109 		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
   7110 			DBG2(vgenp, ldcp, "stopped\n");
   7111 			break;
   7112 		}
   7113 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
   7114 		ldcp->rcv_thr_flags |= VGEN_WTHR_PROCESSING;
   7115 		mutex_exit(&ldcp->rcv_thr_lock);
   7116 		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
   7117 		vgen_handle_evt_read(ldcp);
   7118 		mutex_enter(&ldcp->rcv_thr_lock);
   7119 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_PROCESSING;
   7120 	}
   7121 
   7122 	/*
   7123 	 * Update the run status and wakeup the thread that
   7124 	 * has sent the stop request.
   7125 	 */
   7126 	ldcp->rcv_thr_flags &= ~VGEN_WTHR_STOP;
   7127 	ldcp->rcv_thread = NULL;
   7128 	CALLB_CPR_EXIT(&cprinfo);
   7129 
   7130 	thread_exit();
   7131 	DBG1(vgenp, ldcp, "exit\n");
   7132 }
   7133 
   7134 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
   7135 static void
   7136 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
   7137 {
   7138 	kt_did_t	tid = 0;
   7139 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   7140 
   7141 	DBG1(vgenp, ldcp, "enter\n");
   7142 	/*
   7143 	 * Send a stop request by setting the stop flag and
   7144 	 * wait until the receive thread stops.
   7145 	 */
   7146 	mutex_enter(&ldcp->rcv_thr_lock);
   7147 	if (ldcp->rcv_thread != NULL) {
   7148 		tid = ldcp->rcv_thread->t_did;
   7149 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
   7150 		cv_signal(&ldcp->rcv_thr_cv);
   7151 	}
   7152 	mutex_exit(&ldcp->rcv_thr_lock);
   7153 
   7154 	if (tid != 0) {
   7155 		thread_join(tid);
   7156 	}
   7157 	DBG1(vgenp, ldcp, "exit\n");
   7158 }
   7159 
   7160 /*
   7161  * Wait for the channel rx-queue to be drained by allowing the receive
   7162  * worker thread to read all messages from the rx-queue of the channel.
   7163  * Assumption: further callbacks are disabled at this time.
   7164  */
   7165 static void
   7166 vgen_drain_rcv_thread(vgen_ldc_t *ldcp)
   7167 {
   7168 	clock_t	tm;
   7169 	clock_t	wt;
   7170 	clock_t	rv;
   7171 
   7172 	/*
   7173 	 * If there is data in ldc rx queue, wait until the rx
   7174 	 * worker thread runs and drains all msgs in the queue.
   7175 	 */
   7176 	wt = drv_usectohz(MILLISEC);
   7177 
   7178 	mutex_enter(&ldcp->rcv_thr_lock);
   7179 
   7180 	tm = ddi_get_lbolt() + wt;
   7181 
   7182 	/*
   7183 	 * We need to check both bits - DATARCVD and PROCESSING, to be cleared.
   7184 	 * If DATARCVD is set, that means the callback has signalled the worker
   7185 	 * thread, but the worker hasn't started processing yet. If PROCESSING
   7186 	 * is set, that means the thread is awake and processing. Note that the
   7187 	 * DATARCVD state can only be seen once, as the assumption is that
   7188 	 * further callbacks have been disabled at this point.
   7189 	 */
   7190 	while (ldcp->rcv_thr_flags &
   7191 	    (VGEN_WTHR_DATARCVD | VGEN_WTHR_PROCESSING)) {
   7192 		rv = cv_timedwait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock, tm);
   7193 		if (rv == -1) {	/* timeout */
   7194 			/*
   7195 			 * Note that the only way we return is due to a timeout;
   7196 			 * we set the new time to wait, before we go back and
   7197 			 * check the condition. The other(unlikely) possibility
   7198 			 * is a premature wakeup(see cv_timedwait(9F)) in which
   7199 			 * case we just continue to use the same time to wait.
   7200 			 */
   7201 			tm = ddi_get_lbolt() + wt;
   7202 		}
   7203 	}
   7204 
   7205 	mutex_exit(&ldcp->rcv_thr_lock);
   7206 }
   7207 
   7208 /*
   7209  * vgen_dds_rx -- post DDS messages to vnet.
   7210  */
   7211 static int
   7212 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
   7213 {
   7214 	vio_dds_msg_t *dmsg = (vio_dds_msg_t *)tagp;
   7215 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
   7216 
   7217 	if (dmsg->dds_class != DDS_VNET_NIU) {
   7218 		DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
   7219 		return (EBADMSG);
   7220 	}
   7221 	vnet_dds_rx(vgenp->vnetp, dmsg);
   7222 	return (0);
   7223 }
   7224 
   7225 /*
   7226  * vgen_dds_tx -- an interface called by vnet to send DDS messages.
   7227  */
   7228 int
   7229 vgen_dds_tx(void *arg, void *msg)
   7230 {
   7231 	vgen_t *vgenp = arg;
   7232 	vio_dds_msg_t *dmsg = msg;
   7233 	vgen_portlist_t *plistp = &vgenp->vgenports;
   7234 	vgen_ldc_t *ldcp;
   7235 	vgen_ldclist_t *ldclp;
   7236 	int rv = EIO;
   7237 
   7238 
   7239 	READ_ENTER(&plistp->rwlock);
   7240 	ldclp = &(vgenp->vsw_portp->ldclist);
   7241 	READ_ENTER(&ldclp->rwlock);
   7242 	ldcp = ldclp->headp;
   7243 	if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
   7244 		goto vgen_dsend_exit;
   7245 	}
   7246 
   7247 	dmsg->tag.vio_sid = ldcp->local_sid;
   7248 	rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
   7249 	if (rv != VGEN_SUCCESS) {
   7250 		rv = EIO;
   7251 	} else {
   7252 		rv = 0;
   7253 	}
   7254 
   7255 vgen_dsend_exit:
   7256 	RW_EXIT(&ldclp->rwlock);
   7257 	RW_EXIT(&plistp->rwlock);
   7258 	return (rv);
   7259 
   7260 }
   7261 
   7262 static void
   7263 vgen_ldc_reset(vgen_ldc_t *ldcp)
   7264 {
   7265 	vnet_t	*vnetp = LDC_TO_VNET(ldcp);
   7266 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
   7267 
   7268 	ASSERT(MUTEX_HELD(&ldcp->cblock));
   7269 
   7270 	if (ldcp->need_ldc_reset == B_TRUE) {
   7271 		/* another thread is already in the process of resetting */
   7272 		return;
   7273 	}
   7274 
   7275 	/* Set the flag to indicate reset is in progress */
   7276 	ldcp->need_ldc_reset = B_TRUE;
   7277 
   7278 	if (ldcp->portp == vgenp->vsw_portp) {
   7279 		mutex_exit(&ldcp->cblock);
   7280 		/*
   7281 		 * Now cleanup any HIO resources; the above flag also tells
   7282 		 * the code that handles dds messages to drop any new msgs
   7283 		 * that arrive while we are cleaning up and resetting the
   7284 		 * channel.
   7285 		 */
   7286 		vnet_dds_cleanup_hio(vnetp);
   7287 		mutex_enter(&ldcp->cblock);
   7288 	}
   7289 
   7290 	vgen_handshake_retry(ldcp);
   7291 }
   7292 
   7293 int
   7294 vgen_enable_intr(void *arg)
   7295 {
   7296 	vgen_port_t		*portp = (vgen_port_t *)arg;
   7297 	vgen_ldclist_t		*ldclp;
   7298 	vgen_ldc_t		*ldcp;
   7299 
   7300 	ldclp = &portp->ldclist;
   7301 	READ_ENTER(&ldclp->rwlock);
   7302 	/*
   7303 	 * NOTE: for now, we will assume we have a single channel.
   7304 	 */
   7305 	if (ldclp->headp == NULL) {
   7306 		RW_EXIT(&ldclp->rwlock);
   7307 		return (1);
   7308 	}
   7309 	ldcp = ldclp->headp;
   7310 
   7311 	mutex_enter(&ldcp->pollq_lock);
   7312 	ldcp->polling_on = B_FALSE;
   7313 	mutex_exit(&ldcp->pollq_lock);
   7314 
   7315 	RW_EXIT(&ldclp->rwlock);
   7316 
   7317 	return (0);
   7318 }
   7319 
   7320 int
   7321 vgen_disable_intr(void *arg)
   7322 {
   7323 	vgen_port_t		*portp = (vgen_port_t *)arg;
   7324 	vgen_ldclist_t		*ldclp;
   7325 	vgen_ldc_t		*ldcp;
   7326 
   7327 	ldclp = &portp->ldclist;
   7328 	READ_ENTER(&ldclp->rwlock);
   7329 	/*
   7330 	 * NOTE: for now, we will assume we have a single channel.
   7331 	 */
   7332 	if (ldclp->headp == NULL) {
   7333 		RW_EXIT(&ldclp->rwlock);
   7334 		return (1);
   7335 	}
   7336 	ldcp = ldclp->headp;
   7337 
   7338 
   7339 	mutex_enter(&ldcp->pollq_lock);
   7340 	ldcp->polling_on = B_TRUE;
   7341 	mutex_exit(&ldcp->pollq_lock);
   7342 
   7343 	RW_EXIT(&ldclp->rwlock);
   7344 
   7345 	return (0);
   7346 }
   7347 
   7348 mblk_t *
   7349 vgen_poll(void *arg, int bytes_to_pickup)
   7350 {
   7351 	vgen_port_t		*portp = (vgen_port_t *)arg;
   7352 	vgen_ldclist_t		*ldclp;
   7353 	vgen_ldc_t		*ldcp;
   7354 	mblk_t			*mp = NULL;
   7355 
   7356 	ldclp = &portp->ldclist;
   7357 	READ_ENTER(&ldclp->rwlock);
   7358 	/*
   7359 	 * NOTE: for now, we will assume we have a single channel.
   7360 	 */
   7361 	if (ldclp->headp == NULL) {
   7362 		RW_EXIT(&ldclp->rwlock);
   7363 		return (NULL);
   7364 	}
   7365 	ldcp = ldclp->headp;
   7366 
   7367 	mp = vgen_ldc_poll(ldcp, bytes_to_pickup);
   7368 
   7369 	RW_EXIT(&ldclp->rwlock);
   7370 	return (mp);
   7371 }
   7372 
   7373 static mblk_t *
   7374 vgen_ldc_poll(vgen_ldc_t *ldcp, int bytes_to_pickup)
   7375 {
   7376 	mblk_t	*bp = NULL;
   7377 	mblk_t	*bpt = NULL;
   7378 	mblk_t	*mp = NULL;
   7379 	size_t	mblk_sz = 0;
   7380 	size_t	sz = 0;
   7381 	uint_t	count = 0;
   7382 
   7383 	mutex_enter(&ldcp->pollq_lock);
   7384 
   7385 	bp = ldcp->pollq_headp;
   7386 	while (bp != NULL) {
   7387 		/* get the size of this packet */
   7388 		mblk_sz = msgdsize(bp);
   7389 
   7390 		/* if adding this pkt, exceeds the size limit, we are done. */
   7391 		if (sz + mblk_sz >  bytes_to_pickup) {
   7392 			break;
   7393 		}
   7394 
   7395 		/* we have room for this packet */
   7396 		sz += mblk_sz;
   7397 
   7398 		/* increment the # of packets being sent up */
   7399 		count++;
   7400 
   7401 		/* track the last processed pkt */
   7402 		bpt = bp;
   7403 
   7404 		/* get the next pkt */
   7405 		bp = bp->b_next;
   7406 	}
   7407 
   7408 	if (count != 0) {
   7409 		/*
   7410 		 * picked up some packets; save the head of pkts to be sent up.
   7411 		 */
   7412 		mp = ldcp->pollq_headp;
   7413 
   7414 		/* move the pollq_headp to skip over the pkts being sent up */
   7415 		ldcp->pollq_headp = bp;
   7416 
   7417 		/* picked up all pending pkts in the queue; reset tail also */
   7418 		if (ldcp->pollq_headp == NULL) {
   7419 			ldcp->pollq_tailp = NULL;
   7420 		}
   7421 
   7422 		/* terminate the tail of pkts to be sent up */
   7423 		bpt->b_next = NULL;
   7424 	}
   7425 
   7426 	mutex_exit(&ldcp->pollq_lock);
   7427 
   7428 	DTRACE_PROBE1(vgen_poll_pkts, uint_t, count);
   7429 	return (mp);
   7430 }
   7431 
   7432 #if DEBUG
   7433 
   7434 /*
   7435  * Print debug messages - set to 0xf to enable all msgs
   7436  */
   7437 static void
   7438 debug_printf(const char *fname, vgen_t *vgenp,
   7439     vgen_ldc_t *ldcp, const char *fmt, ...)
   7440 {
   7441 	char    buf[256];
   7442 	char    *bufp = buf;
   7443 	va_list ap;
   7444 
   7445 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
   7446 		(void) sprintf(bufp, "vnet%d:",
   7447 		    ((vnet_t *)(vgenp->vnetp))->instance);
   7448 		bufp += strlen(bufp);
   7449 	}
   7450 	if (ldcp != NULL) {
   7451 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
   7452 		bufp += strlen(bufp);
   7453 	}
   7454 	(void) sprintf(bufp, "%s: ", fname);
   7455 	bufp += strlen(bufp);
   7456 
   7457 	va_start(ap, fmt);
   7458 	(void) vsprintf(bufp, fmt, ap);
   7459 	va_end(ap);
   7460 
   7461 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
   7462 	    (vgendbg_ldcid == ldcp->ldc_id)) {
   7463 		cmn_err(CE_CONT, "%s\n", buf);
   7464 	}
   7465 }
   7466 #endif
   7467 
   7468 #ifdef	VNET_IOC_DEBUG
   7469 
   7470 static void
   7471 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
   7472 {
   7473 	struct iocblk	*iocp;
   7474 	vgen_port_t	*portp;
   7475 	enum		ioc_reply {
   7476 			IOC_INVAL = -1,		/* bad, NAK with EINVAL */
   7477 			IOC_ACK			/* OK, just send ACK    */
   7478 	}		status;
   7479 	int		rv;
   7480 
   7481 	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
   7482 	iocp->ioc_error = 0;
   7483 	portp = (vgen_port_t *)arg;
   7484 
   7485 	if (portp == NULL) {
   7486 		status = IOC_INVAL;
   7487 		goto vgen_ioc_exit;
   7488 	}
   7489 
   7490 	mutex_enter(&portp->lock);
   7491 
   7492 	switch (iocp->ioc_cmd) {
   7493 
   7494 	case VNET_FORCE_LINK_DOWN:
   7495 	case VNET_FORCE_LINK_UP:
   7496 		rv = vgen_force_link_state(portp, iocp->ioc_cmd);
   7497 		(rv == 0) ? (status = IOC_ACK) : (status = IOC_INVAL);
   7498 		break;
   7499 
   7500 	default:
   7501 		status = IOC_INVAL;
   7502 		break;
   7503 
   7504 	}
   7505 
   7506 	mutex_exit(&portp->lock);
   7507 
   7508 vgen_ioc_exit:
   7509 
   7510 	switch (status) {
   7511 	default:
   7512 	case IOC_INVAL:
   7513 		/* Error, reply with a NAK and EINVAL error */
   7514 		miocnak(q, mp, 0, EINVAL);
   7515 		break;
   7516 	case IOC_ACK:
   7517 		/* OK, reply with an ACK */
   7518 		miocack(q, mp, 0, 0);
   7519 		break;
   7520 	}
   7521 }
   7522 
   7523 static int
   7524 vgen_force_link_state(vgen_port_t *portp, int cmd)
   7525 {
   7526 	ldc_status_t	istatus;
   7527 	vgen_ldclist_t	*ldclp;
   7528 	vgen_ldc_t	*ldcp;
   7529 	vgen_t		*vgenp = portp->vgenp;
   7530 	int		rv;
   7531 
   7532 	ldclp = &portp->ldclist;
   7533 	READ_ENTER(&ldclp->rwlock);
   7534 
   7535 	/*
   7536 	 * NOTE: for now, we will assume we have a single channel.
   7537 	 */
   7538 	if (ldclp->headp == NULL) {
   7539 		RW_EXIT(&ldclp->rwlock);
   7540 		return (1);
   7541 	}
   7542 	ldcp = ldclp->headp;
   7543 	mutex_enter(&ldcp->cblock);
   7544 
   7545 	switch (cmd) {
   7546 
   7547 	case VNET_FORCE_LINK_DOWN:
   7548 		(void) ldc_down(ldcp->ldc_handle);
   7549 		ldcp->link_down_forced = B_TRUE;
   7550 		break;
   7551 
   7552 	case VNET_FORCE_LINK_UP:
   7553 		rv = ldc_up(ldcp->ldc_handle);
   7554 		if (rv != 0) {
   7555 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
   7556 		}
   7557 		ldcp->link_down_forced = B_FALSE;
   7558 
   7559 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
   7560 			DWARN(vgenp, ldcp, "ldc_status err\n");
   7561 		} else {
   7562 			ldcp->ldc_status = istatus;
   7563 		}
   7564 
   7565 		/* if channel is already UP - restart handshake */
   7566 		if (ldcp->ldc_status == LDC_UP) {
   7567 			vgen_handle_evt_up(ldcp);
   7568 		}
   7569 		break;
   7570 
   7571 	}
   7572 
   7573 	mutex_exit(&ldcp->cblock);
   7574 	RW_EXIT(&ldclp->rwlock);
   7575 
   7576 	return (0);
   7577 }
   7578 
   7579 #else
   7580 
   7581 static void
   7582 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
   7583 {
   7584 	vgen_port_t	*portp;
   7585 
   7586 	portp = (vgen_port_t *)arg;
   7587 
   7588 	if (portp == NULL) {
   7589 		miocnak(q, mp, 0, EINVAL);
   7590 		return;
   7591 	}
   7592 
   7593 	miocnak(q, mp, 0, ENOTSUP);
   7594 }
   7595 
   7596 #endif
   7597