Home | History | Annotate | Download | only in sys
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * This header file contains the basic data structures which the
     29  * virtual switch (vsw) uses to communicate with vnet clients.
     30  *
     31  * The virtual switch reads the machine description (MD) to
     32  * determine how many port_t structures to create (each port_t
     33  * can support communications to a single network device). The
     34  * port_t's are maintained in a linked list.
     35  *
     36  * Each port in turn contains a number of logical domain channels
     37  * (ldc's) which are inter domain communications channels which
     38  * are used for passing small messages between the domains. There
     39  * may be an unlimited number of channels associated with each port,
     40  * though most devices only use a single channel.
     41  *
     42  * The ldc is a bi-directional channel, which is divided up into
     43  * two directional 'lanes', one outbound from the switch to the
     44  * virtual network device, the other inbound to the switch.
     45  * Depending on the type of device each lane may have separate
     46  * communication parameters (such as mtu etc).
     47  *
     48  * For those network clients which use descriptor rings the
     49  * rings are associated with the appropriate lane. I.e. rings
     50  * which the switch exports are associated with the outbound lanes
     51  * while those which the network clients are exporting to the switch
     52  * are associated with the inbound lane.
     53  *
     54  * In diagram form the data structures look as follows:
     55  *
     56  * vsw instance
     57  *     |
     58  *     +----->port_t----->port_t----->port_t----->
     59  *		|
     60  *		+--->ldc_t--->ldc_t--->ldc_t--->
     61  *		       |
     62  *		       +--->lane_t (inbound)
     63  *		       |       |
     64  *		       |       +--->dring--->dring--->
     65  *		       |
     66  *		       +--->lane_t (outbound)
     67  *			       |
     68  *			       +--->dring--->dring--->
     69  *
     70  */
     71 
     72 #ifndef	_VSW_LDC_H
     73 #define	_VSW_LDC_H
     74 
     75 #ifdef	__cplusplus
     76 extern "C" {
     77 #endif
     78 
     79 /*
     80  * Default message type.
         *
         * A fixed-size buffer of eight 64-bit words. VSW_LDC_MTU is defined
         * as sizeof (def_msg_t), and vsw_ctrl_task_t embeds one of these by
         * value (its pktp member).
     81  */
     82 typedef struct def_msg {
     83 	uint64_t	data[8];
     84 } def_msg_t;
     85 
     86 /*
     87  * Currently only one major/minor version pair is supported.
     88  */
     89 #define	VSW_NUM_VER	1
     90 
        /* A version number expressed as a major/minor pair. */
     91 typedef struct ver_sup {
     92 	uint16_t	ver_major;	/* major version number */
     93 	uint16_t	ver_minor;	/* minor version number */
     94 } ver_sup_t;
     95 
     96 /*
     97  * Lane states.
         *
         * Every non-zero value below is a distinct single bit (0x1 through
         * 0x40000000), so any combination of them fits in one state word.
         * The values are grouped by message type: VER, ATTR, DRING, RDX and
         * MCST, each with INFO/ACK/NACK and SENT/RECV variants.
         * NOTE(review): presumably these are kept in lane_t.lstate -- confirm
         * against the code that sets and tests them.
     98  */
     99 #define	VSW_LANE_INACTIV	0x0	/* No params set for lane */
    100 
    101 #define	VSW_VER_INFO_SENT	0x1	/* Version # sent to peer */
    102 #define	VSW_VER_INFO_RECV	0x2	/* Version # recv from peer */
    103 #define	VSW_VER_ACK_RECV	0x4
    104 #define	VSW_VER_ACK_SENT	0x8
    105 #define	VSW_VER_NACK_RECV	0x10
    106 #define	VSW_VER_NACK_SENT	0x20
    107 
    108 #define	VSW_ATTR_INFO_SENT	0x40	/* Attributes sent to peer */
    109 #define	VSW_ATTR_INFO_RECV	0x80	/* Peer attributes received */
    110 #define	VSW_ATTR_ACK_SENT	0x100
    111 #define	VSW_ATTR_ACK_RECV	0x200
    112 #define	VSW_ATTR_NACK_SENT	0x400
    113 #define	VSW_ATTR_NACK_RECV	0x800
    114 
    115 #define	VSW_DRING_INFO_SENT	0x1000	/* Dring info sent to peer */
    116 #define	VSW_DRING_INFO_RECV	0x2000	/* Dring info received */
    117 #define	VSW_DRING_ACK_SENT	0x4000
    118 #define	VSW_DRING_ACK_RECV	0x8000
    119 #define	VSW_DRING_NACK_SENT	0x10000
    120 #define	VSW_DRING_NACK_RECV	0x20000
    121 
    122 #define	VSW_RDX_INFO_SENT	0x40000	/* RDX sent to peer */
    123 #define	VSW_RDX_INFO_RECV	0x80000	/* RDX received from peer */
    124 #define	VSW_RDX_ACK_SENT	0x100000
    125 #define	VSW_RDX_ACK_RECV	0x200000
    126 #define	VSW_RDX_NACK_SENT	0x400000
    127 #define	VSW_RDX_NACK_RECV	0x800000
    128 
    129 #define	VSW_MCST_INFO_SENT	0x1000000
    130 #define	VSW_MCST_INFO_RECV	0x2000000
    131 #define	VSW_MCST_ACK_SENT	0x4000000
    132 #define	VSW_MCST_ACK_RECV	0x8000000
    133 #define	VSW_MCST_NACK_SENT	0x10000000
    134 #define	VSW_MCST_NACK_RECV	0x20000000
    135 
    136 #define	VSW_LANE_ACTIVE		0x40000000	/* Lane open to xmit data */
    137 
    138 /* Handshake milestones */
        /*
         * Each milestone is a distinct single bit (0x1 through 0x10).
         * NOTE(review): presumably recorded in vsw_ldc_t.hphase, whose
         * comment reads "handshake phase" -- confirm against the users.
         */
    139 #define	VSW_MILESTONE0		0x1	/* ver info exchanged */
    140 #define	VSW_MILESTONE1		0x2	/* attribute exchanged */
    141 #define	VSW_MILESTONE2		0x4	/* dring info exchanged */
    142 #define	VSW_MILESTONE3		0x8	/* rdx exchanged */
    143 #define	VSW_MILESTONE4		0x10	/* handshake complete */
    144 
    145 /*
    146  * Lane direction (relative to ourselves).
    147  */
    148 #define	INBOUND			0x1
    149 #define	OUTBOUND		0x2
    150 
    151 /* Peer session id received */
        /*
         * NOTE(review): presumably a value for vsw_ldc_t.session_status
         * ("Session recv'd, sent") -- confirm.
         */
    152 #define	VSW_PEER_SESSION	0x1
    153 
    154 /*
    155  * Maximum number of consecutive reads of data from channel
    156  */
    157 #define	VSW_MAX_CHAN_READ	50
    158 
    159 /*
    160  * Currently only one ldc per port is supported.
         *
         * Note that the overview comment at the top of this file says a
         * port may have an unlimited number of channels; this constant
         * is 1.
    161  */
    162 #define	VSW_PORT_MAX_LDCS	1	/* max # of ldcs per port */
    163 
    164 /*
    165  * Used for port add/deletion.
    166  */
    167 #define	VSW_PORT_UPDATED	0x1
    168 
        /* ldc transmit result codes */
    169 #define	LDC_TX_SUCCESS		0	/* ldc transmit success */
    170 #define	LDC_TX_FAILURE		1	/* ldc transmit failure */
    171 #define	LDC_TX_NORESOURCES	2	/* out of descriptors */
    172 
    173 /*
    174  * Descriptor ring info
    175  *
    176  * Each descriptor element has a pre-allocated data buffer
    177  * associated with it, into which data being transmitted is
    178  * copied. By pre-allocating we speed up the copying process.
    179  * The buffer is re-used once the peer has indicated that it is
    180  * finished with the descriptor.
         *
         * NOTE(review): VSW_PRIV_SIZE names vnet_private_desc_t, whereas the
         * private descriptor type declared in this header is
         * vsw_private_desc_t -- confirm which type is intended.
    181  */
    182 #define	VSW_RING_EL_DATA_SZ	2048	/* Size of data section (bytes) */
    183 #define	VSW_PRIV_SIZE	sizeof (vnet_private_desc_t)
    184 #define	VSW_PUB_SIZE	sizeof (vnet_public_desc_t)
    185 
        /*
         * Number of entries in vsw_private_desc_t.memcookie[]: ETHERMTU
         * shifted right by MMU_PAGESHIFT, plus two.
         * NOTE(review): the extra two presumably allow for a buffer that is
         * not page aligned -- confirm.
         */
    186 #define	VSW_MAX_COOKIES		((ETHERMTU >> MMU_PAGESHIFT) + 2)
    187 
    188 /*
    189  * LDC pkt transfer MTU: the size of one def_msg_t.
    190  */
    191 #define	VSW_LDC_MTU	sizeof (def_msg_t)
    192 
    193 /*
    194  * Size of the mblk in each mblk pool.
    195  */
    196 #define	VSW_MBLK_SZ_128		128
    197 #define	VSW_MBLK_SZ_256		256
    198 #define	VSW_MBLK_SZ_2048	2048
    199 
    200 /*
    201  * Number of mblks in each mblk pool.
    202  */
    203 #define	VSW_NUM_MBLKS	1024
    204 
    205 /*
    206  * Private descriptor
         *
         * Pairs a pointer to a public descriptor (descp) with bookkeeping:
         * a state word and its lock, a memory handle, a data pointer and
         * length, and an array of up to VSW_MAX_COOKIES memory cookies.
    207  */
    208 typedef struct vsw_private_desc {
    209 	/*
    210 	 * Below lock must be held when accessing the state of
    211 	 * a descriptor on either the private or public sections
    212 	 * of the ring.
    213 	 */
    214 	kmutex_t		dstate_lock;
    215 	uint64_t		dstate;		/* state; see dstate_lock */
    216 	vnet_public_desc_t	*descp;		/* public descriptor */
    217 	ldc_mem_handle_t	memhandle;
    218 	void			*datap;	/* presumably data buffer - confirm */
    219 	uint64_t		datalen;
    220 	uint64_t		ncookies; /* presumably # used memcookie[] */
    221 	ldc_mem_cookie_t	memcookie[VSW_MAX_COOKIES];
    222 	int			bound;
    223 } vsw_private_desc_t;
    224 
    225 /*
    226  * Descriptor ring structure
         *
         * Rings are chained through the next pointer. A lane refers to its
         * rings through lane_t.dringp, and lane_t.dlistrw is described there
         * as the lock for that list.
    227  */
    228 typedef struct dring_info {
    229 	struct	dring_info	*next;	/* next ring in chain */
    230 	kmutex_t		dlock;
    231 	uint32_t		num_descriptors;
    232 	uint32_t		descriptor_size;
    233 	uint32_t		options;
    234 	uint32_t		ncookies;
    235 	ldc_mem_cookie_t	cookie[1];	/* room for one cookie only */
    236 
    237 	ldc_dring_handle_t	handle;
    238 	uint64_t		ident;	/* identifier sent to peer */
    239 	uint64_t		end_idx;	/* last idx processed */
    240 	int64_t			last_ack_recv;
    241 
    242 	kmutex_t		restart_lock;
    243 	boolean_t		restart_reqd;	/* send restart msg */
    244 
    245 	/*
    246 	 * base address of private and public portions of the
    247 	 * ring (where appropriate), and data block.
    248 	 */
    249 	void			*pub_addr;	/* base of public section */
    250 	void			*priv_addr;	/* base of private section */
    251 	void			*data_addr;	/* base of data section */
    252 	size_t			data_sz;	/* size of data section */
    253 	size_t			desc_data_sz;	/* size of descr data blk */
    254 	uint8_t			dring_mtype;	/* dring mem map type */
    255 } dring_info_t;
    256 
    257 /*
    258  * Each ldc connection is comprised of two lanes, incoming
    259  * from a peer, and outgoing to that peer. Each lane shares
    260  * common ldc parameters and also has private lane-specific
    261  * parameters.
         *
         * vsw_ldc_t embeds one lane_t for each direction (lane_in and
         * lane_out).
    262  */
    263 typedef struct lane {
    264 	uint64_t	lstate;		/* Lane state */
    265 	uint16_t	ver_major;	/* Version major number */
    266 	uint16_t	ver_minor;	/* Version minor number */
    267 	uint64_t	seq_num;	/* Sequence number */
    268 	uint64_t	mtu;		/* ETHERMTU */
    269 	uint64_t	addr;		/* Unique physical address */
    270 	uint8_t		addr_type;	/* Only MAC address at moment */
    271 	uint8_t		xfer_mode;	/* Dring or Pkt based */
    272 	uint8_t		ack_freq;	/* Only non zero for Pkt based xfer */
    273 	uint32_t	physlink_update;	/* physlink updates */
    274 	krwlock_t	dlistrw;	/* Lock for dring list */
    275 	dring_info_t	*dringp;	/* List of drings for this lane */
    276 } lane_t;
    277 
    278 /* channel drain states */
    279 #define	VSW_LDC_INIT		0x1	/* Initial non-drain state */
    280 #define	VSW_LDC_DRAINING	0x2	/* Channel draining */
    281 
    282 /*
    283  * vnet-protocol-version dependent function prototypes.
         *
         * vsw_ldctx_t is the type of vsw_ldc_t.tx (transmit function) and
         * vsw_ldcrx_pktdata_t is the type of vsw_ldc_t.rx_pktdata (process
         * rx raw data msg). The meaning of each argument is not visible in
         * this header; see the functions assigned to those members.
    284  */
    285 typedef int	(*vsw_ldctx_t) (void *, mblk_t *, mblk_t *, uint32_t);
    286 typedef void	(*vsw_ldcrx_pktdata_t) (void *, void *, uint32_t);
    287 
    288 /* ldc information associated with a vsw-port */
        /*
         * One entry per channel. Entries are chained through ldc_next, and
         * vsw_port_t holds the list as p_ldclist (a vsw_ldc_list_t). Each
         * entry embeds an inbound and an outbound lane_t.
         */
    289 typedef struct vsw_ldc {
    290 	struct vsw_ldc		*ldc_next;	/* next ldc in the list */
    291 	struct vsw_port		*ldc_port;	/* associated port */
    292 	struct vsw		*ldc_vswp;	/* associated vsw */
    293 	kmutex_t		ldc_cblock;	/* sync callback processing */
    294 	kmutex_t		ldc_txlock;	/* sync transmits */
    295 	kmutex_t		ldc_rxlock;	/* sync rx */
    296 	uint64_t		ldc_id;		/* channel number */
    297 	ldc_handle_t		ldc_handle;	/* channel handle */
    298 	kmutex_t		drain_cv_lock;
    299 	kcondvar_t		drain_cv;	/* channel draining */
    300 	int			drain_state;	/* presumably VSW_LDC_INIT/DRAINING */
    301 	uint32_t		hphase;		/* handshake phase */
    302 	int			hcnt;		/* # handshake attempts */
    303 	kmutex_t		status_lock;
    304 	ldc_status_t		ldc_status;	/* channel status */
    305 	uint8_t			reset_active;	/* reset flag */
    306 	uint64_t		local_session;	/* Our session id */
    307 	uint64_t		peer_session;	/* Our peer's session id */
    308 	uint8_t			session_status;	/* Session recv'd, sent */
    309 	uint32_t		hss_id;		/* Handshake session id */
    310 	uint64_t		next_ident;	/* Next dring ident # to use */
    311 	lane_t			lane_in;	/* Inbound lane */
    312 	lane_t			lane_out;	/* Outbound lane */
    313 	uint8_t			dev_class;	/* Peer device class */
    314 	boolean_t		pls_negotiated;	/* phys link state update ? */
    315 	vio_multi_pool_t	vmp;		/* Receive mblk pools */
    316 	uint32_t		max_rxpool_size; /* max size of rxpool in use */
    317 	uint64_t		*ldcmsg;	/* msg buffer for ldc_read() */
    318 	uint64_t		msglen;		/* size of ldcmsg */
    319 
    320 	/* tx thread fields */
    321 	kthread_t		*tx_thread;	/* tx thread */
    322 	uint32_t		tx_thr_flags;	/* tx thread flags */
    323 	kmutex_t		tx_thr_lock;	/* lock for tx thread */
    324 	kcondvar_t		tx_thr_cv;	/* cond.var for tx thread */
    325 	mblk_t			*tx_mhead;	/* tx mblks head */
    326 	mblk_t			*tx_mtail;	/* tx mblks tail */
    327 	uint32_t		tx_cnt;		/* # of pkts queued for tx */
    328 
    329 	/* receive thread fields */
    330 	kthread_t		*rx_thread;	/* receive thread */
    331 	uint32_t		rx_thr_flags;	/* receive thread flags */
    332 	kmutex_t		rx_thr_lock;	/* lock for receive thread */
    333 	kcondvar_t		rx_thr_cv;	/* cond.var for recv thread */
    334 
    335 	vsw_ldctx_t		tx;		/* transmit function */
    336 	vsw_ldcrx_pktdata_t	rx_pktdata;	/* process rx raw data msg */
    337 
    338 	/* channel statistics */
    339 	vgen_stats_t		ldc_stats;	/* channel statistics */
    340 	kstat_t			*ksp;		/* channel kstats */
    341 } vsw_ldc_t;
    342 
    343 /* worker thread flags */
        /*
         * Distinct single bits. NOTE(review): presumably the values held in
         * vsw_ldc_t.tx_thr_flags and rx_thr_flags -- confirm.
         */
    344 #define	VSW_WTHR_DATARCVD 	0x01	/* data received */
    345 #define	VSW_WTHR_STOP 		0x02	/* stop worker thread request */
    346 
    347 /* list of ldcs per port */
        /*
         * Embedded in vsw_port_t as p_ldclist. The entries are vsw_ldc_t
         * structures chained through their ldc_next member.
         */
    348 typedef struct vsw_ldc_list {
    349 	vsw_ldc_t	*head;		/* head of the list */
    350 	krwlock_t	lockrw;		/* sync access(rw) to the list */
    351 } vsw_ldc_list_t;
    352 
    353 /* multicast addresses port is interested in */
        /*
         * Singly linked through nextp. vsw_port_t.mcap points at a list of
         * these, and vsw_port_t.mca_lock is described there as the
         * multicast lock.
         */
    354 typedef struct mcst_addr {
    355 	struct mcst_addr	*nextp;	/* next address in the list */
    356 	struct ether_addr	mca;	/* multicast address */
    357 	uint64_t		addr;	/* mcast addr converted to hash key */
    358 	boolean_t		mac_added; /* added into physical device */
    359 } mcst_addr_t;
    360 
    361 /* Port detach states */
        /*
         * Distinct single bits. NOTE(review): presumably the values held in
         * vsw_port_t.state ("port state") -- confirm.
         */
    362 #define	VSW_PORT_INIT		0x1	/* Initial non-detach state */
    363 #define	VSW_PORT_DETACHING	0x2	/* In process of being detached */
    364 #define	VSW_PORT_DETACHABLE	0x4	/* Safe to detach */
    365 
    366 /* port information associated with a vsw */
        /*
         * Ports are chained through p_next; vsw_port_list_t holds the head
         * of that list. Each port carries its own list of channels
         * (p_ldclist).
         *
         * NOTE(review): the VSW_PORT_REFHOLD/VSW_PORT_REFRELE macros in this
         * header operate on (portp)->ref_cnt, but no ref_cnt member is
         * declared in this structure.
         */
    367 typedef struct vsw_port {
    368 	int			p_instance;	/* port instance */
    369 	struct vsw_port		*p_next;	/* next port in the list */
    370 	struct vsw		*p_vswp;	/* associated vsw */
    371 	int			num_ldcs;	/* # of ldcs in the port */
    372 	uint64_t		*ldc_ids;	/* ldc ids */
    373 	vsw_ldc_list_t		p_ldclist;	/* list of ldcs for this port */
    374 
    375 	kmutex_t		tx_lock;	/* transmit lock */
    376 	int			(*transmit)(vsw_ldc_t *, mblk_t *);
    377 
    378 	int			state;		/* port state */
    379 	kmutex_t		state_lock;
    380 	kcondvar_t		state_cv;
    381 
        	/*
        	 * NOTE(review): "fields below" does not say where the protected
        	 * group ends; it covers at least p_mch and p_muh -- confirm.
        	 */
    382 	krwlock_t		maccl_rwlock;	/* protect fields below */
    383 	mac_client_handle_t	p_mch;		/* mac client handle */
    384 	mac_unicast_handle_t	p_muh;		/* mac unicast handle */
    385 
    386 	kmutex_t		mca_lock;	/* multicast lock */
    387 	mcst_addr_t		*mcap;		/* list of multicast addrs */
    388 
    389 	boolean_t		addr_set;	/* Addr set where */
    390 
    391 	/*
    392 	 * mac address of the port & connected device
    393 	 */
    394 	struct ether_addr	p_macaddr;
    395 	uint16_t		pvid;	/* port vlan id (untagged) */
    396 	struct vsw_vlanid	*vids;	/* vlan ids (tagged) */
    397 	uint16_t		nvids;	/* # of vids */
    398 	mod_hash_t		*vlan_hashp;	/* vlan hash table */
    399 	uint32_t		vlan_nchains;	/* # of vlan hash chains */
    400 
    401 	/* HybridIO related info */
    402 	uint32_t		p_hio_enabled;	/* Hybrid mode enabled? */
    403 	uint32_t		p_hio_capable;	/* Port capable of HIO */
    404 
    405 	/* bandwidth limit */
    406 	uint64_t		p_bandwidth;	/* bandwidth limit */
    407 } vsw_port_t;
    408 
    409 /* list of ports per vsw */
        /* The entries are vsw_port_t structures chained through p_next. */
    410 typedef struct vsw_port_list {
    411 	vsw_port_t	*head;		/* head of the list */
    412 	krwlock_t	lockrw;		/* sync access(rw) to the list */
    413 	int		num_ports;	/* number of ports in the list */
    414 } vsw_port_list_t;
    415 
    416 /*
    417  * Taskq control message
         *
         * Carries a channel pointer, a def_msg_t held by value, and an
         * hss_id.
    418  */
    419 typedef struct vsw_ctrl_task {
    420 	vsw_ldc_t	*ldcp;		/* channel this task refers to */
    421 	def_msg_t	pktp;		/* message, stored by value */
    422 	uint32_t	hss_id;	/* presumably handshake session id - confirm */
    423 } vsw_ctrl_task_t;
    424 
    425 /*
    426  * State of connection to peer. Some of these states
    427  * can be mapped to LDC events as follows:
    428  *
    429  * VSW_CONN_RESET -> LDC_RESET_EVT
    430  * VSW_CONN_UP    -> LDC_UP_EVT
         *
         * No LDC event is listed for VSW_CONN_RESTART. The three values are
         * distinct single bits.
    431  */
    432 #define	VSW_CONN_UP		0x1	/* Connection has come up */
    433 #define	VSW_CONN_RESET		0x2	/* Connection reset */
    434 #define	VSW_CONN_RESTART	0x4	/* Restarting handshake on connection */
    435 
        /* A connection event together with the channel it refers to. */
    436 typedef struct vsw_conn_evt {
    437 	uint16_t	evt;		/* Connection event */
    438 	vsw_ldc_t	*ldcp;		/* channel the event refers to */
    439 } vsw_conn_evt_t;
    440 
    441 /*
    442  * Ethernet broadcast address definition.
         *
         * All six octets are 0xff. IS_BROADCAST() compares against this
         * object. Because it is defined static in a header, every source
         * file that includes this header gets its own private copy.
    443  */
    444 static	struct	ether_addr	etherbroadcastaddr = {
    445 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
    446 };
    447 
    448 #define	IS_BROADCAST(ehp) \
    449 	(bcmp(&ehp->ether_dhost, &etherbroadcastaddr, ETHERADDRL) == 0)
    450 #define	IS_MULTICAST(ehp) \
    451 	((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1)
    452 
        /*
         * Shorthand for the reader/writer lock calls: READ_ENTER and
         * WRITE_ENTER call rw_enter() with RW_READER and RW_WRITER
         * respectively, and RW_EXIT calls rw_exit(). The argument is passed
         * through unchanged.
         */
    453 #define	READ_ENTER(x)	rw_enter(x, RW_READER)
    454 #define	WRITE_ENTER(x)	rw_enter(x, RW_WRITER)
    455 #define	RW_EXIT(x)	rw_exit(x)
    456 
        /*
         * Increment / decrement (portp)->ref_cnt with atomic_inc_32() and
         * atomic_dec_32().
         *
         * NOTE(review): vsw_port_t as declared in this header has no ref_cnt
         * member, so these macros cannot be expanded on a vsw_port_t pointer
         * as the structure stands -- confirm whether they are still used.
         */
    457 #define	VSW_PORT_REFHOLD(portp)	atomic_inc_32(&((portp)->ref_cnt))
    458 #define	VSW_PORT_REFRELE(portp)	atomic_dec_32(&((portp)->ref_cnt))
    459 
    460 #ifdef	__cplusplus
    461 }
    462 #endif
    463 
    464 #endif	/* _VSW_LDC_H */
    465