Home | History | Annotate | Download | only in sys
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #ifndef	_VDC_H
     28 #define	_VDC_H
     29 
     30 /*
     31  * Virtual disk client implementation definitions
     32  */
     33 
     34 #include <sys/sysmacros.h>
     35 #include <sys/note.h>
     36 
     37 #include <sys/ldc.h>
     38 #include <sys/vio_mailbox.h>
     39 #include <sys/vdsk_mailbox.h>
     40 #include <sys/vdsk_common.h>
     41 
     42 #ifdef	__cplusplus
     43 extern "C" {
     44 #endif
     45 
     46 #define	VDC_DRIVER_NAME		"vdc"
     47 
     48 /*
     49  * Bit-field values to indicate if parts of the vdc driver are initialised.
     50  */
     51 #define	VDC_SOFT_STATE	0x0001
     52 #define	VDC_LOCKS	0x0002
     53 #define	VDC_MINOR	0x0004
     54 #define	VDC_THREAD	0x0008
     55 #define	VDC_DRING_INIT	0x0010	/* The DRing was created */
     56 #define	VDC_DRING_BOUND	0x0020	/* The DRing was bound to an LDC channel */
     57 #define	VDC_DRING_LOCAL	0x0040	/* The local private DRing was allocated */
     58 #define	VDC_DRING_ENTRY	0x0080	/* At least one DRing entry was initialised */
     59 #define	VDC_DRING	(VDC_DRING_INIT | VDC_DRING_BOUND |	\
     60 				VDC_DRING_LOCAL | VDC_DRING_ENTRY)
     61 #define	VDC_HANDSHAKE	0x0100	/* Indicates if a handshake is in progress */
     62 #define	VDC_HANDSHAKE_STOP	0x0200	/* stop further handshakes */
     63 
     64 /*
     65  * Definitions of MD nodes/properties.
     66  */
     67 #define	VDC_MD_CHAN_NAME		"channel-endpoint"
     68 #define	VDC_MD_VDEV_NAME		"virtual-device"
     69 #define	VDC_MD_PORT_NAME		"virtual-device-port"
     70 #define	VDC_MD_DISK_NAME		"disk"
     71 #define	VDC_MD_CFG_HDL			"cfg-handle"
     72 #define	VDC_MD_TIMEOUT			"vdc-timeout"
     73 #define	VDC_MD_ID			"id"
     74 
     75 /*
     76  * Definition of actions to be carried out when processing the sequence ID
     77  * of a message received from the vDisk server. The function verifying the
     78  * sequence number checks the 'seq_num_xxx' fields in the soft state and
     79  * returns whether the message should be processed (VDC_SEQ_NUM_TODO) or
     80  * whether it was it was previously processed (VDC_SEQ_NUM_SKIP).
     81  */
     82 #define	VDC_SEQ_NUM_INVALID		-1	/* Error */
     83 #define	VDC_SEQ_NUM_SKIP		0	/* Request already processed */
     84 #define	VDC_SEQ_NUM_TODO		1	/* Request needs processing */
     85 
     86 /*
     87  * Flags for virtual disk operations.
     88  */
     89 #define	VDC_OP_STATE_RUNNING	0x01	/* do operation in running state */
     90 #define	VDC_OP_ERRCHK_BACKEND	0x02	/* check backend on error */
     91 #define	VDC_OP_ERRCHK_CONFLICT	0x04	/* check resv conflict on error */
     92 
     93 #define	VDC_OP_ERRCHK	(VDC_OP_ERRCHK_BACKEND | VDC_OP_ERRCHK_CONFLICT)
     94 #define	VDC_OP_NORMAL	(VDC_OP_STATE_RUNNING | VDC_OP_ERRCHK)
     95 
     96 /*
     97  * Macros to get UNIT and PART number
     98  */
     99 #define	VDCUNIT_SHIFT	3
    100 #define	VDCPART_MASK	7
    101 
    102 #define	VDCUNIT(dev)	(getminor((dev)) >> VDCUNIT_SHIFT)
    103 #define	VDCPART(dev)	(getminor((dev)) &  VDCPART_MASK)
    104 
    105 /*
    106  * Scheme to store the instance number and the slice number in the minor number.
    107  * (NOTE: Uses the same format and definitions as the sd(7D) driver)
    108  */
    109 #define	VD_MAKE_DEV(instance, minor)	((instance << VDCUNIT_SHIFT) | minor)
    110 
    111 #define	VDC_EFI_DEV_SET(dev, vdsk, ioctl)	\
    112 	VDSK_EFI_DEV_SET(dev, vdsk, ioctl,	\
    113 	    (vdsk)->vdisk_bsize, (vdsk)->vdisk_size)
    114 
    115 /*
    116  * variables controlling how long to wait before timing out and how many
    117  * retries to attempt before giving up when communicating with vds.
    118  *
    119  * These values need to be sufficiently large so that a guest can survive
    120  * the reboot of the service domain.
    121  */
    122 #define	VDC_RETRIES	10
    123 
    124 #define	VDC_USEC_TIMEOUT_MIN	(30 * MICROSEC)		/* 30 sec */
    125 
    126 /*
    127  * This macro returns the number of Hz that the vdc driver should wait before
    128  * a timeout is triggered. The 'timeout' parameter specifiecs the wait
    129  * time in Hz. The 'mul' parameter allows for a multiplier to be
    130  * specified allowing for a backoff to be implemented (e.g. using the
    131  * retry number as a multiplier) where the wait time will get longer if
    132  * there is no response on the previous retry.
    133  */
    134 #define	VD_GET_TIMEOUT_HZ(timeout, mul)	\
    135 	(ddi_get_lbolt() + ((timeout) * MAX(1, (mul))))
    136 
    137 /*
    138  * Macros to manipulate Descriptor Ring variables in the soft state
    139  * structure.
    140  */
    141 #define	VDC_GET_NEXT_REQ_ID(vdc)	((vdc)->req_id++)
    142 
    143 #define	VDC_GET_DRING_ENTRY_PTR(vdc, idx)	\
    144 		(vd_dring_entry_t *)(uintptr_t)((vdc)->dring_mem_info.vaddr + \
    145 			(idx * (vdc)->dring_entry_size))
    146 
    147 #define	VDC_MARK_DRING_ENTRY_FREE(vdc, idx)			\
    148 	{ \
    149 		vd_dring_entry_t *dep = NULL;				\
    150 		ASSERT(vdc != NULL);					\
    151 		ASSERT(idx < vdc->dring_len);		\
    152 		ASSERT(vdc->dring_mem_info.vaddr != NULL);		\
    153 		dep = (vd_dring_entry_t *)(uintptr_t)			\
    154 			(vdc->dring_mem_info.vaddr +	\
    155 			(idx * vdc->dring_entry_size));			\
    156 		ASSERT(dep != NULL);					\
    157 		dep->hdr.dstate = VIO_DESC_FREE;			\
    158 	}
    159 
    160 /* Initialise the Session ID and Sequence Num in the DRing msg */
    161 #define	VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc)		\
    162 		ASSERT(vdc != NULL);			\
    163 		dmsg.tag.vio_sid = vdc->session_id;	\
    164 		dmsg.seq_num = vdc->seq_num;
    165 
    166 /*
    167  * The states that the read thread can be in.
    168  */
    169 typedef enum vdc_rd_state {
    170 	VDC_READ_IDLE,			/* idling - conn is not up */
    171 	VDC_READ_WAITING,		/* waiting for data */
    172 	VDC_READ_PENDING,		/* pending data avail for read */
    173 	VDC_READ_RESET			/* channel was reset - stop reads */
    174 } vdc_rd_state_t;
    175 
    176 /*
    177  * The states that the vdc-vds connection can be in.
    178  */
    179 typedef enum vdc_state {
    180 	VDC_STATE_INIT,			/* device is initialized */
    181 	VDC_STATE_INIT_WAITING,		/* waiting for ldc connection */
    182 	VDC_STATE_NEGOTIATE,		/* doing handshake negotiation */
    183 	VDC_STATE_HANDLE_PENDING,	/* handle requests in backup dring */
    184 	VDC_STATE_FAULTED,		/* multipath backend is inaccessible */
    185 	VDC_STATE_FAILED,		/* device is not usable */
    186 	VDC_STATE_RUNNING,		/* running and accepting requests */
    187 	VDC_STATE_DETACH,		/* detaching */
    188 	VDC_STATE_RESETTING		/* resetting connection with vds */
    189 } vdc_state_t;
    190 
    191 /*
    192  * States of the service provided by a vds server
    193  */
    194 typedef enum vdc_service_state {
    195 	VDC_SERVICE_NONE = -1, 		/* no state define */
    196 	VDC_SERVICE_OFFLINE,		/* no connection with the service */
    197 	VDC_SERVICE_CONNECTED,		/* connection established */
    198 	VDC_SERVICE_ONLINE,		/* connection and backend available */
    199 	VDC_SERVICE_FAILED,		/* connection failed */
    200 	VDC_SERVICE_FAULTED		/* connection but backend unavailable */
    201 } vdc_service_state_t;
    202 
    203 /*
    204  * The states that the vdc instance can be in.
    205  */
    206 typedef enum vdc_lc_state {
    207 	VDC_LC_ATTACHING,	/* driver is attaching */
    208 	VDC_LC_ONLINE,		/* driver is attached and online */
    209 	VDC_LC_DETACHING	/* driver is detaching */
    210 } vdc_lc_state_t;
    211 
    212 /*
    213  * Local Descriptor Ring entry
    214  *
    215  * vdc creates a Local (private) descriptor ring the same size as the
    216  * public descriptor ring it exports to vds.
    217  */
    218 
    219 typedef enum {
    220 	VIO_read_dir,		/* read data from server */
    221 	VIO_write_dir,		/* write data to server */
    222 	VIO_both_dir		/* transfer both in and out in same buffer */
    223 } vio_desc_direction_t;
    224 
    225 typedef struct vdc_local_desc {
    226 	boolean_t		is_free;	/* local state - inuse or not */
    227 
    228 	int			operation;	/* VD_OP_xxx to be performed */
    229 	caddr_t			addr;		/* addr passed in by consumer */
    230 	int			slice;
    231 	diskaddr_t		offset;		/* disk offset */
    232 	size_t			nbytes;
    233 	struct buf		*buf;		/* buf of operation */
    234 	vio_desc_direction_t	dir;		/* direction of transfer */
    235 	int			flags;		/* flags of operation */
    236 
    237 	caddr_t			align_addr;	/* used if addr non-aligned */
    238 	ldc_mem_handle_t	desc_mhdl;	/* Mem handle of buf */
    239 	vd_dring_entry_t	*dep;		/* public Dring Entry Pointer */
    240 
    241 } vdc_local_desc_t;
    242 
    243 /*
    244  * I/O queue used for checking backend or failfast
    245  */
    246 typedef struct vdc_io {
    247 	struct vdc_io	*vio_next;	/* next pending I/O in the queue */
    248 	int		vio_index;	/* descriptor index */
    249 	clock_t		vio_qtime;	/* time the I/O was queued */
    250 } vdc_io_t;
    251 
    252 /*
    253  * Per vDisk server channel states
    254  */
    255 #define	VDC_LDC_INIT	0x0001
    256 #define	VDC_LDC_CB	0x0002
    257 #define	VDC_LDC_OPEN	0x0004
    258 #define	VDC_LDC		(VDC_LDC_INIT | VDC_LDC_CB | VDC_LDC_OPEN)
    259 
    260 /*
    261  * vDisk server information
    262  */
    263 typedef struct vdc_server {
    264 	struct vdc_server	*next;			/* Next server */
    265 	struct vdc		*vdcp;			/* Ptr to vdc struct */
    266 	uint64_t		id;			/* Server port id */
    267 	uint64_t		state;			/* Server state */
    268 	vdc_service_state_t	svc_state;		/* Service state */
    269 	vdc_service_state_t	log_state;		/* Last state logged */
    270 	uint64_t		ldc_id;			/* Server LDC id */
    271 	ldc_handle_t		ldc_handle;		/* Server LDC handle */
    272 	ldc_status_t		ldc_state;		/* Server LDC state */
    273 	uint64_t		ctimeout;		/* conn tmout (secs) */
    274 } vdc_server_t;
    275 
    276 /*
    277  * vdc soft state structure
    278  */
    279 typedef struct vdc {
    280 
    281 	kmutex_t	lock;		/* protects next 2 sections of vars */
    282 	kcondvar_t	running_cv;	/* signal when upper layers can send */
    283 	kcondvar_t	initwait_cv;	/* signal when ldc conn is up */
    284 	kcondvar_t	dring_free_cv;	/* signal when desc is avail */
    285 	kcondvar_t	membind_cv;	/* signal when mem can be bound */
    286 	boolean_t	self_reset;	/* self initiated reset */
    287 	kcondvar_t	io_pending_cv;	/* signal on pending I/O */
    288 	boolean_t	io_pending;	/* pending I/O */
    289 
    290 	int		initialized;	/* keeps track of what's init'ed */
    291 	vdc_lc_state_t	lifecycle;	/* Current state of the vdc instance */
    292 
    293 	int		hshake_cnt;	/* number of failed handshakes */
    294 	uint8_t		open[OTYPCNT];	/* mask of opened slices */
    295 	uint8_t		open_excl;	/* mask of exclusively opened slices */
    296 	ulong_t		open_lyr[V_NUMPAR]; /* number of layered opens */
    297 	int		dkio_flush_pending; /* # outstanding DKIO flushes */
    298 	int		validate_pending; /* # outstanding validate request */
    299 	vd_disk_label_t vdisk_label; 	/* label type of device/disk imported */
    300 	struct extvtoc	*vtoc;		/* structure to store VTOC data */
    301 	struct dk_geom	*geom;		/* structure to store geometry data */
    302 	vd_slice_t	slice[V_NUMPAR]; /* logical partitions */
    303 
    304 	kthread_t	*msg_proc_thr;	/* main msg processing thread */
    305 
    306 	kmutex_t	read_lock;	/* lock to protect read */
    307 	kcondvar_t	read_cv;	/* cv to wait for READ events */
    308 	vdc_rd_state_t	read_state;	/* current read state */
    309 
    310 	uint32_t	sync_op_cnt;	/* num of active sync operations */
    311 	boolean_t	sync_op_blocked; /* blocked waiting to do sync op */
    312 	kcondvar_t	sync_blocked_cv; /* cv wait for other syncs to finish */
    313 
    314 	uint64_t	session_id;	/* common ID sent with all messages */
    315 	uint64_t	seq_num;	/* most recent sequence num generated */
    316 	uint64_t	seq_num_reply;	/* Last seq num ACK/NACK'ed by vds */
    317 	uint64_t	req_id;		/* Most recent Request ID generated */
    318 	uint64_t	req_id_proc;	/* Last request ID processed by vdc */
    319 	vdc_state_t	state;		/* Current disk client-server state */
    320 
    321 	dev_info_t	*dip;		/* device info pointer */
    322 	int		instance;	/* driver instance number */
    323 
    324 	vio_ver_t	ver;		/* version number agreed with server */
    325 	vd_disk_type_t	vdisk_type;	/* type of device/disk being imported */
    326 	uint32_t	vdisk_media;	/* physical media type of vDisk */
    327 	uint64_t	vdisk_size;	/* device size in blocks */
    328 	uint64_t	max_xfer_sz;	/* maximum block size of a descriptor */
    329 	uint64_t	vdisk_bsize;	/* blk size for the virtual disk */
    330 	uint32_t	vio_bmask;	/* mask to check vio blk alignment */
    331 	int		vio_bshift;	/* shift for vio blk conversion */
    332 	uint64_t	operations;	/* bitmask of ops. server supports */
    333 	struct dk_cinfo	*cinfo;		/* structure to store DKIOCINFO data */
    334 	struct dk_minfo	*minfo;		/* structure for DKIOCGMEDIAINFO data */
    335 	ddi_devid_t	devid;		/* device id */
    336 	boolean_t	ctimeout_reached; /* connection timeout has expired */
    337 
    338 	/*
    339 	 * The ownership fields are protected by the lock mutex. The
    340 	 * ownership_lock mutex is used to serialize ownership operations;
    341 	 * it should be acquired before the lock mutex.
    342 	 */
    343 	kmutex_t	ownership_lock;		/* serialize ownership ops */
    344 	int		ownership;		/* ownership status flags */
    345 	kthread_t	*ownership_thread;	/* ownership thread */
    346 	kcondvar_t	ownership_cv;		/* cv for ownership update */
    347 
    348 	/*
    349 	 * The eio and failfast fields are protected by the lock mutex.
    350 	 */
    351 	kthread_t	*eio_thread;		/* error io thread */
    352 	kcondvar_t	eio_cv;			/* cv for eio thread update */
    353 	vdc_io_t	*eio_queue;		/* error io queue */
    354 	clock_t		failfast_interval;	/* interval in microsecs */
    355 
    356 	/*
    357 	 * kstats used to store I/O statistics consumed by iostat(1M).
    358 	 * These are protected by the lock mutex.
    359 	 */
    360 	kstat_t		*io_stats;
    361 	kstat_t		*err_stats;
    362 
    363 	ldc_dring_handle_t	dring_hdl;		/* dring handle */
    364 	ldc_mem_info_t		dring_mem_info;		/* dring information */
    365 	uint_t			dring_curr_idx;		/* current index */
    366 	uint32_t		dring_len;		/* dring length */
    367 	uint32_t		dring_max_cookies;	/* dring max cookies */
    368 	uint32_t		dring_cookie_count;	/* num cookies */
    369 	uint32_t		dring_entry_size;	/* descriptor size */
    370 	ldc_mem_cookie_t 	*dring_cookie;		/* dring cookies */
    371 	uint64_t		dring_ident;		/* dring ident */
    372 
    373 	uint64_t		threads_pending; 	/* num of threads */
    374 
    375 	vdc_local_desc_t	*local_dring;		/* local dring */
    376 	vdc_local_desc_t	*local_dring_backup;	/* local dring backup */
    377 	int			local_dring_backup_tail; /* backup dring tail */
    378 	int			local_dring_backup_len;	/* backup dring len */
    379 
    380 	int			num_servers;		/* no. of servers */
    381 	vdc_server_t		*server_list;		/* vdisk server list */
    382 	vdc_server_t		*curr_server;		/* curr vdisk server */
    383 } vdc_t;
    384 
    385 /*
    386  * Ownership status flags
    387  */
    388 #define	VDC_OWNERSHIP_NONE	0x00 /* no ownership wanted */
    389 #define	VDC_OWNERSHIP_WANTED	0x01 /* ownership is wanted */
    390 #define	VDC_OWNERSHIP_GRANTED	0x02 /* ownership has been granted */
    391 #define	VDC_OWNERSHIP_RESET	0x04 /* ownership has been reset */
    392 
    393 /*
    394  * Reservation conflict panic message
    395  */
    396 #define	VDC_RESV_CONFLICT_FMT_STR	"Reservation Conflict\nDisk: "
    397 #define	VDC_RESV_CONFLICT_FMT_LEN 	(sizeof (VDC_RESV_CONFLICT_FMT_STR))
    398 
    399 /*
    400  * Debugging macros
    401  */
    402 #ifdef DEBUG
    403 extern int	vdc_msglevel;
    404 extern uint64_t	vdc_matchinst;
    405 
    406 #define	DMSG(_vdc, err_level, format, ...)				\
    407 	do {								\
    408 		if (vdc_msglevel > err_level &&				\
    409 		(vdc_matchinst & (1ull << (_vdc)->instance)))		\
    410 			cmn_err(CE_CONT, "?[%d,t@%p] %s: "format,	\
    411 			(_vdc)->instance, (void *)curthread,		\
    412 			__func__, __VA_ARGS__);				\
    413 		_NOTE(CONSTANTCONDITION)				\
    414 	} while (0);
    415 
    416 #define	DMSGX(err_level, format, ...)					\
    417 	do {								\
    418 		if (vdc_msglevel > err_level)				\
    419 			cmn_err(CE_CONT, "?%s: "format, __func__, __VA_ARGS__);\
    420 		_NOTE(CONSTANTCONDITION)				\
    421 	} while (0);
    422 
    423 #define	VDC_DUMP_DRING_MSG(dmsgp)					\
    424 		DMSGX(0, "sq:%lu start:%d end:%d ident:%lu\n",		\
    425 			dmsgp->seq_num, dmsgp->start_idx,		\
    426 			dmsgp->end_idx, dmsgp->dring_ident);
    427 
    428 #else	/* !DEBUG */
    429 #define	DMSG(err_level, ...)
    430 #define	DMSGX(err_level, format, ...)
    431 #define	VDC_DUMP_DRING_MSG(dmsgp)
    432 
    433 #endif	/* !DEBUG */
    434 
    435 #ifdef	__cplusplus
    436 }
    437 #endif
    438 
    439 #endif	/* _VDC_H */
    440