Home | History | Annotate | Download | only in io
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 
     28 #ifndef _SYS_XDF_H
     29 #define	_SYS_XDF_H
     30 
     31 #include <sys/ddi.h>
     32 #include <sys/sunddi.h>
     33 #include <sys/cmlb.h>
     34 #include <sys/dkio.h>
     35 
     36 #include <sys/gnttab.h>
     37 #include <xen/sys/xendev.h>
     38 
     39 #ifdef __cplusplus
     40 extern "C" {
     41 #endif
     42 
     43 
     44 /*
     45  * VBDs have standard 512 byte blocks
     46  * A single blkif_request can transfer up to 11 pages of data, 1 page/segment
     47  */
     48 #define	XB_BSIZE	DEV_BSIZE
     49 #define	XB_BMASK	(XB_BSIZE - 1)
     50 #define	XB_BSHIFT	9
     51 #define	XB_DTOB(bn, vdp)	((bn) * (vdp)->xdf_xdev_secsize)
     52 
     53 #define	XB_MAX_SEGLEN	(8 * XB_BSIZE)
     54 #define	XB_SEGOFFSET	(XB_MAX_SEGLEN - 1)
     55 #define	XB_MAX_XFER	(XB_MAX_SEGLEN * BLKIF_MAX_SEGMENTS_PER_REQUEST)
     56 #define	XB_MAXPHYS	(XB_MAX_XFER * BLKIF_RING_SIZE)
     57 
     58 /* Number of sectors per segement */
     59 #define	XB_NUM_SECTORS_PER_SEG	(PAGESIZE / XB_BSIZE)
     60 /* sectors are number 0 through XB_NUM_SECTORS_PER_SEG - 1 */
     61 #define	XB_LAST_SECTOR_IN_SEG	(XB_NUM_SECTORS_PER_SEG - 1)
     62 
     63 
     64 /*
     65  * Slice for absolute disk transaction.
     66  *
     67  * Hack Alert.  XB_SLICE_NONE is a magic value that can be written into the
     68  * b_private field of buf structures passed to xdf_strategy().  When present
     69  * it indicates that the I/O is using an absolute offset.  (ie, the I/O is
     70  * not bound to any one partition.)  This magic value is currently used by
     71  * the pv_cmdk driver.  This hack is shamelessly stolen from the sun4v vdc
     72  * driver, another virtual disk device driver.  (Although in the case of
     73  * vdc the hack is less egregious since it is self contained within the
     74  * vdc driver, where as here it is used as an interface between the pv_cmdk
     75  * driver and the xdf driver.)
     76  */
     77 #define	XB_SLICE_NONE		0xFF
     78 
     79 /*
     80  * blkif status
     81  */
     82 typedef enum xdf_state {
     83 	/*
     84 	 * initial state
     85 	 */
     86 	XD_UNKNOWN = 0,
     87 	/*
     88 	 * ring and evtchn alloced, xenbus state changed to
     89 	 * XenbusStateInitialised, wait for backend to connect
     90 	 */
     91 	XD_INIT = 1,
     92 	/*
     93 	 * backend and frontend xenbus state has changed to
     94 	 * XenbusStateConnected.  IO is now allowed, but we are not still
     95 	 * fully initialized.
     96 	 */
     97 	XD_CONNECTED = 2,
     98 	/*
     99 	 * We're fully initialized and allowing regular IO.
    100 	 */
    101 	XD_READY = 3,
    102 	/*
    103 	 * vbd interface close request received from backend, no more I/O
    104 	 * requestis allowed to be put into ring buffer, while interrupt handler
    105 	 * is allowed to run to finish any outstanding I/O request, disconnect
    106 	 * process is kicked off by changing xenbus state to XenbusStateClosed
    107 	 */
    108 	XD_CLOSING = 4,
    109 	/*
    110 	 * disconnection process finished, both backend and frontend's
    111 	 * xenbus state has been changed to XenbusStateClosed, can be detached
    112 	 */
    113 	XD_CLOSED = 5,
    114 	/*
    115 	 * We're either being suspended or resuming from a suspend.  If we're
    116 	 * in the process of suspending, we block all new IO, but but allow
    117 	 * existing IO to drain.
    118 	 */
    119 	XD_SUSPEND = 6
    120 } xdf_state_t;
    121 
    122 /*
    123  * 16 partitions + fdisk
    124  */
    125 #define	XDF_PSHIFT	6
    126 #define	XDF_PMASK	((1 << XDF_PSHIFT) - 1)
    127 #define	XDF_PEXT	(1 << XDF_PSHIFT)
    128 #define	XDF_MINOR(i, m) (((i) << XDF_PSHIFT) | (m))
    129 #define	XDF_INST(m)	((m) >> XDF_PSHIFT)
    130 #define	XDF_PART(m)	((m) & XDF_PMASK)
    131 
    132 /*
    133  * one blkif_request_t will have one corresponding ge_slot_t
    134  * where we save those grant table refs used in this blkif_request_t
    135  *
    136  * the id of this ge_slot_t will also be put into 'id' field in
    137  * each blkif_request_t when sent out to the ring buffer.
    138  */
    139 typedef struct ge_slot {
    140 	list_node_t	gs_vreq_link;
    141 	struct v_req	*gs_vreq;
    142 	domid_t		gs_oeid;
    143 	int		gs_isread;
    144 	grant_ref_t	gs_ghead;
    145 	int		gs_ngrefs;
    146 	grant_ref_t	gs_ge[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    147 } ge_slot_t;
    148 
    149 /*
    150  * vbd I/O request
    151  *
    152  * An instance of this structure is bound to each buf passed to
    153  * the driver's strategy by setting the pointer into bp->av_back.
    154  * The id of this vreq will also be put into 'id' field in each
    155  * blkif_request_t when sent out to the ring buffer for one DMA
    156  * window of this buf.
    157  *
    158  * Vreq mainly contains DMA information for this buf. In one vreq/buf,
    159  * there could be more than one DMA window, each of which will be
    160  * mapped to one blkif_request_t/ge_slot_t. Ge_slot_t contains all grant
    161  * table entry information for this buf. The ge_slot_t for current DMA
    162  * window is pointed to by v_gs in vreq.
    163  *
    164  * So, grant table entries will only be alloc'ed when the DMA window is
    165  * about to be transferred via blkif_request_t to the ring buffer. And
    166  * they will be freed right after the blkif_response_t is seen. By this
    167  * means, we can make use of grant table entries more efficiently.
    168  */
    169 typedef struct v_req {
    170 	list_node_t	v_link;
    171 	list_t		v_gs;
    172 	int		v_status;
    173 	buf_t		*v_buf;
    174 	uint_t		v_ndmacs;
    175 	uint_t		v_dmaw;
    176 	uint_t		v_ndmaws;
    177 	uint_t		v_nslots;
    178 	uint64_t	v_blkno;
    179 	ddi_dma_handle_t v_memdmahdl;
    180 	ddi_acc_handle_t v_align;
    181 	ddi_dma_handle_t v_dmahdl;
    182 	ddi_dma_cookie_t v_dmac;
    183 	caddr_t		v_abuf;
    184 	uint8_t		v_flush_diskcache;
    185 	boolean_t	v_runq;
    186 } v_req_t;
    187 
    188 /*
    189  * Status set and checked in vreq->v_status by vreq_setup()
    190  *
    191  * These flags will help us to continue the vreq setup work from last failure
    192  * point, instead of starting from scratch after each failure.
    193  */
    194 #define	VREQ_INIT		0x0
    195 #define	VREQ_INIT_DONE		0x1
    196 #define	VREQ_DMAHDL_ALLOCED	0x2
    197 #define	VREQ_MEMDMAHDL_ALLOCED	0x3
    198 #define	VREQ_DMAMEM_ALLOCED	0x4
    199 #define	VREQ_DMABUF_BOUND	0x5
    200 #define	VREQ_GS_ALLOCED		0x6
    201 #define	VREQ_DMAWIN_DONE	0x7
    202 
    203 /*
    204  * virtual block device per-instance softstate
    205  */
    206 typedef struct xdf {
    207 	dev_info_t	*xdf_dip;
    208 	char		*xdf_addr;
    209 	ddi_iblock_cookie_t xdf_ibc; /* mutex iblock cookie */
    210 	domid_t		xdf_peer; /* otherend's dom ID */
    211 	xendev_ring_t	*xdf_xb_ring; /* I/O ring buffer */
    212 	ddi_acc_handle_t xdf_xb_ring_hdl; /* access handler for ring buffer */
    213 	list_t		xdf_vreq_act; /* active vreq list */
    214 	buf_t		*xdf_f_act; /* active buf list head */
    215 	buf_t		*xdf_l_act; /* active buf list tail */
    216 	buf_t		*xdf_i_act; /* active buf list index */
    217 	xdf_state_t	xdf_state; /* status of this virtual disk */
    218 	boolean_t	xdf_suspending;
    219 	ulong_t		xdf_vd_open[OTYPCNT];
    220 	ulong_t		xdf_vd_lyropen[XDF_PEXT];
    221 	ulong_t		xdf_connect_req;
    222 	kthread_t	*xdf_connect_thread;
    223 	ulong_t		xdf_vd_exclopen;
    224 	kmutex_t	xdf_iostat_lk; /* muxes lock for the iostat ptr */
    225 	kmutex_t	xdf_dev_lk; /* mutex lock for I/O path */
    226 	kmutex_t	xdf_cb_lk; /* mutex lock for event handling path */
    227 	kcondvar_t	xdf_dev_cv; /* cv used in I/O path */
    228 	uint_t		xdf_dinfo; /* disk info from backend xenstore */
    229 	diskaddr_t	xdf_xdev_nblocks; /* total size in block */
    230 	uint_t		xdf_xdev_secsize; /* disk blksize from backend */
    231 	cmlb_geom_t	xdf_pgeom;
    232 	boolean_t	xdf_pgeom_set;
    233 	boolean_t	xdf_pgeom_fixed;
    234 	kstat_t		*xdf_xdev_iostat;
    235 	cmlb_handle_t	xdf_vd_lbl;
    236 	ddi_softintr_t	xdf_softintr_id;
    237 	timeout_id_t	xdf_timeout_id;
    238 	struct gnttab_free_callback xdf_gnt_callback;
    239 	boolean_t	xdf_feature_barrier;
    240 	boolean_t	xdf_flush_supported;
    241 	boolean_t	xdf_media_req_supported;
    242 	boolean_t	xdf_wce;
    243 	boolean_t	xdf_cmbl_reattach;
    244 	char		*xdf_flush_mem;
    245 	char		*xdf_cache_flush_block;
    246 	int		xdf_evtchn;
    247 	enum dkio_state	xdf_mstate;
    248 	kcondvar_t	xdf_mstate_cv;
    249 	kcondvar_t	xdf_hp_status_cv;
    250 	struct buf	*xdf_ready_bp;
    251 	ddi_taskq_t	*xdf_ready_tq;
    252 	kthread_t	*xdf_ready_tq_thread;
    253 	struct buf	*xdf_ready_tq_bp;
    254 #ifdef	DEBUG
    255 	int		xdf_dmacallback_num;
    256 	kthread_t	*xdf_oe_change_thread;
    257 #endif
    258 } xdf_t;
    259 
    260 /*
    261  * VBD I/O requests must be aligned on a 512-byte boundary and specify
    262  * a transfer size which is a mutiple of 512-bytes
    263  */
    264 #define	ALIGNED_XFER(bp) \
    265 	((((uintptr_t)((bp)->b_un.b_addr) & XB_BMASK) == 0) && \
    266 	(((bp)->b_bcount & XB_BMASK) == 0))
    267 
    268 #define	U_INVAL(u)	(((u)->uio_loffset & (offset_t)(XB_BMASK)) || \
    269 	((u)->uio_iov->iov_len & (offset_t)(XB_BMASK)))
    270 
    271 /* wrap pa_to_ma() for xdf to run in dom0 */
    272 #define	PATOMA(addr)	(DOMAIN_IS_INITDOMAIN(xen_info) ? addr : pa_to_ma(addr))
    273 
    274 #define	XD_IS_RO(vbd)	VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_READONLY)
    275 #define	XD_IS_CD(vbd)	VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_CDROM)
    276 #define	XD_IS_RM(vbd)	VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_REMOVABLE)
    277 #define	IS_READ(bp)	VOID2BOOLEAN((bp)->b_flags & B_READ)
    278 #define	IS_ERROR(bp)	VOID2BOOLEAN((bp)->b_flags & B_ERROR)
    279 
    280 #define	XDF_UPDATE_IO_STAT(vdp, bp)					\
    281 	{								\
    282 		kstat_io_t *kip = KSTAT_IO_PTR((vdp)->xdf_xdev_iostat);	\
    283 		size_t n_done = (bp)->b_bcount - (bp)->b_resid;		\
    284 		if ((bp)->b_flags & B_READ) {				\
    285 			kip->reads++;					\
    286 			kip->nread += n_done;				\
    287 		} else {                                                \
    288 			kip->writes++;					\
    289 			kip->nwritten += n_done;			\
    290 		}							\
    291 	}
    292 
    293 #ifdef DEBUG
    294 #define	DPRINTF(flag, args)	{if (xdf_debug & (flag)) prom_printf args; }
    295 #define	SETDMACBON(vbd)		{(vbd)->xdf_dmacallback_num++; }
    296 #define	SETDMACBOFF(vbd)	{(vbd)->xdf_dmacallback_num--; }
    297 #define	ISDMACBON(vbd)		((vbd)->xdf_dmacallback_num > 0)
    298 #else
    299 #define	DPRINTF(flag, args)
    300 #define	SETDMACBON(vbd)
    301 #define	SETDMACBOFF(vbd)
    302 #define	ISDMACBON(vbd)
    303 #endif /* DEBUG */
    304 
    305 #define	DDI_DBG		0x1
    306 #define	DMA_DBG		0x2
    307 #define	INTR_DBG	0x8
    308 #define	IO_DBG		0x10
    309 #define	IOCTL_DBG	0x20
    310 #define	SUSRES_DBG	0x40
    311 #define	LBL_DBG		0x80
    312 
    313 #if defined(XPV_HVM_DRIVER)
    314 extern int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
    315 extern int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    316     void *);
    317 extern void xdfmin(struct buf *bp);
    318 extern dev_info_t *xdf_hvm_hold(const char *);
    319 extern boolean_t xdf_hvm_connect(dev_info_t *);
    320 extern int xdf_hvm_setpgeom(dev_info_t *, cmlb_geom_t *);
    321 extern int xdf_kstat_create(dev_info_t *, char *, int);
    322 extern void xdf_kstat_delete(dev_info_t *);
    323 extern boolean_t xdf_is_cd(dev_info_t *);
    324 extern boolean_t xdf_is_rm(dev_info_t *);
    325 extern boolean_t xdf_media_req_supported(dev_info_t *);
    326 #endif /* XPV_HVM_DRIVER */
    327 
    328 #ifdef __cplusplus
    329 }
    330 #endif
    331 
    332 #endif	/* _SYS_XDF_H */
    333