Home | History | Annotate | Download | only in io
      1   5084    johnlev /*
      2   5084    johnlev  * CDDL HEADER START
      3   5084    johnlev  *
      4   5084    johnlev  * The contents of this file are subject to the terms of the
      5   5084    johnlev  * Common Development and Distribution License (the "License").
      6   5084    johnlev  * You may not use this file except in compliance with the License.
      7   5084    johnlev  *
      8   5084    johnlev  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9   5084    johnlev  * or http://www.opensolaris.org/os/licensing.
     10   5084    johnlev  * See the License for the specific language governing permissions
     11   5084    johnlev  * and limitations under the License.
     12   5084    johnlev  *
     13   5084    johnlev  * When distributing Covered Code, include this CDDL HEADER in each
     14   5084    johnlev  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15   5084    johnlev  * If applicable, add the following below this CDDL HEADER, with the
     16   5084    johnlev  * fields enclosed by brackets "[]" replaced with your own identifying
     17   5084    johnlev  * information: Portions Copyright [yyyy] [name of copyright owner]
     18   5084    johnlev  *
     19   5084    johnlev  * CDDL HEADER END
     20   5084    johnlev  */
     21   5084    johnlev 
     22   5084    johnlev /*
     23   8863     Edward  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24   5084    johnlev  * Use is subject to license terms.
     25   5084    johnlev  */
     26   5084    johnlev 
     27   5084    johnlev /*
     28   5084    johnlev  * xdf.c - Xen Virtual Block Device Driver
     29   5084    johnlev  * TODO:
     30   5084    johnlev  *	- support alternate block size (currently only DEV_BSIZE supported)
     31   5084    johnlev  *	- revalidate geometry for removable devices
     32   8863     Edward  *
 * This driver exports Solaris disk device nodes, accepts IO requests from
 * those nodes, and services those requests by talking to a backend device
 * in another domain.
     36   8863     Edward  *
     37   8863     Edward  * Communication with the backend device is done via a ringbuffer (which is
     38   8863     Edward  * managed via xvdi interfaces) and dma memory (which is managed via ddi
     39   8863     Edward  * interfaces).
     40   8863     Edward  *
 * Communication with the backend device is dependent upon establishing a
 * connection to the backend device.  This connection process involves
 * reading device configuration information from xenbus and publishing
 * some frontend runtime configuration parameters via the xenbus (for
 * consumption by the backend).  Once we've published runtime configuration
 * information via the xenbus, the backend device can enter the connected
 * state and we'll enter the XD_CONNECTED state.  But before we can allow
 * random IO to begin, we need to do IO to the backend device to determine
 * the device label and if flush operations are supported.  Once this is
 * done we enter the XD_READY state and can process any IO operations.
     51   8863     Edward  *
 * We receive notifications of xenbus state changes for the backend device
 * (aka, the "other end") via the xdf_oe_change() callback.  This callback
 * is single threaded, meaning that we can't receive new notifications of
 * other end state changes while we're processing an outstanding
 * notification of an other end state change.  Therefore we can't do any
 * blocking operations from the xdf_oe_change() callback.  This is why we
 * have a separate taskq (xdf_ready_tq) which exists to do the necessary
 * IO to get us from the XD_CONNECTED to the XD_READY state.  All IO
 * generated by the xdf_ready_tq thread (xdf_ready_tq_thread) will go
 * through xdf_lb_rdwr(), which is a synchronous IO interface.  IOs
 * generated by the xdf_ready_tq_thread thread have priority over all
 * other IO requests.
     64   8863     Edward  *
     65   8863     Edward  * We also communicate with the backend device via the xenbus "media-req"
     66   8863     Edward  * (XBP_MEDIA_REQ) property.  For more information on this see the
     67   8863     Edward  * comments in blkif.h.
     68   5084    johnlev  */
     69   5084    johnlev 
     70   8863     Edward #include <io/xdf.h>
     71   8863     Edward 
     72   5741        mrj #include <sys/conf.h>
     73   5741        mrj #include <sys/dkio.h>
     74   5741        mrj #include <sys/promif.h>
     75   5741        mrj #include <sys/sysmacros.h>
     76   5741        mrj #include <sys/kstat.h>
     77   5741        mrj #include <sys/mach_mmu.h>
     78   5741        mrj #ifdef XPV_HVM_DRIVER
     79   5741        mrj #include <sys/xpv_support.h>
     80   6318        edp #include <sys/sunndi.h>
     81   8863     Edward #else /* !XPV_HVM_DRIVER */
     82   8863     Edward #include <sys/evtchn_impl.h>
     83   8863     Edward #endif /* !XPV_HVM_DRIVER */
     84   5741        mrj #include <public/io/xenbus.h>
     85   5741        mrj #include <xen/sys/xenbus_impl.h>
     86   5741        mrj #include <sys/scsi/generic/inquiry.h>
     87   6144        rab #include <xen/io/blkif_impl.h>
     88   8863     Edward #include <sys/fdio.h>
     89   8863     Edward #include <sys/cdio.h>
     90   5084    johnlev 
     91   8863     Edward /*
     92   8863     Edward  * DEBUG_EVAL can be used to include debug only statements without
     93   8863     Edward  * having to use '#ifdef DEBUG' statements
     94   8863     Edward  */
     95   8863     Edward #ifdef DEBUG
     96   8863     Edward #define	DEBUG_EVAL(x)	(x)
     97   8863     Edward #else /* !DEBUG */
     98   8863     Edward #define	DEBUG_EVAL(x)
     99   8863     Edward #endif /* !DEBUG */
    100   8863     Edward 
    101   8863     Edward #define	XDF_DRAIN_MSEC_DELAY		(50*1000)	/* 00.05 sec */
    102   8863     Edward #define	XDF_DRAIN_RETRY_COUNT		200		/* 10.00 sec */
    103   8863     Edward 
    104   8863     Edward #define	INVALID_DOMID	((domid_t)-1)
    105   5084    johnlev #define	FLUSH_DISKCACHE	0x1
    106   5084    johnlev #define	WRITE_BARRIER	0x2
    107   5084    johnlev #define	DEFAULT_FLUSH_BLOCK	156 /* block to write to cause a cache flush */
    108   8863     Edward #define	USE_WRITE_BARRIER(vdp)						\
    109   5084    johnlev 	((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported)
    110   8863     Edward #define	USE_FLUSH_DISKCACHE(vdp)					\
    111   5084    johnlev 	((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported)
    112   8863     Edward #define	IS_WRITE_BARRIER(vdp, bp)					\
    113   8863     Edward 	(!IS_READ(bp) && USE_WRITE_BARRIER(vdp) &&			\
    114   5084    johnlev 	((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block))
    115   8863     Edward #define	IS_FLUSH_DISKCACHE(bp)						\
    116   5084    johnlev 	(!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0))
    117   5084    johnlev 
    118   8863     Edward #define	VREQ_DONE(vreq)							\
    119   8863     Edward 	VOID2BOOLEAN(((vreq)->v_status == VREQ_DMAWIN_DONE) &&		\
    120   8863     Edward 	    (((vreq)->v_flush_diskcache == FLUSH_DISKCACHE) ||		\
    121   8863     Edward 	    (((vreq)->v_dmaw + 1) == (vreq)->v_ndmaws)))
    122   5084    johnlev 
    123   8863     Edward #define	BP_VREQ(bp)		((v_req_t *)((bp)->av_back))
    124   8863     Edward #define	BP_VREQ_SET(bp, vreq)	(((bp)->av_back = (buf_t *)(vreq)))
    125   5084    johnlev 
    126   8863     Edward extern int		do_polled_io;
    127   6318        edp 
    128   8863     Edward /* run-time tunables that we don't want the compiler to optimize away */
    129   8863     Edward volatile int		xdf_debug = 0;
    130   8863     Edward volatile boolean_t	xdf_barrier_flush_disable = B_FALSE;
    131   5084    johnlev 
    132   8863     Edward /* per module globals */
    133   8863     Edward major_t			xdf_major;
    134   8863     Edward static void		*xdf_ssp;
    135   8863     Edward static kmem_cache_t	*xdf_vreq_cache;
    136   8863     Edward static kmem_cache_t	*xdf_gs_cache;
    137   8863     Edward static int		xdf_maxphys = XB_MAXPHYS;
    138   8863     Edward static diskaddr_t	xdf_flush_block = DEFAULT_FLUSH_BLOCK;
    139   8863     Edward static int		xdf_fbrewrites;	/* flush block re-write count */
    140   5084    johnlev 
    141   8863     Edward /* misc public functions (used by xdf_shell.c) */
    142   8863     Edward int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *);
    143   8863     Edward int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
    144   5084    johnlev 
    145   8863     Edward /*  misc private functions */
    146   8863     Edward static void xdf_io_start(xdf_t *);
    147   5084    johnlev 
    148   8863     Edward /* callbacks from commmon label */
    149   8863     Edward static cmlb_tg_ops_t xdf_lb_ops = {
    150   8863     Edward 	TG_DK_OPS_VERSION_1,
    151   8863     Edward 	xdf_lb_rdwr,
    152   8863     Edward 	xdf_lb_getinfo
    153   5084    johnlev };
    154   5084    johnlev 
    155   5084    johnlev /*
    156   5084    johnlev  * I/O buffer DMA attributes
    157   5084    johnlev  * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most
    158   5084    johnlev  */
    159   5084    johnlev static ddi_dma_attr_t xb_dma_attr = {
    160   5084    johnlev 	DMA_ATTR_V0,
    161   5084    johnlev 	(uint64_t)0,			/* lowest address */
    162   5084    johnlev 	(uint64_t)0xffffffffffffffff,	/* highest usable address */
    163   5084    johnlev 	(uint64_t)0xffffff,		/* DMA counter limit max */
    164   5084    johnlev 	(uint64_t)XB_BSIZE,		/* alignment in bytes */
    165   5084    johnlev 	XB_BSIZE - 1,			/* bitmap of burst sizes */
    166   5084    johnlev 	XB_BSIZE,			/* min transfer */
    167   5084    johnlev 	(uint64_t)XB_MAX_XFER, 		/* maximum transfer */
    168   5084    johnlev 	(uint64_t)PAGEOFFSET,		/* 1 page segment length  */
    169   5084    johnlev 	BLKIF_MAX_SEGMENTS_PER_REQUEST,	/* maximum number of segments */
    170   5084    johnlev 	XB_BSIZE,			/* granularity */
    171   5084    johnlev 	0,				/* flags (reserved) */
    172   5084    johnlev };
    173   5084    johnlev 
    174   5084    johnlev static ddi_device_acc_attr_t xc_acc_attr = {
    175   5084    johnlev 	DDI_DEVICE_ATTR_V0,
    176   5084    johnlev 	DDI_NEVERSWAP_ACC,
    177   5084    johnlev 	DDI_STRICTORDER_ACC
    178   5084    johnlev };
    179   5084    johnlev 
    180   8863     Edward static void
    181   8863     Edward xdf_timeout_handler(void *arg)
    182   5084    johnlev {
    183   8863     Edward 	xdf_t *vdp = arg;
    184   5084    johnlev 
    185   5084    johnlev 	mutex_enter(&vdp->xdf_dev_lk);
    186   8863     Edward 	vdp->xdf_timeout_id = 0;
    187   5084    johnlev 	mutex_exit(&vdp->xdf_dev_lk);
    188   5084    johnlev 
    189   8863     Edward 	/* new timeout thread could be re-scheduled */
    190   8863     Edward 	xdf_io_start(vdp);
    191   5084    johnlev }
    192   5084    johnlev 
    193   5084    johnlev /*
    194   5084    johnlev  * callback func when DMA/GTE resources is available
    195   5084    johnlev  *
    196   5084    johnlev  * Note: we only register one callback function to grant table subsystem
    197   5084    johnlev  * since we only have one 'struct gnttab_free_callback' in xdf_t.
    198   5084    johnlev  */
    199   5084    johnlev static int
    200   5084    johnlev xdf_dmacallback(caddr_t arg)
    201   5084    johnlev {
    202   5084    johnlev 	xdf_t *vdp = (xdf_t *)arg;
    203   5084    johnlev 	ASSERT(vdp != NULL);
    204   5084    johnlev 
    205   5084    johnlev 	DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n",
    206   8863     Edward 	    vdp->xdf_addr));
    207   5084    johnlev 
    208   5084    johnlev 	ddi_trigger_softintr(vdp->xdf_softintr_id);
    209   5084    johnlev 	return (DDI_DMA_CALLBACK_DONE);
    210   5084    johnlev }
    211   5084    johnlev 
    212   8863     Edward static ge_slot_t *
    213   8863     Edward gs_get(xdf_t *vdp, int isread)
    214   5084    johnlev {
    215   8863     Edward 	grant_ref_t gh;
    216   8863     Edward 	ge_slot_t *gs;
    217   5084    johnlev 
    218   8863     Edward 	/* try to alloc GTEs needed in this slot, first */
    219   8863     Edward 	if (gnttab_alloc_grant_references(
    220   8863     Edward 	    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) {
    221   8863     Edward 		if (vdp->xdf_gnt_callback.next == NULL) {
    222   8863     Edward 			SETDMACBON(vdp);
    223   8863     Edward 			gnttab_request_free_callback(
    224   8863     Edward 			    &vdp->xdf_gnt_callback,
    225   8863     Edward 			    (void (*)(void *))xdf_dmacallback,
    226   8863     Edward 			    (void *)vdp,
    227   8863     Edward 			    BLKIF_MAX_SEGMENTS_PER_REQUEST);
    228   8863     Edward 		}
    229   8863     Edward 		return (NULL);
    230   8863     Edward 	}
    231   5084    johnlev 
    232   8863     Edward 	gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP);
    233   8863     Edward 	if (gs == NULL) {
    234   8863     Edward 		gnttab_free_grant_references(gh);
    235   8863     Edward 		if (vdp->xdf_timeout_id == 0)
    236   8863     Edward 			/* restart I/O after one second */
    237   8863     Edward 			vdp->xdf_timeout_id =
    238   8863     Edward 			    timeout(xdf_timeout_handler, vdp, hz);
    239   8863     Edward 		return (NULL);
    240   8863     Edward 	}
    241   5084    johnlev 
    242   8863     Edward 	/* init gs_slot */
    243   8863     Edward 	gs->gs_oeid = vdp->xdf_peer;
    244   8863     Edward 	gs->gs_isread = isread;
    245   8863     Edward 	gs->gs_ghead = gh;
    246   8863     Edward 	gs->gs_ngrefs = 0;
    247   5084    johnlev 
    248   8863     Edward 	return (gs);
    249   5084    johnlev }
    250   5084    johnlev 
    251   5084    johnlev static void
    252   8863     Edward gs_free(ge_slot_t *gs)
    253   5084    johnlev {
    254   8863     Edward 	int		i;
    255   5084    johnlev 
    256   8863     Edward 	/* release all grant table entry resources used in this slot */
    257   8863     Edward 	for (i = 0; i < gs->gs_ngrefs; i++)
    258   8863     Edward 		gnttab_end_foreign_access(gs->gs_ge[i], !gs->gs_isread, 0);
    259   8863     Edward 	gnttab_free_grant_references(gs->gs_ghead);
    260   8863     Edward 	list_remove(&gs->gs_vreq->v_gs, gs);
    261   8863     Edward 	kmem_cache_free(xdf_gs_cache, gs);
    262   8863     Edward }
    263   5084    johnlev 
    264   8863     Edward static grant_ref_t
    265   8863     Edward gs_grant(ge_slot_t *gs, mfn_t mfn)
    266   8863     Edward {
    267   8863     Edward 	grant_ref_t gr = gnttab_claim_grant_reference(&gs->gs_ghead);
    268   8863     Edward 
    269   8863     Edward 	ASSERT(gr != -1);
    270   8863     Edward 	ASSERT(gs->gs_ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST);
    271   8863     Edward 	gs->gs_ge[gs->gs_ngrefs++] = gr;
    272   8863     Edward 	gnttab_grant_foreign_access_ref(gr, gs->gs_oeid, mfn, !gs->gs_isread);
    273   8863     Edward 
    274   8863     Edward 	return (gr);
    275   5084    johnlev }
    276   5084    johnlev 
    277   5084    johnlev /*
    278   5084    johnlev  * Alloc a vreq for this bp
    279   5084    johnlev  * bp->av_back contains the pointer to the vreq upon return
    280   5084    johnlev  */
    281   5084    johnlev static v_req_t *
    282   5084    johnlev vreq_get(xdf_t *vdp, buf_t *bp)
    283   5084    johnlev {
    284   5084    johnlev 	v_req_t *vreq = NULL;
    285   5084    johnlev 
    286   8863     Edward 	ASSERT(BP_VREQ(bp) == NULL);
    287   5084    johnlev 
    288   5084    johnlev 	vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP);
    289   5084    johnlev 	if (vreq == NULL) {
    290   5084    johnlev 		if (vdp->xdf_timeout_id == 0)
    291   5084    johnlev 			/* restart I/O after one second */
    292   5084    johnlev 			vdp->xdf_timeout_id =
    293   5084    johnlev 			    timeout(xdf_timeout_handler, vdp, hz);
    294   5084    johnlev 		return (NULL);
    295   5084    johnlev 	}
    296   5084    johnlev 	bzero(vreq, sizeof (v_req_t));
    297   8863     Edward 	list_create(&vreq->v_gs, sizeof (ge_slot_t),
    298   8863     Edward 	    offsetof(ge_slot_t, gs_vreq_link));
    299   8863     Edward 	vreq->v_buf = bp;
    300   8863     Edward 	vreq->v_status = VREQ_INIT;
    301   8863     Edward 	vreq->v_runq = B_FALSE;
    302   8863     Edward 	BP_VREQ_SET(bp, vreq);
    303   8863     Edward 	/* init of other fields in vreq is up to the caller */
    304   5084    johnlev 
    305   5084    johnlev 	list_insert_head(&vdp->xdf_vreq_act, (void *)vreq);
    306   5084    johnlev 
    307   5084    johnlev 	return (vreq);
    308   5084    johnlev }
    309   5084    johnlev 
    310   5084    johnlev static void
    311   5084    johnlev vreq_free(xdf_t *vdp, v_req_t *vreq)
    312   5084    johnlev {
    313   8863     Edward 	buf_t	*bp = vreq->v_buf;
    314   5084    johnlev 
    315   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
    316   8863     Edward 	ASSERT(BP_VREQ(bp) == vreq);
    317   8863     Edward 
    318   8863     Edward 	list_remove(&vdp->xdf_vreq_act, vreq);
    319   5084    johnlev 
    320   5385   cz147101 	if (vreq->v_flush_diskcache == FLUSH_DISKCACHE)
    321   5385   cz147101 		goto done;
    322   5385   cz147101 
    323   5084    johnlev 	switch (vreq->v_status) {
    324   5084    johnlev 	case VREQ_DMAWIN_DONE:
    325   5084    johnlev 	case VREQ_GS_ALLOCED:
    326   5084    johnlev 	case VREQ_DMABUF_BOUND:
    327   5084    johnlev 		(void) ddi_dma_unbind_handle(vreq->v_dmahdl);
    328   5084    johnlev 		/*FALLTHRU*/
    329   5084    johnlev 	case VREQ_DMAMEM_ALLOCED:
    330   5084    johnlev 		if (!ALIGNED_XFER(bp)) {
    331   5084    johnlev 			ASSERT(vreq->v_abuf != NULL);
    332   5084    johnlev 			if (!IS_ERROR(bp) && IS_READ(bp))
    333   5084    johnlev 				bcopy(vreq->v_abuf, bp->b_un.b_addr,
    334   5084    johnlev 				    bp->b_bcount);
    335   5084    johnlev 			ddi_dma_mem_free(&vreq->v_align);
    336   5084    johnlev 		}
    337   5084    johnlev 		/*FALLTHRU*/
    338   5084    johnlev 	case VREQ_MEMDMAHDL_ALLOCED:
    339   5084    johnlev 		if (!ALIGNED_XFER(bp))
    340   5084    johnlev 			ddi_dma_free_handle(&vreq->v_memdmahdl);
    341   5084    johnlev 		/*FALLTHRU*/
    342   5084    johnlev 	case VREQ_DMAHDL_ALLOCED:
    343   5084    johnlev 		ddi_dma_free_handle(&vreq->v_dmahdl);
    344   5084    johnlev 		break;
    345   5084    johnlev 	default:
    346   5084    johnlev 		break;
    347   5084    johnlev 	}
    348   5385   cz147101 done:
    349   8863     Edward 	ASSERT(!vreq->v_runq);
    350   8863     Edward 	list_destroy(&vreq->v_gs);
    351   5084    johnlev 	kmem_cache_free(xdf_vreq_cache, vreq);
    352   8863     Edward }
    353   8863     Edward 
    354   8863     Edward /*
    355   8863     Edward  * Snarf new data if our flush block was re-written
    356   8863     Edward  */
    357   8863     Edward static void
    358   8863     Edward check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno)
    359   8863     Edward {
    360   8863     Edward 	int nblks;
    361   8863     Edward 	boolean_t mapin;
    362   8863     Edward 
    363   8863     Edward 	if (IS_WRITE_BARRIER(vdp, bp))
    364   8863     Edward 		return; /* write was a flush write */
    365   8863     Edward 
    366   8863     Edward 	mapin = B_FALSE;
    367   8863     Edward 	nblks = bp->b_bcount >> DEV_BSHIFT;
    368   8863     Edward 	if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) {
    369   8863     Edward 		xdf_fbrewrites++;
    370   8863     Edward 		if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
    371   8863     Edward 			mapin = B_TRUE;
    372   8863     Edward 			bp_mapin(bp);
    373   8863     Edward 		}
    374   8863     Edward 		bcopy(bp->b_un.b_addr +
    375   8863     Edward 		    ((xdf_flush_block - blkno) << DEV_BSHIFT),
    376   8863     Edward 		    vdp->xdf_cache_flush_block, DEV_BSIZE);
    377   8863     Edward 		if (mapin)
    378   8863     Edward 			bp_mapout(bp);
    379   8863     Edward 	}
    380   5084    johnlev }
    381   5084    johnlev 
    382   5084    johnlev /*
    383   5084    johnlev  * Initalize the DMA and grant table resources for the buf
    384   5084    johnlev  */
    385   5084    johnlev static int
    386   5084    johnlev vreq_setup(xdf_t *vdp, v_req_t *vreq)
    387   5084    johnlev {
    388   5084    johnlev 	int rc;
    389   5084    johnlev 	ddi_dma_attr_t dmaattr;
    390   5084    johnlev 	uint_t ndcs, ndws;
    391   5084    johnlev 	ddi_dma_handle_t dh;
    392   5084    johnlev 	ddi_dma_handle_t mdh;
    393   5084    johnlev 	ddi_dma_cookie_t dc;
    394   5084    johnlev 	ddi_acc_handle_t abh;
    395   5084    johnlev 	caddr_t	aba;
    396   5084    johnlev 	ge_slot_t *gs;
    397   5084    johnlev 	size_t bufsz;
    398   5084    johnlev 	off_t off;
    399   5084    johnlev 	size_t sz;
    400   5084    johnlev 	buf_t *bp = vreq->v_buf;
    401   5084    johnlev 	int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) |
    402   5084    johnlev 	    DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
    403   5084    johnlev 
    404   5084    johnlev 	switch (vreq->v_status) {
    405   5084    johnlev 	case VREQ_INIT:
    406   5084    johnlev 		if (IS_FLUSH_DISKCACHE(bp)) {
    407   5084    johnlev 			if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
    408   8863     Edward 				DPRINTF(DMA_DBG, ("xdf@%s: "
    409   8863     Edward 				    "get ge_slotfailed\n", vdp->xdf_addr));
    410   5084    johnlev 				return (DDI_FAILURE);
    411   5084    johnlev 			}
    412   5084    johnlev 			vreq->v_blkno = 0;
    413   5084    johnlev 			vreq->v_nslots = 1;
    414   5084    johnlev 			vreq->v_flush_diskcache = FLUSH_DISKCACHE;
    415   5385   cz147101 			vreq->v_status = VREQ_GS_ALLOCED;
    416   8863     Edward 			gs->gs_vreq = vreq;
    417   8863     Edward 			list_insert_head(&vreq->v_gs, gs);
    418   5084    johnlev 			return (DDI_SUCCESS);
    419   5084    johnlev 		}
    420   5084    johnlev 
    421   5084    johnlev 		if (IS_WRITE_BARRIER(vdp, bp))
    422   5084    johnlev 			vreq->v_flush_diskcache = WRITE_BARRIER;
    423   5084    johnlev 		vreq->v_blkno = bp->b_blkno +
    424   5084    johnlev 		    (diskaddr_t)(uintptr_t)bp->b_private;
    425   5084    johnlev 		/* See if we wrote new data to our flush block */
    426   5084    johnlev 		if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp))
    427   5084    johnlev 			check_fbwrite(vdp, bp, vreq->v_blkno);
    428   5084    johnlev 		vreq->v_status = VREQ_INIT_DONE;
    429   5084    johnlev 		/*FALLTHRU*/
    430   5084    johnlev 
    431   5084    johnlev 	case VREQ_INIT_DONE:
    432   5084    johnlev 		/*
    433   5084    johnlev 		 * alloc DMA handle
    434   5084    johnlev 		 */
    435   5084    johnlev 		rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr,
    436   5084    johnlev 		    xdf_dmacallback, (caddr_t)vdp, &dh);
    437   5084    johnlev 		if (rc != DDI_SUCCESS) {
    438   5084    johnlev 			SETDMACBON(vdp);
    439   5084    johnlev 			DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n",
    440   8863     Edward 			    vdp->xdf_addr));
    441   5084    johnlev 			return (DDI_FAILURE);
    442   5084    johnlev 		}
    443   5084    johnlev 
    444   5084    johnlev 		vreq->v_dmahdl = dh;
    445   5084    johnlev 		vreq->v_status = VREQ_DMAHDL_ALLOCED;
    446   5084    johnlev 		/*FALLTHRU*/
    447   5084    johnlev 
    448   5084    johnlev 	case VREQ_DMAHDL_ALLOCED:
    449   5084    johnlev 		/*
    450   5084    johnlev 		 * alloc dma handle for 512-byte aligned buf
    451   5084    johnlev 		 */
    452   5084    johnlev 		if (!ALIGNED_XFER(bp)) {
    453   5084    johnlev 			/*
    454   5084    johnlev 			 * XXPV: we need to temporarily enlarge the seg
    455   5084    johnlev 			 * boundary and s/g length to work round CR6381968
    456   5084    johnlev 			 */
    457   5084    johnlev 			dmaattr = xb_dma_attr;
    458   5084    johnlev 			dmaattr.dma_attr_seg = (uint64_t)-1;
    459   5084    johnlev 			dmaattr.dma_attr_sgllen = INT_MAX;
    460   5084    johnlev 			rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr,
    461   5084    johnlev 			    xdf_dmacallback, (caddr_t)vdp, &mdh);
    462   5084    johnlev 			if (rc != DDI_SUCCESS) {
    463   5084    johnlev 				SETDMACBON(vdp);
    464   8863     Edward 				DPRINTF(DMA_DBG, ("xdf@%s: "
    465   8863     Edward 				    "unaligned buf DMAhandle alloc failed\n",
    466   8863     Edward 				    vdp->xdf_addr));
    467   5084    johnlev 				return (DDI_FAILURE);
    468   5084    johnlev 			}
    469   5084    johnlev 			vreq->v_memdmahdl = mdh;
    470   5084    johnlev 			vreq->v_status = VREQ_MEMDMAHDL_ALLOCED;
    471   5084    johnlev 		}
    472   5084    johnlev 		/*FALLTHRU*/
    473   5084    johnlev 
    474   5084    johnlev 	case VREQ_MEMDMAHDL_ALLOCED:
    475   5084    johnlev 		/*
    476   5084    johnlev 		 * alloc 512-byte aligned buf
    477   5084    johnlev 		 */
    478   5084    johnlev 		if (!ALIGNED_XFER(bp)) {
    479   5084    johnlev 			if (bp->b_flags & (B_PAGEIO | B_PHYS))
    480   5084    johnlev 				bp_mapin(bp);
    481   5084    johnlev 			rc = ddi_dma_mem_alloc(vreq->v_memdmahdl,
    482   5084    johnlev 			    roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr,
    483   5084    johnlev 			    DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp,
    484   5084    johnlev 			    &aba, &bufsz, &abh);
    485   5084    johnlev 			if (rc != DDI_SUCCESS) {
    486   5084    johnlev 				SETDMACBON(vdp);
    487   8863     Edward 				DPRINTF(DMA_DBG, ("xdf@%s: "
    488   8863     Edward 				    "DMA mem allocation failed\n",
    489   8863     Edward 				    vdp->xdf_addr));
    490   5084    johnlev 				return (DDI_FAILURE);
    491   5084    johnlev 			}
    492   5084    johnlev 
    493   5084    johnlev 			vreq->v_abuf = aba;
    494   5084    johnlev 			vreq->v_align = abh;
    495   5084    johnlev 			vreq->v_status = VREQ_DMAMEM_ALLOCED;
    496   5084    johnlev 
    497   5084    johnlev 			ASSERT(bufsz >= bp->b_bcount);
    498   5084    johnlev 			if (!IS_READ(bp))
    499   5084    johnlev 				bcopy(bp->b_un.b_addr, vreq->v_abuf,
    500   5084    johnlev 				    bp->b_bcount);
    501   5084    johnlev 		}
    502   5084    johnlev 		/*FALLTHRU*/
    503   5084    johnlev 
    504   5084    johnlev 	case VREQ_DMAMEM_ALLOCED:
    505   5084    johnlev 		/*
    506   5084    johnlev 		 * dma bind
    507   5084    johnlev 		 */
    508   5084    johnlev 		if (ALIGNED_XFER(bp)) {
    509   5084    johnlev 			rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp,
    510   5084    johnlev 			    dma_flags, xdf_dmacallback, (caddr_t)vdp,
    511   5084    johnlev 			    &dc, &ndcs);
    512   5084    johnlev 		} else {
    513   5084    johnlev 			rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl,
    514   5084    johnlev 			    NULL, vreq->v_abuf, bp->b_bcount, dma_flags,
    515   5084    johnlev 			    xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs);
    516   5084    johnlev 		}
    517   5084    johnlev 		if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) {
    518   5084    johnlev 			/* get num of dma windows */
    519   5084    johnlev 			if (rc == DDI_DMA_PARTIAL_MAP) {
    520   5084    johnlev 				rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws);
    521   5084    johnlev 				ASSERT(rc == DDI_SUCCESS);
    522   5084    johnlev 			} else {
    523   5084    johnlev 				ndws = 1;
    524   5084    johnlev 			}
    525   5084    johnlev 		} else {
    526   5084    johnlev 			SETDMACBON(vdp);
    527   5084    johnlev 			DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n",
    528   8863     Edward 			    vdp->xdf_addr));
    529   5084    johnlev 			return (DDI_FAILURE);
    530   5084    johnlev 		}
    531   5084    johnlev 
    532   5084    johnlev 		vreq->v_dmac = dc;
    533   5084    johnlev 		vreq->v_dmaw = 0;
    534   5084    johnlev 		vreq->v_ndmacs = ndcs;
    535   5084    johnlev 		vreq->v_ndmaws = ndws;
    536   5084    johnlev 		vreq->v_nslots = ndws;
    537   5084    johnlev 		vreq->v_status = VREQ_DMABUF_BOUND;
    538   5084    johnlev 		/*FALLTHRU*/
    539   5084    johnlev 
    540   5084    johnlev 	case VREQ_DMABUF_BOUND:
    541   5084    johnlev 		/*
    542   5084    johnlev 		 * get ge_slot, callback is set upon failure from gs_get(),
    543   5084    johnlev 		 * if not set previously
    544   5084    johnlev 		 */
    545   5084    johnlev 		if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
    546   5084    johnlev 			DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
    547   8863     Edward 			    vdp->xdf_addr));
    548   5084    johnlev 			return (DDI_FAILURE);
    549   5084    johnlev 		}
    550   5084    johnlev 
    551   5084    johnlev 		vreq->v_status = VREQ_GS_ALLOCED;
    552   8863     Edward 		gs->gs_vreq = vreq;
    553   8863     Edward 		list_insert_head(&vreq->v_gs, gs);
    554   5084    johnlev 		break;
    555   5084    johnlev 
    556   5084    johnlev 	case VREQ_GS_ALLOCED:
    557   5084    johnlev 		/* nothing need to be done */
    558   5084    johnlev 		break;
    559   5084    johnlev 
    560   5084    johnlev 	case VREQ_DMAWIN_DONE:
    561   5084    johnlev 		/*
    562   5084    johnlev 		 * move to the next dma window
    563   5084    johnlev 		 */
    564   5084    johnlev 		ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws);
    565   5084    johnlev 
    566   5084    johnlev 		/* get a ge_slot for this DMA window */
    567   5084    johnlev 		if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
    568   5084    johnlev 			DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
    569   8863     Edward 			    vdp->xdf_addr));
    570   5084    johnlev 			return (DDI_FAILURE);
    571   5084    johnlev 		}
    572   5084    johnlev 
    573   5084    johnlev 		vreq->v_dmaw++;
    574   8863     Edward 		VERIFY(ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz,
    575   8863     Edward 		    &vreq->v_dmac, &vreq->v_ndmacs) == DDI_SUCCESS);
    576   5084    johnlev 		vreq->v_status = VREQ_GS_ALLOCED;
    577   8863     Edward 		gs->gs_vreq = vreq;
    578   8863     Edward 		list_insert_head(&vreq->v_gs, gs);
    579   5084    johnlev 		break;
    580   5084    johnlev 
    581   5084    johnlev 	default:
    582   5084    johnlev 		return (DDI_FAILURE);
    583   5084    johnlev 	}
    584   5084    johnlev 
    585   5084    johnlev 	return (DDI_SUCCESS);
    586   5084    johnlev }
    587   5084    johnlev 
    588   8863     Edward static int
    589   8863     Edward xdf_cmlb_attach(xdf_t *vdp)
    590   5084    johnlev {
    591   8863     Edward 	dev_info_t	*dip = vdp->xdf_dip;
    592   5084    johnlev 
    593   8863     Edward 	return (cmlb_attach(dip, &xdf_lb_ops,
    594   8863     Edward 	    XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT,
    595   8863     Edward 	    XD_IS_RM(vdp),
    596   8863     Edward 	    B_TRUE,
    597   8863     Edward 	    XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD,
    598   8863     Edward #if defined(XPV_HVM_DRIVER)
    599   8863     Edward 	    (XD_IS_CD(vdp) ? 0 : CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT) |
    600   8863     Edward 	    CMLB_INTERNAL_MINOR_NODES,
    601   8863     Edward #else /* !XPV_HVM_DRIVER */
    602   8863     Edward 	    XD_IS_CD(vdp) ? 0 : CMLB_FAKE_LABEL_ONE_PARTITION,
    603   8863     Edward #endif /* !XPV_HVM_DRIVER */
    604   8863     Edward 	    vdp->xdf_vd_lbl, NULL));
    605   5084    johnlev }
    606   5084    johnlev 
    607   5084    johnlev static void
    608   8863     Edward xdf_io_err(buf_t *bp, int err, size_t resid)
    609   5084    johnlev {
    610   8863     Edward 	bioerror(bp, err);
    611   8863     Edward 	if (resid == 0)
    612   8863     Edward 		bp->b_resid = bp->b_bcount;
    613   8863     Edward 	biodone(bp);
    614   5084    johnlev }
    615   5084    johnlev 
    616   5084    johnlev static void
    617   8863     Edward xdf_kstat_enter(xdf_t *vdp, buf_t *bp)
    618   5084    johnlev {
    619   8863     Edward 	v_req_t *vreq = BP_VREQ(bp);
    620   5084    johnlev 
    621   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
    622   5084    johnlev 
    623   8863     Edward 	if (vdp->xdf_xdev_iostat == NULL)
    624   8863     Edward 		return;
    625   8863     Edward 	if ((vreq != NULL) && vreq->v_runq) {
    626   8863     Edward 		kstat_runq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
    627   8863     Edward 	} else {
    628   8863     Edward 		kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
    629   5084    johnlev 	}
    630   5084    johnlev }
    631   5084    johnlev 
    632   5084    johnlev static void
    633   8863     Edward xdf_kstat_exit(xdf_t *vdp, buf_t *bp)
    634   5084    johnlev {
    635   8863     Edward 	v_req_t *vreq = BP_VREQ(bp);
    636   8863     Edward 
    637   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
    638   8863     Edward 
    639   8863     Edward 	if (vdp->xdf_xdev_iostat == NULL)
    640   8863     Edward 		return;
    641   8863     Edward 	if ((vreq != NULL) && vreq->v_runq) {
    642   8863     Edward 		kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
    643   8863     Edward 	} else {
    644   8863     Edward 		kstat_waitq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
    645   8863     Edward 	}
    646   8863     Edward }
    647   8863     Edward 
    648   8863     Edward static void
    649   8863     Edward xdf_kstat_waitq_to_runq(xdf_t *vdp, buf_t *bp)
    650   8863     Edward {
    651   8863     Edward 	v_req_t *vreq = BP_VREQ(bp);
    652   8863     Edward 
    653   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
    654   8863     Edward 	ASSERT(!vreq->v_runq);
    655   8863     Edward 
    656   8863     Edward 	vreq->v_runq = B_TRUE;
    657   8863     Edward 	if (vdp->xdf_xdev_iostat == NULL)
    658   8863     Edward 		return;
    659   8863     Edward 	kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
    660   8863     Edward }
    661   8863     Edward 
    662   8863     Edward static void
    663   8863     Edward xdf_kstat_runq_to_waitq(xdf_t *vdp, buf_t *bp)
    664   8863     Edward {
    665   8863     Edward 	v_req_t *vreq = BP_VREQ(bp);
    666   8863     Edward 
    667   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
    668   8863     Edward 	ASSERT(vreq->v_runq);
    669   8863     Edward 
    670   8863     Edward 	vreq->v_runq = B_FALSE;
    671   8863     Edward 	if (vdp->xdf_xdev_iostat == NULL)
    672   8863     Edward 		return;
    673   8863     Edward 	kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
    674   8863     Edward }
    675   8863     Edward 
    676   8863     Edward int
    677   8863     Edward xdf_kstat_create(dev_info_t *dip, char *ks_module, int instance)
    678   8863     Edward {
    679   8863     Edward 	xdf_t		*vdp = (xdf_t *)ddi_get_driver_private(dip);
    680   8863     Edward 	kstat_t		*kstat;
    681   8863     Edward 	buf_t		*bp;
    682   8863     Edward 
    683   8863     Edward 	if ((kstat = kstat_create(
    684   8863     Edward 	    ks_module, instance, NULL, "disk",
    685   8863     Edward 	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL)
    686   8863     Edward 		return (-1);
    687   8863     Edward 
    688   8863     Edward 	/* See comment about locking in xdf_kstat_delete(). */
    689   8863     Edward 	mutex_enter(&vdp->xdf_iostat_lk);
    690   8863     Edward 	mutex_enter(&vdp->xdf_dev_lk);
    691   8863     Edward 
    692   8863     Edward 	/* only one kstat can exist at a time */
    693   8863     Edward 	if (vdp->xdf_xdev_iostat != NULL) {
    694   8863     Edward 		mutex_exit(&vdp->xdf_dev_lk);
    695   8863     Edward 		mutex_exit(&vdp->xdf_iostat_lk);
    696   8863     Edward 		kstat_delete(kstat);
    697   8863     Edward 		return (-1);
    698   8863     Edward 	}
    699   8863     Edward 
    700   8863     Edward 	vdp->xdf_xdev_iostat = kstat;
    701   8863     Edward 	vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk;
    702   8863     Edward 	kstat_install(vdp->xdf_xdev_iostat);
    703   8863     Edward 
    704   8863     Edward 	/*
    705   8863     Edward 	 * Now that we've created a kstat, we need to update the waitq and
    706   8863     Edward 	 * runq counts for the kstat to reflect our current state.
    707   8863     Edward 	 *
    708   8863     Edward 	 * For a buf_t structure to be on the runq, it must have a ring
    709   8863     Edward 	 * buffer slot associated with it.  To get a ring buffer slot the
    710   8863     Edward 	 * buf must first have a v_req_t and a ge_slot_t associated with it.
    711   8863     Edward 	 * Then when it is granted a ring buffer slot, v_runq will be set to
    712   8863     Edward 	 * true.
    713   8863     Edward 	 *
    714   8863     Edward 	 * For a buf_t structure to be on the waitq, it must not be on the
    715   8863     Edward 	 * runq.  So to find all the buf_t's that should be on waitq, we
    716   8863     Edward 	 * walk the active buf list and add any buf_t's which aren't on the
    717   8863     Edward 	 * runq to the waitq.
    718   8863     Edward 	 */
    719   8863     Edward 	bp = vdp->xdf_f_act;
    720   8863     Edward 	while (bp != NULL) {
    721   8863     Edward 		xdf_kstat_enter(vdp, bp);
    722   8863     Edward 		bp = bp->av_forw;
    723   8863     Edward 	}
    724   8863     Edward 	if (vdp->xdf_ready_tq_bp != NULL)
    725   8863     Edward 		xdf_kstat_enter(vdp, vdp->xdf_ready_tq_bp);
    726   8863     Edward 
    727   8863     Edward 	mutex_exit(&vdp->xdf_dev_lk);
    728   8863     Edward 	mutex_exit(&vdp->xdf_iostat_lk);
    729   8863     Edward 	return (0);
    730   5084    johnlev }
    731   6318        edp 
    732   6318        edp void
    733   6318        edp xdf_kstat_delete(dev_info_t *dip)
    734   6318        edp {
    735   8863     Edward 	xdf_t		*vdp = (xdf_t *)ddi_get_driver_private(dip);
    736   8863     Edward 	kstat_t		*kstat;
    737   8863     Edward 	buf_t		*bp;
    738   6318        edp 
    739   6318        edp 	/*
    740   6318        edp 	 * The locking order here is xdf_iostat_lk and then xdf_dev_lk.
    741   6318        edp 	 * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer
    742   6318        edp 	 * and the contents of the our kstat.  xdf_iostat_lk is used
    743   6318        edp 	 * to protect the allocation and freeing of the actual kstat.
    744   6318        edp 	 * xdf_dev_lk can't be used for this purpose because kstat
    745   6318        edp 	 * readers use it to access the contents of the kstat and
    746   6318        edp 	 * hence it can't be held when calling kstat_delete().
    747   6318        edp 	 */
    748   6318        edp 	mutex_enter(&vdp->xdf_iostat_lk);
    749   6318        edp 	mutex_enter(&vdp->xdf_dev_lk);
    750   6318        edp 
    751   6318        edp 	if (vdp->xdf_xdev_iostat == NULL) {
    752   6318        edp 		mutex_exit(&vdp->xdf_dev_lk);
    753   6318        edp 		mutex_exit(&vdp->xdf_iostat_lk);
    754   6318        edp 		return;
    755   6318        edp 	}
    756   6318        edp 
    757   8863     Edward 	/*
    758   8863     Edward 	 * We're about to destroy the kstat structures, so it isn't really
    759   8863     Edward 	 * necessary to update the runq and waitq counts.  But, since this
    760   8863     Edward 	 * isn't a hot code path we can afford to be a little pedantic and
    761   8863     Edward 	 * go ahead and decrement the runq and waitq kstat counters to zero
    762   8863     Edward 	 * before free'ing them.  This helps us ensure that we've gotten all
    763   8863     Edward 	 * our accounting correct.
    764   8863     Edward 	 *
    765   8863     Edward 	 * For an explanation of how we determine which buffers go on the
    766   8863     Edward 	 * runq vs which go on the waitq, see the comments in
    767   8863     Edward 	 * xdf_kstat_create().
    768   8863     Edward 	 */
    769   8863     Edward 	bp = vdp->xdf_f_act;
    770   8863     Edward 	while (bp != NULL) {
    771   8863     Edward 		xdf_kstat_exit(vdp, bp);
    772   8863     Edward 		bp = bp->av_forw;
    773   8863     Edward 	}
    774   8863     Edward 	if (vdp->xdf_ready_tq_bp != NULL)
    775   8863     Edward 		xdf_kstat_exit(vdp, vdp->xdf_ready_tq_bp);
    776   8863     Edward 
    777   6318        edp 	kstat = vdp->xdf_xdev_iostat;
    778   6318        edp 	vdp->xdf_xdev_iostat = NULL;
    779   6318        edp 	mutex_exit(&vdp->xdf_dev_lk);
    780   6318        edp 	kstat_delete(kstat);
    781   6318        edp 	mutex_exit(&vdp->xdf_iostat_lk);
    782   6318        edp }
    783   6318        edp 
    784   8863     Edward /*
    785   8863     Edward  * Add an IO requests onto the active queue.
    786   8863     Edward  *
    787   8863     Edward  * We have to detect IOs generated by xdf_ready_tq_thread.  These IOs
    788   8863     Edward  * are used to establish a connection to the backend, so they recieve
    789   8863     Edward  * priority over all other IOs.  Since xdf_ready_tq_thread only does
    790   8863     Edward  * synchronous IO, there can only be one xdf_ready_tq_thread request at any
    791   8863     Edward  * given time and we record the buf associated with that request in
    792   8863     Edward  * xdf_ready_tq_bp.
    793   8863     Edward  */
    794   8863     Edward static void
    795   8863     Edward xdf_bp_push(xdf_t *vdp, buf_t *bp)
    796   6318        edp {
    797   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
    798   8863     Edward 	ASSERT(bp->av_forw == NULL);
    799   6318        edp 
    800   8863     Edward 	xdf_kstat_enter(vdp, bp);
    801   8863     Edward 
    802   8863     Edward 	if (curthread == vdp->xdf_ready_tq_thread) {
    803   8863     Edward 		/* new IO requests from the ready thread */
    804   8863     Edward 		ASSERT(vdp->xdf_ready_tq_bp == NULL);
    805   8863     Edward 		vdp->xdf_ready_tq_bp = bp;
    806   8863     Edward 		return;
    807   8863     Edward 	}
    808   8863     Edward 
    809   8863     Edward 	/* this is normal IO request */
    810   8863     Edward 	ASSERT(bp != vdp->xdf_ready_tq_bp);
    811   8863     Edward 
    812   8863     Edward 	if (vdp->xdf_f_act == NULL) {
    813   8863     Edward 		/* this is only only IO on the active queue */
    814   8863     Edward 		ASSERT(vdp->xdf_l_act == NULL);
    815   8863     Edward 		ASSERT(vdp->xdf_i_act == NULL);
    816   8863     Edward 		vdp->xdf_f_act = vdp->xdf_l_act = vdp->xdf_i_act = bp;
    817   8863     Edward 		return;
    818   8863     Edward 	}
    819   8863     Edward 
    820   8863     Edward 	/* add this IO to the tail of the active queue */
    821   8863     Edward 	vdp->xdf_l_act->av_forw = bp;
    822   8863     Edward 	vdp->xdf_l_act = bp;
    823   8863     Edward 	if (vdp->xdf_i_act == NULL)
    824   8863     Edward 		vdp->xdf_i_act = bp;
    825   8863     Edward }
    826   8863     Edward 
    827   8863     Edward static void
    828   8863     Edward xdf_bp_pop(xdf_t *vdp, buf_t *bp)
    829   8863     Edward {
    830   8863     Edward 	buf_t	*bp_iter;
    831   8863     Edward 
    832   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
    833   8863     Edward 	ASSERT(VREQ_DONE(BP_VREQ(bp)));
    834   8863     Edward 
    835   8863     Edward 	if (vdp->xdf_ready_tq_bp == bp) {
    836   8863     Edward 		/* we're done with a ready thread IO request */
    837   8863     Edward 		ASSERT(bp->av_forw == NULL);
    838   8863     Edward 		vdp->xdf_ready_tq_bp = NULL;
    839   8863     Edward 		return;
    840   8863     Edward 	}
    841   8863     Edward 
    842   8863     Edward 	/* we're done with a normal IO request */
    843   8863     Edward 	ASSERT((bp->av_forw != NULL) || (bp == vdp->xdf_l_act));
    844   8863     Edward 	ASSERT((bp->av_forw == NULL) || (bp != vdp->xdf_l_act));
    845   8863     Edward 	ASSERT(VREQ_DONE(BP_VREQ(vdp->xdf_f_act)));
    846   8863     Edward 	ASSERT(vdp->xdf_f_act != vdp->xdf_i_act);
    847   8863     Edward 
    848   8863     Edward 	if (bp == vdp->xdf_f_act) {
    849   8863     Edward 		/* This IO was at the head of our active queue. */
    850   8863     Edward 		vdp->xdf_f_act = bp->av_forw;
    851   8863     Edward 		if (bp == vdp->xdf_l_act)
    852   8863     Edward 			vdp->xdf_l_act = NULL;
    853   8863     Edward 	} else {
    854   8863     Edward 		/* There IO finished before some other pending IOs. */
    855   8863     Edward 		bp_iter = vdp->xdf_f_act;
    856   8863     Edward 		while (bp != bp_iter->av_forw) {
    857   8863     Edward 			bp_iter = bp_iter->av_forw;
    858   8863     Edward 			ASSERT(VREQ_DONE(BP_VREQ(bp_iter)));
    859   8863     Edward 			ASSERT(bp_iter != vdp->xdf_i_act);
    860   8863     Edward 		}
    861   8863     Edward 		bp_iter->av_forw = bp->av_forw;
    862   8863     Edward 		if (bp == vdp->xdf_l_act)
    863   8863     Edward 			vdp->xdf_l_act = bp_iter;
    864   8863     Edward 	}
    865   8863     Edward 	bp->av_forw = NULL;
    866   8863     Edward }
    867   8863     Edward 
    868   8863     Edward static buf_t *
    869   8863     Edward xdf_bp_next(xdf_t *vdp)
    870   8863     Edward {
    871   8863     Edward 	v_req_t	*vreq;
    872   8863     Edward 	buf_t	*bp;
    873   8863     Edward 
    874   8863     Edward 	if (vdp->xdf_state == XD_CONNECTED) {
    875   8863     Edward 		/*
    876   8863     Edward 		 * If we're in the XD_CONNECTED state, we only service IOs
    877   8863     Edward 		 * from the xdf_ready_tq_thread thread.
    878   8863     Edward 		 */
    879   8863     Edward 		if ((bp = vdp->xdf_ready_tq_bp) == NULL)
    880   8863     Edward 			return (NULL);
    881   8863     Edward 		if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq)))
    882   8863     Edward 			return (bp);
    883   8863     Edward 		return (NULL);
    884   8863     Edward 	}
    885   8863     Edward 
    886   8863     Edward 	/* if we're not in the XD_CONNECTED or XD_READY state we can't do IO */
    887   8863     Edward 	if (vdp->xdf_state != XD_READY)
    888   8863     Edward 		return (NULL);
    889   8863     Edward 
    890   8863     Edward 	ASSERT(vdp->xdf_ready_tq_bp == NULL);
    891   8863     Edward 	for (;;) {
    892   8863     Edward 		if ((bp = vdp->xdf_i_act) == NULL)
    893   8863     Edward 			return (NULL);
    894   8863     Edward 		if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq)))
    895   8863     Edward 			return (bp);
    896   8863     Edward 
    897   8863     Edward 		/* advance the active buf index pointer */
    898   8863     Edward 		vdp->xdf_i_act = bp->av_forw;
    899   8863     Edward 	}
    900   8863     Edward }
    901   8863     Edward 
    902   8863     Edward static void
    903   8863     Edward xdf_io_fini(xdf_t *vdp, uint64_t id, int bioerr)
    904   8863     Edward {
    905   8863     Edward 	ge_slot_t	*gs = (ge_slot_t *)(uintptr_t)id;
    906   8863     Edward 	v_req_t		*vreq = gs->gs_vreq;
    907   8863     Edward 	buf_t		*bp = vreq->v_buf;
    908   8863     Edward 
    909   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
    910   8863     Edward 	ASSERT(BP_VREQ(bp) == vreq);
    911   8863     Edward 
    912   8863     Edward 	gs_free(gs);
    913   8863     Edward 
    914   8863     Edward 	if (bioerr != 0)
    915   8863     Edward 		bioerror(bp, bioerr);
    916   8863     Edward 	ASSERT(vreq->v_nslots > 0);
    917   8863     Edward 	if (--vreq->v_nslots > 0)
    918   8863     Edward 		return;
    919   8863     Edward 
    920   8863     Edward 	/* remove this IO from our active queue */
    921   8863     Edward 	xdf_bp_pop(vdp, bp);
    922   8863     Edward 
    923   8863     Edward 	ASSERT(vreq->v_runq);
    924   8863     Edward 	xdf_kstat_exit(vdp, bp);
    925   8863     Edward 	vreq->v_runq = B_FALSE;
    926   8863     Edward 	vreq_free(vdp, vreq);
    927   8863     Edward 
    928   8863     Edward 	if (IS_ERROR(bp)) {
    929   8863     Edward 		xdf_io_err(bp, geterror(bp), 0);
    930   8863     Edward 	} else if (bp->b_resid != 0) {
    931   8863     Edward 		/* Partial transfers are an error */
    932   8863     Edward 		xdf_io_err(bp, EIO, bp->b_resid);
    933   8863     Edward 	} else {
    934   8863     Edward 		biodone(bp);
    935   8863     Edward 	}
    936   8863     Edward }
    937   8863     Edward 
    938   8863     Edward /*
    939   8863     Edward  * xdf interrupt handler
    940   8863     Edward  */
    941   8863     Edward static uint_t
    942   8863     Edward xdf_intr_locked(xdf_t *vdp)
    943   8863     Edward {
    944   8863     Edward 	xendev_ring_t *xbr;
    945   8863     Edward 	blkif_response_t *resp;
    946   8863     Edward 	int bioerr;
    947   8863     Edward 	uint64_t id;
    948   8863     Edward 	uint8_t op;
    949   8863     Edward 	uint16_t status;
    950   8863     Edward 	ddi_acc_handle_t acchdl;
    951   8863     Edward 
    952   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
    953   8863     Edward 
    954   8863     Edward 	if ((xbr = vdp->xdf_xb_ring) == NULL)
    955   8863     Edward 		return (DDI_INTR_UNCLAIMED);
    956   8863     Edward 
    957   8863     Edward 	acchdl = vdp->xdf_xb_ring_hdl;
    958   8863     Edward 
    959   8863     Edward 	/*
    960   8863     Edward 	 * complete all requests which have a response
    961   8863     Edward 	 */
    962   8863     Edward 	while (resp = xvdi_ring_get_response(xbr)) {
    963   8863     Edward 		id = ddi_get64(acchdl, &resp->id);
    964   8863     Edward 		op = ddi_get8(acchdl, &resp->operation);
    965   8863     Edward 		status = ddi_get16(acchdl, (uint16_t *)&resp->status);
    966   8863     Edward 		DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n",
    967   8863     Edward 		    op, id, status));
    968   8863     Edward 
    969   8863     Edward 		if (status != BLKIF_RSP_OKAY) {
    970   8863     Edward 			DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s",
    971   8863     Edward 			    vdp->xdf_addr,
    972   8863     Edward 			    (op == BLKIF_OP_READ) ? "reading" : "writing"));
    973   8863     Edward 			bioerr = EIO;
    974   8863     Edward 		} else {
    975   8863     Edward 			bioerr = 0;
    976   8863     Edward 		}
    977   8863     Edward 
    978   8863     Edward 		xdf_io_fini(vdp, id, bioerr);
    979   8863     Edward 	}
    980   8863     Edward 	return (DDI_INTR_CLAIMED);
    981   8863     Edward }
    982   8863     Edward 
    983   9471     Edward /*
    984   9471     Edward  * xdf_intr runs at PIL 5, so no one else can grab xdf_dev_lk and
    985   9471     Edward  * block at a lower pil.
    986   9471     Edward  */
    987   8863     Edward static uint_t
    988   8863     Edward xdf_intr(caddr_t arg)
    989   8863     Edward {
    990   8863     Edward 	xdf_t *vdp = (xdf_t *)arg;
    991   8863     Edward 	int rv;
    992   8863     Edward 
    993   8863     Edward 	mutex_enter(&vdp->xdf_dev_lk);
    994   8863     Edward 	rv = xdf_intr_locked(vdp);
    995   8863     Edward 	mutex_exit(&vdp->xdf_dev_lk);
    996   8863     Edward 
    997   8863     Edward 	if (!do_polled_io)
    998   8863     Edward 		xdf_io_start(vdp);
    999   8863     Edward 
   1000   8863     Edward 	return (rv);
   1001   8863     Edward }
   1002   8863     Edward 
   1003   8863     Edward static void
   1004   8863     Edward xdf_ring_push(xdf_t *vdp)
   1005   8863     Edward {
   1006   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
   1007   8863     Edward 
   1008   8863     Edward 	if (vdp->xdf_xb_ring == NULL)
   1009   8863     Edward 		return;
   1010   8863     Edward 
   1011   8863     Edward 	if (xvdi_ring_push_request(vdp->xdf_xb_ring)) {
   1012   8863     Edward 		DPRINTF(IO_DBG, (
   1013   8863     Edward 		    "xdf@%s: xdf_ring_push: sent request(s) to backend\n",
   1014   8863     Edward 		    vdp->xdf_addr));
   1015   8863     Edward 	}
   1016   8863     Edward 
   1017   8863     Edward 	if (xvdi_get_evtchn(vdp->xdf_dip) != INVALID_EVTCHN)
   1018   8863     Edward 		xvdi_notify_oe(vdp->xdf_dip);
   1019   8863     Edward }
   1020   8863     Edward 
   1021   8863     Edward static int
   1022   8863     Edward xdf_ring_drain_locked(xdf_t *vdp)
   1023   8863     Edward {
   1024   8863     Edward 	int		pollc, rv = 0;
   1025   8863     Edward 
   1026   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
   1027   8863     Edward 
   1028   8863     Edward 	if (xdf_debug & SUSRES_DBG)
   1029   8863     Edward 		xen_printf("xdf_ring_drain: start\n");
   1030   8863     Edward 
   1031   8863     Edward 	for (pollc = 0; pollc < XDF_DRAIN_RETRY_COUNT; pollc++) {
   1032   8863     Edward 		if (vdp->xdf_xb_ring == NULL)
   1033   8863     Edward 			goto out;
   1034   8863     Edward 
   1035   8863     Edward 		if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring))
   1036   8863     Edward 			(void) xdf_intr_locked(vdp);
   1037   8863     Edward 		if (!xvdi_ring_has_incomp_request(vdp->xdf_xb_ring))
   1038   8863     Edward 			goto out;
   1039   8863     Edward 		xdf_ring_push(vdp);
   1040   8863     Edward 
   1041   8863     Edward 		/* file-backed devices can be slow */
   1042   8863     Edward 		mutex_exit(&vdp->xdf_dev_lk);
   1043   8863     Edward #ifdef XPV_HVM_DRIVER
   1044   8863     Edward 		(void) HYPERVISOR_yield();
   1045   8863     Edward #endif /* XPV_HVM_DRIVER */
   1046   8863     Edward 		delay(drv_usectohz(XDF_DRAIN_MSEC_DELAY));
   1047   8863     Edward 		mutex_enter(&vdp->xdf_dev_lk);
   1048   8863     Edward 	}
   1049   8863     Edward 	cmn_err(CE_WARN, "xdf@%s: xdf_ring_drain: timeout", vdp->xdf_addr);
   1050   8863     Edward 
   1051   8863     Edward out:
   1052   8863     Edward 	if (vdp->xdf_xb_ring != NULL) {
   1053   8863     Edward 		if (xvdi_ring_has_incomp_request(vdp->xdf_xb_ring) ||
   1054   8863     Edward 		    xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring))
   1055   8863     Edward 			rv = EIO;
   1056   8863     Edward 	}
   1057   8863     Edward 	if (xdf_debug & SUSRES_DBG)
   1058   8863     Edward 		xen_printf("xdf@%s: xdf_ring_drain: end, err=%d\n",
   1059   8863     Edward 		    vdp->xdf_addr, rv);
   1060   8863     Edward 	return (rv);
   1061   8863     Edward }
   1062   8863     Edward 
   1063   8863     Edward static int
   1064   8863     Edward xdf_ring_drain(xdf_t *vdp)
   1065   8863     Edward {
   1066   8863     Edward 	int rv;
   1067   8863     Edward 	mutex_enter(&vdp->xdf_dev_lk);
   1068   8863     Edward 	rv = xdf_ring_drain_locked(vdp);
   1069   8863     Edward 	mutex_exit(&vdp->xdf_dev_lk);
   1070   8863     Edward 	return (rv);
   1071   8863     Edward }
   1072   8863     Edward 
   1073   8863     Edward /*
   1074   8863     Edward  * Destroy all v_req_t, grant table entries, and our ring buffer.
   1075   8863     Edward  */
   1076   8863     Edward static void
   1077   8863     Edward xdf_ring_destroy(xdf_t *vdp)
   1078   8863     Edward {
   1079   8863     Edward 	v_req_t		*vreq;
   1080   8863     Edward 	buf_t		*bp;
   1081   8863     Edward 	ge_slot_t	*gs;
   1082   8863     Edward 
   1083   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
   1084   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
   1085   8863     Edward 
   1086   8863     Edward 	if ((vdp->xdf_state != XD_INIT) &&
   1087   8863     Edward 	    (vdp->xdf_state != XD_CONNECTED) &&
   1088   8863     Edward 	    (vdp->xdf_state != XD_READY)) {
   1089   8863     Edward 		ASSERT(vdp->xdf_xb_ring == NULL);
   1090   8863     Edward 		ASSERT(vdp->xdf_xb_ring_hdl == NULL);
   1091   8863     Edward 		ASSERT(vdp->xdf_peer == INVALID_DOMID);
   1092   8863     Edward 		ASSERT(vdp->xdf_evtchn == INVALID_EVTCHN);
   1093   8863     Edward 		ASSERT(list_is_empty(&vdp->xdf_vreq_act));
   1094   8863     Edward 		return;
   1095   8863     Edward 	}
   1096   8863     Edward 
   1097   8863     Edward 	/*
   1098   8863     Edward 	 * We don't want to recieve async notifications from the backend
   1099   8863     Edward 	 * when it finishes processing ring entries.
   1100   8863     Edward 	 */
   1101   8863     Edward #ifdef XPV_HVM_DRIVER
   1102   8863     Edward 	ec_unbind_evtchn(vdp->xdf_evtchn);
   1103   8863     Edward #else /* !XPV_HVM_DRIVER */
   1104   8863     Edward 	(void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
   1105   8863     Edward #endif /* !XPV_HVM_DRIVER */
   1106   8863     Edward 
   1107   8863     Edward 	/*
   1108   8863     Edward 	 * Drain any requests in the ring.  We need to do this before we
   1109   8863     Edward 	 * can free grant table entries, because if active ring entries
   1110   8863     Edward 	 * point to grants, then the backend could be trying to access
   1111   8863     Edward 	 * those grants.
   1112   8863     Edward 	 */
   1113   8863     Edward 	(void) xdf_ring_drain_locked(vdp);
   1114   8863     Edward 
   1115   8863     Edward 	/* We're done talking to the backend so free up our event channel */
   1116   8863     Edward 	xvdi_free_evtchn(vdp->xdf_dip);
   1117   8863     Edward 	vdp->xdf_evtchn = INVALID_EVTCHN;
   1118   8863     Edward 
   1119   8863     Edward 	while ((vreq = list_head(&vdp->xdf_vreq_act)) != NULL) {
   1120   8863     Edward 		bp = vreq->v_buf;
   1121   8863     Edward 		ASSERT(BP_VREQ(bp) == vreq);
   1122   8863     Edward 
   1123   8863     Edward 		/* Free up any grant table entries associaed with this IO */
   1124   8863     Edward 		while ((gs = list_head(&vreq->v_gs)) != NULL)
   1125   8863     Edward 			gs_free(gs);
   1126   8863     Edward 
   1127   8863     Edward 		/* If this IO was on the runq, move it back to the waitq. */
   1128   8863     Edward 		if (vreq->v_runq)
   1129   8863     Edward 			xdf_kstat_runq_to_waitq(vdp, bp);
   1130   8863     Edward 
   1131   8863     Edward 		/*
   1132   8863     Edward 		 * Reset any buf IO state since we're going to re-issue the
   1133   8863     Edward 		 * IO when we reconnect.
   1134   8863     Edward 		 */
   1135   8863     Edward 		vreq_free(vdp, vreq);
   1136   8863     Edward 		BP_VREQ_SET(bp, NULL);
   1137   8863     Edward 		bioerror(bp, 0);
   1138   8863     Edward 	}
   1139   8863     Edward 
   1140   8863     Edward 	/* reset the active queue index pointer */
   1141   8863     Edward 	vdp->xdf_i_act = vdp->xdf_f_act;
   1142   8863     Edward 
   1143   8863     Edward 	/* Destroy the ring */
   1144   8863     Edward 	xvdi_free_ring(vdp->xdf_xb_ring);
   1145   8863     Edward 	vdp->xdf_xb_ring = NULL;
   1146   8863     Edward 	vdp->xdf_xb_ring_hdl = NULL;
   1147   8863     Edward 	vdp->xdf_peer = INVALID_DOMID;
   1148   8863     Edward }
   1149   8863     Edward 
   1150   8863     Edward void
   1151   8863     Edward xdfmin(struct buf *bp)
   1152   8863     Edward {
   1153   8863     Edward 	if (bp->b_bcount > xdf_maxphys)
   1154   8863     Edward 		bp->b_bcount = xdf_maxphys;
   1155   8863     Edward }
   1156   8863     Edward 
   1157   8863     Edward /*
   1158   8863     Edward  * Check if we have a pending "eject" media request.
   1159   8863     Edward  */
   1160   8863     Edward static int
   1161   8863     Edward xdf_eject_pending(xdf_t *vdp)
   1162   8863     Edward {
   1163   8863     Edward 	dev_info_t	*dip = vdp->xdf_dip;
   1164   8863     Edward 	char		*xsname, *str;
   1165   8863     Edward 
   1166   8863     Edward 	if (!vdp->xdf_media_req_supported)
   1167   8863     Edward 		return (B_FALSE);
   1168   8863     Edward 
   1169   8863     Edward 	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
   1170   8863     Edward 	    (xenbus_read_str(xsname, XBP_MEDIA_REQ, &str) != 0))
   1171   8863     Edward 		return (B_FALSE);
   1172   8863     Edward 
   1173   8863     Edward 	if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) {
   1174   8863     Edward 		strfree(str);
   1175   8863     Edward 		return (B_FALSE);
   1176   8863     Edward 	}
   1177   8863     Edward 	strfree(str);
   1178   8863     Edward 	return (B_TRUE);
   1179   8863     Edward }
   1180   8863     Edward 
   1181   8863     Edward /*
   1182   8863     Edward  * Generate a media request.
   1183   8863     Edward  */
   1184   8863     Edward static int
   1185   8863     Edward xdf_media_req(xdf_t *vdp, char *req, boolean_t media_required)
   1186   8863     Edward {
   1187   8863     Edward 	dev_info_t	*dip = vdp->xdf_dip;
   1188   8863     Edward 	char		*xsname;
   1189   8863     Edward 
   1190   9471     Edward 	/*
   1191   9471     Edward 	 * we can't be holding xdf_dev_lk because xenbus_printf() can
   1192   9471     Edward 	 * block while waiting for a PIL 1 interrupt message.  this
   1193   9471     Edward 	 * would cause a deadlock with xdf_intr() which needs to grab
   1194   9471     Edward 	 * xdf_dev_lk as well and runs at PIL 5.
   1195   9471     Edward 	 */
   1196   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
   1197   9471     Edward 	ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
   1198   8863     Edward 
   1199   8863     Edward 	if ((xsname = xvdi_get_xsname(dip)) == NULL)
   1200   8863     Edward 		return (ENXIO);
   1201   8863     Edward 
   1202   8863     Edward 	/* Check if we support media requests */
   1203   8863     Edward 	if (!XD_IS_CD(vdp) || !vdp->xdf_media_req_supported)
   1204   8863     Edward 		return (ENOTTY);
   1205   8863     Edward 
   1206   8863     Edward 	/* If an eject is pending then don't allow any new requests */
   1207   8863     Edward 	if (xdf_eject_pending(vdp))
   1208   8863     Edward 		return (ENXIO);
   1209   8863     Edward 
   1210   8863     Edward 	/* Make sure that there is media present */
   1211   8863     Edward 	if (media_required && (vdp->xdf_xdev_nblocks == 0))
   1212   8863     Edward 		return (ENXIO);
   1213   8863     Edward 
   1214   8863     Edward 	/* We only allow operations when the device is ready and connected */
   1215   8863     Edward 	if (vdp->xdf_state != XD_READY)
   1216   8863     Edward 		return (EIO);
   1217   8863     Edward 
   1218   8863     Edward 	if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ, "%s", req) != 0)
   1219   8863     Edward 		return (EIO);
   1220   8863     Edward 
   1221   8863     Edward 	return (0);
   1222   8863     Edward }
   1223   8863     Edward 
   1224   8863     Edward /*
   1225   8863     Edward  * populate a single blkif_request_t w/ a buf
   1226   8863     Edward  */
   1227   8863     Edward static void
   1228   8863     Edward xdf_process_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq)
   1229   8863     Edward {
   1230   8863     Edward 	grant_ref_t	gr;
   1231   8863     Edward 	uint8_t		fsect, lsect;
   1232   8863     Edward 	size_t		bcnt;
   1233   8863     Edward 	paddr_t		dma_addr;
   1234   8863     Edward 	off_t		blk_off;
   1235   8863     Edward 	dev_info_t	*dip = vdp->xdf_dip;
   1236   8863     Edward 	blkif_vdev_t	vdev = xvdi_get_vdevnum(dip);
   1237   8863     Edward 	v_req_t		*vreq = BP_VREQ(bp);
   1238   8863     Edward 	uint64_t	blkno = vreq->v_blkno;
   1239   8863     Edward 	uint_t		ndmacs = vreq->v_ndmacs;
   1240   8863     Edward 	ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl;
   1241   8863     Edward 	int		seg = 0;
   1242   8863     Edward 	int		isread = IS_READ(bp);
   1243   8863     Edward 	ge_slot_t	*gs = list_head(&vreq->v_gs);
   1244   8863     Edward 
   1245   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
   1246   8863     Edward 	ASSERT(vreq->v_status == VREQ_GS_ALLOCED);
   1247   8863     Edward 
   1248   8863     Edward 	if (isread)
   1249   8863     Edward 		ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ);
   1250   8863     Edward 	else {
   1251   8863     Edward 		switch (vreq->v_flush_diskcache) {
   1252   8863     Edward 		case FLUSH_DISKCACHE:
   1253   8863     Edward 			ddi_put8(acchdl, &rreq->operation,
   1254   8863     Edward 			    BLKIF_OP_FLUSH_DISKCACHE);
   1255   8863     Edward 			ddi_put16(acchdl, &rreq->handle, vdev);
   1256   8863     Edward 			ddi_put64(acchdl, &rreq->id,
   1257   8863     Edward 			    (uint64_t)(uintptr_t)(gs));
   1258   8863     Edward 			ddi_put8(acchdl, &rreq->nr_segments, 0);
   1259   8863     Edward 			vreq->v_status = VREQ_DMAWIN_DONE;
   1260   8863     Edward 			return;
   1261   8863     Edward 		case WRITE_BARRIER:
   1262   8863     Edward 			ddi_put8(acchdl, &rreq->operation,
   1263   8863     Edward 			    BLKIF_OP_WRITE_BARRIER);
   1264   8863     Edward 			break;
   1265   8863     Edward 		default:
   1266   8863     Edward 			if (!vdp->xdf_wce)
   1267   8863     Edward 				ddi_put8(acchdl, &rreq->operation,
   1268   8863     Edward 				    BLKIF_OP_WRITE_BARRIER);
   1269   8863     Edward 			else
   1270   8863     Edward 				ddi_put8(acchdl, &rreq->operation,
   1271   8863     Edward 				    BLKIF_OP_WRITE);
   1272   8863     Edward 			break;
   1273   8863     Edward 		}
   1274   8863     Edward 	}
   1275   8863     Edward 
   1276   8863     Edward 	ddi_put16(acchdl, &rreq->handle, vdev);
   1277   8863     Edward 	ddi_put64(acchdl, &rreq->sector_number, blkno);
   1278   8863     Edward 	ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(gs));
   1279   8863     Edward 
   1280   8863     Edward 	/*
   1281   8863     Edward 	 * loop until all segments are populated or no more dma cookie in buf
   1282   8863     Edward 	 */
   1283   8863     Edward 	for (;;) {
   1284   8863     Edward 		/*
   1285   8863     Edward 		 * Each segment of a blkif request can transfer up to
   1286   8863     Edward 		 * one 4K page of data.
   1287   8863     Edward 		 */
   1288   8863     Edward 		bcnt = vreq->v_dmac.dmac_size;
   1289   8863     Edward 		dma_addr = vreq->v_dmac.dmac_laddress;
   1290   8863     Edward 		blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr);
   1291   8863     Edward 		fsect = blk_off >> XB_BSHIFT;
   1292   8863     Edward 		lsect = fsect + (bcnt >> XB_BSHIFT) - 1;
   1293   8863     Edward 
   1294   8863     Edward 		ASSERT(bcnt <= PAGESIZE);
   1295   8863     Edward 		ASSERT((bcnt % XB_BSIZE) == 0);
   1296   8863     Edward 		ASSERT((blk_off & XB_BMASK) == 0);
   1297   8863     Edward 		ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE &&
   1298   8863     Edward 		    lsect < XB_MAX_SEGLEN / XB_BSIZE);
   1299   8863     Edward 
   1300   8863     Edward 		gr = gs_grant(gs, PATOMA(dma_addr) >> PAGESHIFT);
   1301   8863     Edward 		ddi_put32(acchdl, &rreq->seg[seg].gref, gr);
   1302   8863     Edward 		ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect);
   1303   8863     Edward 		ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect);
   1304   8863     Edward 
   1305   8863     Edward 		DPRINTF(IO_DBG, (
   1306   8863     Edward 		    "xdf@%s: seg%d: dmacS %lu blk_off %ld\n",
   1307   8863     Edward 		    vdp->xdf_addr, seg, vreq->v_dmac.dmac_size, blk_off));
   1308   8863     Edward 		DPRINTF(IO_DBG, (
   1309   8863     Edward 		    "xdf@%s: seg%d: fs %d ls %d gr %d dma 0x%"PRIx64"\n",
   1310   8863     Edward 		    vdp->xdf_addr, seg, fsect, lsect, gr, dma_addr));
   1311   8863     Edward 
   1312   8863     Edward 		blkno += (bcnt >> XB_BSHIFT);
   1313   8863     Edward 		seg++;
   1314   8863     Edward 		ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
   1315   8863     Edward 		if (--ndmacs) {
   1316   8863     Edward 			ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac);
   1317   8863     Edward 			continue;
   1318   8863     Edward 		}
   1319   8863     Edward 
   1320   8863     Edward 		vreq->v_status = VREQ_DMAWIN_DONE;
   1321   8863     Edward 		vreq->v_blkno = blkno;
   1322   8863     Edward 		break;
   1323   8863     Edward 	}
   1324   8863     Edward 	ddi_put8(acchdl,  &rreq->nr_segments, seg);
   1325   8863     Edward 	DPRINTF(IO_DBG, (
   1326   8863     Edward 	    "xdf@%s: xdf_process_rreq: request id=%"PRIx64" ready\n",
   1327   8863     Edward 	    vdp->xdf_addr, rreq->id));
   1328   8863     Edward }
   1329   8863     Edward 
   1330   8863     Edward static void
   1331   8863     Edward xdf_io_start(xdf_t *vdp)
   1332   8863     Edward {
   1333   8863     Edward 	struct buf	*bp;
   1334   8863     Edward 	v_req_t		*vreq;
   1335   8863     Edward 	blkif_request_t	*rreq;
   1336   8863     Edward 	boolean_t	rreqready = B_FALSE;
   1337   8863     Edward 
   1338   6318        edp 	mutex_enter(&vdp->xdf_dev_lk);
   1339   6318        edp 
   1340   8863     Edward 	/*
   1341   8863     Edward 	 * Populate the ring request(s).  Loop until there is no buf to
   1342   8863     Edward 	 * transfer or no free slot available in I/O ring.
   1343   8863     Edward 	 */
   1344   8863     Edward 	for (;;) {
   1345   8863     Edward 		/* don't start any new IO if we're suspending */
   1346   8863     Edward 		if (vdp->xdf_suspending)
   1347   8863     Edward 			break;
   1348   8863     Edward 		if ((bp = xdf_bp_next(vdp)) == NULL)
   1349   8863     Edward 			break;
   1350   8863     Edward 
   1351   8863     Edward 		/* if the buf doesn't already have a vreq, allocate one */
   1352   8863     Edward 		if (((vreq = BP_VREQ(bp)) == NULL) &&
   1353   8863     Edward 		    ((vreq = vreq_get(vdp, bp)) == NULL))
   1354   8863     Edward 			break;
   1355   8863     Edward 
   1356   8863     Edward 		/* alloc DMA/GTE resources */
   1357   8863     Edward 		if (vreq_setup(vdp, vreq) != DDI_SUCCESS)
   1358   8863     Edward 			break;
   1359   8863     Edward 
   1360   8863     Edward 		/* get next blkif_request in the ring */
   1361   8863     Edward 		if ((rreq = xvdi_ring_get_request(vdp->xdf_xb_ring)) == NULL)
   1362   8863     Edward 			break;
   1363   8863     Edward 		bzero(rreq, sizeof (blkif_request_t));
   1364   8863     Edward 		rreqready = B_TRUE;
   1365   8863     Edward 
   1366   8863     Edward 		/* populate blkif_request with this buf */
   1367   8863     Edward 		xdf_process_rreq(vdp, bp, rreq);
   1368   8863     Edward 
   1369   8863     Edward 		/*
   1370   8863     Edward 		 * This buffer/vreq pair is has been allocated a ring buffer
   1371   8863     Edward 		 * resources, so if it isn't already in our runq, add it.
   1372   8863     Edward 		 */
   1373   8863     Edward 		if (!vreq->v_runq)
   1374   8863     Edward 			xdf_kstat_waitq_to_runq(vdp, bp);
   1375   6318        edp 	}
   1376   6318        edp 
   1377   8863     Edward 	/* Send the request(s) to the backend */
   1378   8863     Edward 	if (rreqready)
   1379   8863     Edward 		xdf_ring_push(vdp);
   1380   8863     Edward 
   1381   8863     Edward 	mutex_exit(&vdp->xdf_dev_lk);
   1382   8863     Edward }
   1383   8863     Edward 
   1384   8863     Edward 
   1385   8863     Edward /* check if partition is open, -1 - check all partitions on the disk */
   1386   8863     Edward static boolean_t
   1387   8863     Edward xdf_isopen(xdf_t *vdp, int partition)
   1388   8863     Edward {
   1389   8863     Edward 	int i;
   1390   8863     Edward 	ulong_t parbit;
   1391   8863     Edward 	boolean_t rval = B_FALSE;
   1392   8863     Edward 
   1393   8863     Edward 	ASSERT((partition == -1) ||
   1394   8863     Edward 	    ((partition >= 0) || (partition < XDF_PEXT)));
   1395   8863     Edward 
   1396   8863     Edward 	if (partition == -1)
   1397   8863     Edward 		parbit = (ulong_t)-1;
   1398   8863     Edward 	else
   1399   8863     Edward 		parbit = 1 << partition;
   1400   8863     Edward 
   1401   8863     Edward 	for (i = 0; i < OTYPCNT; i++) {
   1402   8863     Edward 		if (vdp->xdf_vd_open[i] & parbit)
   1403   8863     Edward 			rval = B_TRUE;
   1404   6318        edp 	}
   1405   6318        edp 
   1406   8863     Edward 	return (rval);
   1407   8863     Edward }
   1408   8863     Edward 
   1409   8863     Edward /*
   1410   8863     Edward  * The connection should never be closed as long as someone is holding
   1411   8863     Edward  * us open, there is pending IO, or someone is waiting waiting for a
   1412   8863     Edward  * connection.
   1413   8863     Edward  */
   1414   8863     Edward static boolean_t
   1415   8863     Edward xdf_busy(xdf_t *vdp)
   1416   8863     Edward {
   1417   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
   1418   8863     Edward 
   1419   8863     Edward 	if ((vdp->xdf_xb_ring != NULL) &&
   1420   8863     Edward 	    xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
   1421   8863     Edward 		ASSERT(vdp->xdf_state != XD_CLOSED);
   1422   8863     Edward 		return (B_TRUE);
   1423   8863     Edward 	}
   1424   8863     Edward 
   1425   8863     Edward 	if (!list_is_empty(&vdp->xdf_vreq_act) || (vdp->xdf_f_act != NULL)) {
   1426   8863     Edward 		ASSERT(vdp->xdf_state != XD_CLOSED);
   1427   8863     Edward 		return (B_TRUE);
   1428   8863     Edward 	}
   1429   8863     Edward 
   1430   8863     Edward 	if (xdf_isopen(vdp, -1)) {
   1431   8863     Edward 		ASSERT(vdp->xdf_state != XD_CLOSED);
   1432   8863     Edward 		return (B_TRUE);
   1433   8863     Edward 	}
   1434   8863     Edward 
   1435   8863     Edward 	if (vdp->xdf_connect_req > 0) {
   1436   8863     Edward 		ASSERT(vdp->xdf_state != XD_CLOSED);
   1437   8863     Edward 		return (B_TRUE);
   1438   8863     Edward 	}
   1439   8863     Edward 
   1440   8863     Edward 	return (B_FALSE);
   1441   8863     Edward }
   1442   8863     Edward 
   1443   8863     Edward static void
   1444   8863     Edward xdf_set_state(xdf_t *vdp, xdf_state_t new_state)
   1445   8863     Edward {
   1446   9471     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
   1447   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
   1448   8863     Edward 	DPRINTF(DDI_DBG, ("xdf@%s: state change %d -> %d\n",
   1449   8863     Edward 	    vdp->xdf_addr, vdp->xdf_state, new_state));
   1450   8863     Edward 	vdp->xdf_state = new_state;
   1451   8863     Edward 	cv_broadcast(&vdp->xdf_dev_cv);
   1452   8863     Edward }
   1453   8863     Edward 
   1454   8863     Edward static void
   1455   8863     Edward xdf_disconnect(xdf_t *vdp, xdf_state_t new_state, boolean_t quiet)
   1456   8863     Edward {
   1457   8863     Edward 	dev_info_t	*dip = vdp->xdf_dip;
   1458   8863     Edward 	boolean_t	busy;
   1459   8863     Edward 
   1460   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
   1461   8863     Edward 	ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
   1462   8863     Edward 	ASSERT((new_state == XD_UNKNOWN) || (new_state == XD_CLOSED));
   1463   8863     Edward 
   1464   8863     Edward 	/* Check if we're already there. */
   1465   8863     Edward 	if (vdp->xdf_state == new_state)
   1466   8863     Edward 		return;
   1467   8863     Edward 
   1468   8863     Edward 	mutex_enter(&vdp->xdf_dev_lk);
   1469   8863     Edward 	busy = xdf_busy(vdp);
   1470   8863     Edward 
   1471   8863     Edward 	/* If we're already closed then there's nothing todo. */
   1472   8863     Edward 	if (vdp->xdf_state == XD_CLOSED) {
   1473   8863     Edward 		ASSERT(!busy);
   1474   8863     Edward 		xdf_set_state(vdp, new_state);
   1475   8863     Edward 		mutex_exit(&vdp->xdf_dev_lk);
   1476   8863     Edward 		return;
   1477   8863     Edward 	}
   1478   8863     Edward 
   1479   8863     Edward #ifdef DEBUG
   1480   8863     Edward 	/* UhOh.  Warn the user that something bad has happened. */
   1481   8863     Edward 	if (!quiet && busy && (vdp->xdf_state == XD_READY) &&
   1482   8863     Edward 	    (vdp->xdf_xdev_nblocks != 0)) {
   1483   8863     Edward 		cmn_err(CE_WARN, "xdf@%s: disconnected while in use",
   1484   8863     Edward 		    vdp->xdf_addr);
   1485   8863     Edward 	}
   1486   8863     Edward #endif /* DEBUG */
   1487   8863     Edward 
   1488   8863     Edward 	xdf_ring_destroy(vdp);
   1489   8863     Edward 
   1490   8863     Edward 	/* If we're busy then we can only go into the unknown state */
   1491   8863     Edward 	xdf_set_state(vdp, (busy) ? XD_UNKNOWN : new_state);
   1492   6318        edp 	mutex_exit(&vdp->xdf_dev_lk);
   1493   6318        edp 
   1494   8863     Edward 	/* if we're closed now, let the other end know */
   1495   8863     Edward 	if (vdp->xdf_state == XD_CLOSED)
   1496   8863     Edward 		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
   1497   8863     Edward }
   1498   8863     Edward 
   1499   8863     Edward 
   1500   8863     Edward /*
   1501   8863     Edward  * Kick-off connect process
   1502   8863     Edward  * Status should be XD_UNKNOWN or XD_CLOSED
   1503   8863     Edward  * On success, status will be changed to XD_INIT
   1504   8863     Edward  * On error, it will be changed to XD_UNKNOWN
   1505   8863     Edward  */
   1506   8863     Edward static int
   1507   8863     Edward xdf_setstate_init(xdf_t *vdp)
   1508   8863     Edward {
   1509   8863     Edward 	dev_info_t		*dip = vdp->xdf_dip;
   1510   8863     Edward 	xenbus_transaction_t	xbt;
   1511   8863     Edward 	grant_ref_t		gref;
   1512   8863     Edward 	char			*xsname, *str;
   1513   8863     Edward 	int 			rv;
   1514   8863     Edward 
   1515   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
   1516   8863     Edward 	ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
   1517   8863     Edward 	ASSERT((vdp->xdf_state == XD_UNKNOWN) ||
   1518   8863     Edward 	    (vdp->xdf_state == XD_CLOSED));
   1519   8863     Edward 
   1520   8863     Edward 	DPRINTF(DDI_DBG,
   1521   8863     Edward 	    ("xdf@%s: starting connection process\n", vdp->xdf_addr));
   1522   8863     Edward 
   1523   8863     Edward 	/*
   1524   9471     Edward 	 * If an eject is pending then don't allow a new connection.
   1525   9471     Edward 	 * (Only the backend can clear media request eject request.)
   1526   8863     Edward 	 */
   1527   9471     Edward 	if (xdf_eject_pending(vdp))
   1528   8863     Edward 		return (DDI_FAILURE);
   1529   8863     Edward 
   1530   8863     Edward 	if ((xsname = xvdi_get_xsname(dip)) == NULL)
   1531   8863     Edward 		goto errout;
   1532   8863     Edward 
   1533   8863     Edward 	if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == INVALID_DOMID)
   1534   8863     Edward 		goto errout;
   1535   8863     Edward 
   1536   8863     Edward 	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialising);
   1537   8863     Edward 
   1538   8863     Edward 	/*
   1539   8863     Edward 	 * Sanity check for the existance of the xenbus device-type property.
   1540   8863     Edward 	 * This property might not exist if we our xenbus device nodes was
   1541   8863     Edward 	 * force destroyed while we were still connected to the backend.
   1542   8863     Edward 	 */
   1543   8863     Edward 	if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0)
   1544   8863     Edward 		goto errout;
   1545   8863     Edward 	strfree(str);
   1546   8863     Edward 
   1547   8863     Edward 	if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS)
   1548   8863     Edward 		goto errout;
   1549   8863     Edward 
   1550   8863     Edward 	vdp->xdf_evtchn = xvdi_get_evtchn(dip);
   1551   8863     Edward #ifdef XPV_HVM_DRIVER
   1552   8863     Edward 	ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp);
   1553   8863     Edward #else /* !XPV_HVM_DRIVER */
   1554   8863     Edward 	if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) !=
   1555   8863     Edward 	    DDI_SUCCESS) {
   1556   8863     Edward 		cmn_err(CE_WARN, "xdf@%s: xdf_setstate_init: "
   1557   8863     Edward 		    "failed to add intr handler", vdp->xdf_addr);
   1558   8863     Edward 		goto errout1;
   1559   8863     Edward 	}
   1560   8863     Edward #endif /* !XPV_HVM_DRIVER */
   1561   8863     Edward 
   1562   8863     Edward 	if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE,
   1563   8863     Edward 	    sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) !=
   1564   8863     Edward 	    DDI_SUCCESS) {
   1565   8863     Edward 		cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring",
   1566   8863     Edward 		    vdp->xdf_addr);
   1567   8863     Edward 		goto errout2;
   1568   8863     Edward 	}
   1569   8863     Edward 	vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */
   1570   8863     Edward 
   1571   8863     Edward 	/*
   1572   8863     Edward 	 * Write into xenstore the info needed by backend
   1573   8863     Edward 	 */
   1574   8863     Edward trans_retry:
   1575   8863     Edward 	if (xenbus_transaction_start(&xbt)) {
   1576   8863     Edward 		cmn_err(CE_WARN, "xdf@%s: failed to start transaction",
   1577   8863     Edward 		    vdp->xdf_addr);
   1578   8863     Edward 		xvdi_fatal_error(dip, EIO, "connect transaction init");
   1579   8863     Edward 		goto fail_trans;
   1580   8863     Edward 	}
   1581   8863     Edward 
   1582   8863     Edward 	/*
   1583   8863     Edward 	 * XBP_PROTOCOL is written by the domain builder in the case of PV
   1584   8863     Edward 	 * domains. However, it is not written for HVM domains, so let's
   1585   8863     Edward 	 * write it here.
   1586   8863     Edward 	 */
   1587   8863     Edward 	if (((rv = xenbus_printf(xbt, xsname,
   1588   8863     Edward 	    XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE)) != 0) ||
   1589   8863     Edward 	    ((rv = xenbus_printf(xbt, xsname,
   1590   8863     Edward 	    XBP_RING_REF, "%u", gref)) != 0) ||
   1591   8863     Edward 	    ((rv = xenbus_printf(xbt, xsname,
   1592   8863     Edward 	    XBP_EVENT_CHAN, "%u", vdp->xdf_evtchn)) != 0) ||
   1593   8863     Edward 	    ((rv = xenbus_printf(xbt, xsname,
   1594   8863     Edward 	    XBP_PROTOCOL, "%s", XEN_IO_PROTO_ABI_NATIVE)) != 0) ||
   1595   8863     Edward 	    ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0)) {
   1596   8863     Edward 		(void) xenbus_transaction_end(xbt, 1);
   1597   8863     Edward 		xvdi_fatal_error(dip, rv, "connect transaction setup");
   1598   8863     Edward 		goto fail_trans;
   1599   8863     Edward 	}
   1600   8863     Edward 
   1601   8863     Edward 	/* kick-off connect process */
   1602   8863     Edward 	if (rv = xenbus_transaction_end(xbt, 0)) {
   1603   8863     Edward 		if (rv == EAGAIN)
   1604   8863     Edward 			goto trans_retry;
   1605   8863     Edward 		xvdi_fatal_error(dip, rv, "connect transaction commit");
   1606   8863     Edward 		goto fail_trans;
   1607   8863     Edward 	}
   1608   8863     Edward 
   1609   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
   1610   8863     Edward 	mutex_enter(&vdp->xdf_dev_lk);
   1611   8863     Edward 	xdf_set_state(vdp, XD_INIT);
   1612   8863     Edward 	mutex_exit(&vdp->xdf_dev_lk);
   1613   8863     Edward 
   1614   8863     Edward 	return (DDI_SUCCESS);
   1615   8863     Edward 
   1616   8863     Edward fail_trans:
   1617   8863     Edward 	xvdi_free_ring(vdp->xdf_xb_ring);
   1618   8863     Edward errout2:
   1619   8863     Edward #ifdef XPV_HVM_DRIVER
   1620   8863     Edward 	ec_unbind_evtchn(vdp->xdf_evtchn);
   1621   8863     Edward #else /* !XPV_HVM_DRIVER */
   1622   8863     Edward 	(void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
   1623   8863     Edward #endif /* !XPV_HVM_DRIVER */
   1624   8863     Edward errout1:
   1625   8863     Edward 	xvdi_free_evtchn(dip);
   1626   8863     Edward 	vdp->xdf_evtchn = INVALID_EVTCHN;
   1627   8863     Edward errout:
   1628   8863     Edward 	xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
   1629   8863     Edward 	cmn_err(CE_WARN, "xdf@%s: failed to start connection to backend",
   1630   8863     Edward 	    vdp->xdf_addr);
   1631   8863     Edward 	return (DDI_FAILURE);
   1632   8863     Edward }
   1633   8863     Edward 
   1634   8863     Edward int
   1635   8863     Edward xdf_get_flush_block(xdf_t *vdp)
   1636   8863     Edward {
   1637   8863     Edward 	/*
   1638   8863     Edward 	 * Get a DEV_BSIZE aligned bufer
   1639   8863     Edward 	 */
   1640   9889      Larry 	vdp->xdf_flush_mem = kmem_alloc(vdp->xdf_xdev_secsize * 2, KM_SLEEP);
   1641   8863     Edward 	vdp->xdf_cache_flush_block =
   1642   9889      Larry 	    (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem),
   1643   9889      Larry 	    (int)vdp->xdf_xdev_secsize);
   1644   9889      Larry 
   1645   8863     Edward 	if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block,
   1646   9889      Larry 	    xdf_flush_block, vdp->xdf_xdev_secsize, NULL) != 0)
   1647   8863     Edward 		return (DDI_FAILURE);
   1648   8863     Edward 	return (DDI_SUCCESS);
   1649   8863     Edward }
   1650   8863     Edward 
   1651   8863     Edward static void
   1652   8863     Edward xdf_setstate_ready(void *arg)
   1653   8863     Edward {
   1654   8863     Edward 	xdf_t	*vdp = (xdf_t *)arg;
   1655   8863     Edward 
   1656   8863     Edward 	vdp->xdf_ready_tq_thread = curthread;
   1657   8863     Edward 
   1658   8863     Edward 	/*
   1659   8863     Edward 	 * We've created all the minor nodes via cmlb_attach() using default
   1660   8863     Edward 	 * value in xdf_attach() to make it possible to block in xdf_open(),
   1661   8863     Edward 	 * in case there's anyone (say, booting thread) ever trying to open
   1662   8863     Edward 	 * it before connected to backend. We will refresh all those minor
   1663   8863     Edward 	 * nodes w/ latest info we've got now when we are almost connected.
   1664   8863     Edward 	 */
   1665   8863     Edward 	mutex_enter(&vdp->xdf_dev_lk);
   1666   8863     Edward 	if (vdp->xdf_cmbl_reattach) {
   1667   8863     Edward 		vdp->xdf_cmbl_reattach = B_FALSE;
   1668   8863     Edward 
   1669   8863     Edward 		mutex_exit(&vdp->xdf_dev_lk);
   1670   8863     Edward 		if (xdf_cmlb_attach(vdp) != 0) {
   1671   8863     Edward 			xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
   1672   8863     Edward 			return;
   1673   8863     Edward 		}
   1674   8863     Edward 		mutex_enter(&vdp->xdf_dev_lk);
   1675   8863     Edward 	}
   1676   8863     Edward 
   1677   8863     Edward 	/* If we're not still trying to get to the ready state, then bail. */
   1678   8863     Edward 	if (vdp->xdf_state != XD_CONNECTED) {
   1679   8863     Edward 		mutex_exit(&vdp->xdf_dev_lk);
   1680   8863     Edward 		return;
   1681   8863     Edward 	}
   1682   8863     Edward 	mutex_exit(&vdp->xdf_dev_lk);
   1683   8863     Edward 
   1684   8863     Edward 	/*
   1685   8863     Edward 	 * If backend has feature-barrier, see if it supports disk
   1686   8863     Edward 	 * cache flush op.
   1687   8863     Edward 	 */
   1688   8863     Edward 	vdp->xdf_flush_supported = B_FALSE;
   1689   8863     Edward 	if (vdp->xdf_feature_barrier) {
   1690   8863     Edward 		/*
   1691   8863     Edward 		 * Pretend we already know flush is supported so probe
   1692   8863     Edward 		 * will attempt the correct op.
   1693   8863     Edward 		 */
   1694   8863     Edward 		vdp->xdf_flush_supported = B_TRUE;
   1695   8863     Edward 		if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) {
   1696   8863     Edward 			vdp->xdf_flush_supported = B_TRUE;
   1697   8863     Edward 		} else {
   1698   8863     Edward 			vdp->xdf_flush_supported = B_FALSE;
   1699   8863     Edward 			/*
   1700   8863     Edward 			 * If the other end does not support the cache flush op
   1701   8863     Edward 			 * then we must use a barrier-write to force disk
   1702   8863     Edward 			 * cache flushing.  Barrier writes require that a data
   1703   8863     Edward 			 * block actually be written.
   1704   8863     Edward 			 * Cache a block to barrier-write when we are
   1705   8863     Edward 			 * asked to perform a flush.
   1706   8863     Edward 			 * XXX - would it be better to just copy 1 block
   1707   8863     Edward 			 * (512 bytes) from whatever write we did last
   1708   8863     Edward 			 * and rewrite that block?
   1709   8863     Edward 			 */
   1710   8863     Edward 			if (xdf_get_flush_block(vdp) != DDI_SUCCESS) {
   1711   8863     Edward 				xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
   1712   8863     Edward 				return;
   1713   8863     Edward 			}
   1714   8863     Edward 		}
   1715   8863     Edward 	}
   1716   8863     Edward 
   1717   8863     Edward 	mutex_enter(&vdp->xdf_cb_lk);
   1718   8863     Edward 	mutex_enter(&vdp->xdf_dev_lk);
   1719   8863     Edward 	if (vdp->xdf_state == XD_CONNECTED)
   1720   8863     Edward 		xdf_set_state(vdp, XD_READY);
   1721   8863     Edward 	mutex_exit(&vdp->xdf_dev_lk);
   1722   8863     Edward 
   1723   8863     Edward 	/* Restart any currently queued up io */
   1724   8863     Edward 	xdf_io_start(vdp);
   1725   8863     Edward 
   1726   8863     Edward 	mutex_exit(&vdp->xdf_cb_lk);
   1727   8863     Edward }
   1728   8863     Edward 
   1729   8863     Edward /*
   1730   8863     Edward  * synthetic geometry
   1731   8863     Edward  */
   1732   8863     Edward #define	XDF_NSECTS	256
   1733   8863     Edward #define	XDF_NHEADS	16
   1734   8863     Edward 
   1735   8863     Edward static void
   1736   8863     Edward xdf_synthetic_pgeom(dev_info_t *dip, cmlb_geom_t *geomp)
   1737   8863     Edward {
   1738   8863     Edward 	xdf_t *vdp;
   1739   8863     Edward 	uint_t ncyl;
   1740   8863     Edward 
   1741   8863     Edward 	vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
   1742   8863     Edward 
   1743   8863     Edward 	ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS);
   1744   8863     Edward 
   1745   8863     Edward 	bzero(geomp, sizeof (*geomp));
   1746   8863     Edward 	geomp->g_ncyl = ncyl == 0 ? 1 : ncyl;
   1747   8863     Edward 	geomp->g_acyl = 0;
   1748   8863     Edward 	geomp->g_nhead = XDF_NHEADS;
   1749   8863     Edward 	geomp->g_nsect = XDF_NSECTS;
   1750   9889      Larry 	geomp->g_secsize = vdp->xdf_xdev_secsize;
   1751   8863     Edward 	geomp->g_capacity = vdp->xdf_xdev_nblocks;
   1752   8863     Edward 	geomp->g_intrlv = 0;
   1753   8863     Edward 	geomp->g_rpm = 7200;
   1754   8863     Edward }
   1755   8863     Edward 
   1756   8863     Edward /*
   1757   8863     Edward  * Finish other initialization after we've connected to backend
   1758   8863     Edward  * Status should be XD_INIT before calling this routine
   1759   8863     Edward  * On success, status should be changed to XD_CONNECTED.
   1760   8863     Edward  * On error, status should stay XD_INIT
   1761   8863     Edward  */
   1762   8863     Edward static int
   1763   8863     Edward xdf_setstate_connected(xdf_t *vdp)
   1764   8863     Edward {
   1765   8863     Edward 	dev_info_t	*dip = vdp->xdf_dip;
   1766   8863     Edward 	cmlb_geom_t	pgeom;
   1767   8863     Edward 	diskaddr_t	nblocks = 0;
   1768   9889      Larry 	uint_t		secsize = 0;
   1769   8863     Edward 	char		*oename, *xsname, *str;
   1770   8863     Edward 	uint_t		dinfo;
   1771   8863     Edward 
   1772   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
   1773   8863     Edward 	ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
   1774   8863     Edward 	ASSERT(vdp->xdf_state == XD_INIT);
   1775   8863     Edward 
   1776   8863     Edward 	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
   1777   8863     Edward 	    ((oename = xvdi_get_oename(dip)) == NULL))
   1778   8863     Edward 		return (DDI_FAILURE);
   1779   8863     Edward 
   1780   9471     Edward 	/* Make sure the other end is XenbusStateConnected */
   1781   9471     Edward 	if (xenbus_read_driver_state(oename) != XenbusStateConnected)
   1782   9471     Edward 		return (DDI_FAILURE);
   1783   9471     Edward 
   1784   8863     Edward 	/* Determine if feature barrier is supported by backend */
   1785   8863     Edward 	if (!(vdp->xdf_feature_barrier = xenbus_exists(oename, XBP_FB)))
   1786   9893       Mark 		cmn_err(CE_NOTE, "!xdf@%s: feature-barrier not supported",
   1787   8863     Edward 		    vdp->xdf_addr);
   1788   8863     Edward 
   1789   8863     Edward 	/*
   1790   8863     Edward 	 * Probe backend.  Read the device size into xdf_xdev_nblocks
   1791   8863     Edward 	 * and set the VDISK_READONLY, VDISK_CDROM, and VDISK_REMOVABLE
   1792   8863     Edward 	 * flags in xdf_dinfo.  If the emulated device type is "cdrom",
   1793   8863     Edward 	 * we always set VDISK_CDROM, regardless of if it's present in
   1794   8863     Edward 	 * the xenbus info parameter.
   1795   8863     Edward 	 */
   1796   8863     Edward 	if (xenbus_gather(XBT_NULL, oename,
   1797   8863     Edward 	    XBP_SECTORS, "%"SCNu64, &nblocks,
   1798   9889      Larry 	    XBP_SECTOR_SIZE, "%u", &secsize,
   1799   8863     Edward 	    XBP_INFO, "%u", &dinfo,
   1800   8863     Edward 	    NULL) != 0) {
   1801   8863     Edward 		cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: "
   1802   8863     Edward 		    "cannot read backend info", vdp->xdf_addr);
   1803   8863     Edward 		return (DDI_FAILURE);
   1804   8863     Edward 	}
   1805   8863     Edward 	if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) {
   1806   8863     Edward 		cmn_err(CE_WARN, "xdf@%s: cannot read device-type",
   1807   8863     Edward 		    vdp->xdf_addr);
   1808   8863     Edward 		return (DDI_FAILURE);
   1809   8863     Edward 	}
   1810   8863     Edward 	if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
   1811   8863     Edward 		dinfo |= VDISK_CDROM;
   1812   8863     Edward 	strfree(str);
   1813   8863     Edward 
   1814   9889      Larry 	if (secsize == 0 || !(ISP2(secsize / DEV_BSIZE)))
   1815   9889      Larry 		secsize = DEV_BSIZE;
   1816   8863     Edward 	vdp->xdf_xdev_nblocks = nblocks;
   1817   9889      Larry 	vdp->xdf_xdev_secsize = secsize;
   1818   8863     Edward #ifdef _ILP32
   1819   8863     Edward 	if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) {
   1820   8863     Edward 		cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: "
   1821   8863     Edward 		    "backend disk device too large with %llu blocks for"
   1822   8863     Edward 		    " 32-bit kernel", vdp->xdf_addr, vdp->xdf_xdev_nblocks);
   1823   8863     Edward 		xvdi_fatal_error(dip, EFBIG, "reading backend info");
   1824   8863     Edward 		return (DDI_FAILURE);
   1825   8863     Edward 	}
   1826   8863     Edward #endif
   1827   8863     Edward 
   1828   8863     Edward 	/*
   1829   8863     Edward 	 * If the physical geometry for a fixed disk has been explicity
   1830   8863     Edward 	 * set then make sure that the specified physical geometry isn't
   1831   8863     Edward 	 * larger than the device we connected to.
   1832   8863     Edward 	 */
   1833   8863     Edward 	if (vdp->xdf_pgeom_fixed &&
   1834   8863     Edward 	    (vdp->xdf_pgeom.g_capacity > vdp->xdf_xdev_nblocks)) {
   1835   8863     Edward 		cmn_err(CE_WARN,
   1836   8863     Edward 		    "xdf@%s: connect failed, fixed geometry too large",
   1837   8863     Edward 		    vdp->xdf_addr);
   1838   8863     Edward 		return (DDI_FAILURE);
   1839   8863     Edward 	}
   1840   8863     Edward 
   1841   8863     Edward 	vdp->xdf_media_req_supported = xenbus_exists(oename, XBP_MEDIA_REQ_SUP);
   1842   8863     Edward 
   1843   8863     Edward 	/* mark vbd is ready for I/O */
   1844   8863     Edward 	mutex_enter(&vdp->xdf_dev_lk);
   1845   8863     Edward 	xdf_set_state(vdp, XD_CONNECTED);
   1846   8863     Edward 
   1847   8863     Edward 	/* check if the cmlb label should be updated */
   1848   8863     Edward 	xdf_synthetic_pgeom(dip, &pgeom);
   1849   8863     Edward 	if ((vdp->xdf_dinfo != dinfo) ||
   1850   8863     Edward 	    (!vdp->xdf_pgeom_fixed &&
   1851   8863     Edward 	    (memcmp(&vdp->xdf_pgeom, &pgeom, sizeof (pgeom)) != 0))) {
   1852   8863     Edward 		vdp->xdf_cmbl_reattach = B_TRUE;
   1853   8863     Edward 
   1854   8863     Edward 		vdp->xdf_dinfo = dinfo;
   1855   8863     Edward 		if (!vdp->xdf_pgeom_fixed)
   1856   8863     Edward 			vdp->xdf_pgeom = pgeom;
   1857   8863     Edward 	}
   1858   8863     Edward 
   1859   8863     Edward 	if (XD_IS_CD(vdp) || XD_IS_RM(vdp)) {
   1860   8863     Edward 		if (vdp->xdf_xdev_nblocks == 0) {
   1861   8863     Edward 			vdp->xdf_mstate = DKIO_EJECTED;
   1862   8863     Edward 			cv_broadcast(&vdp->xdf_mstate_cv);
   1863   8863     Edward 		} else {
   1864   8863     Edward 			vdp->xdf_mstate = DKIO_INSERTED;
   1865   8863     Edward 			cv_broadcast(&vdp->xdf_mstate_cv);
   1866   8863     Edward 		}
   1867   8863     Edward 	} else {
   1868   8863     Edward 		if (vdp->xdf_mstate != DKIO_NONE) {
   1869   8863     Edward 			vdp->xdf_mstate = DKIO_NONE;
   1870   8863     Edward 			cv_broadcast(&vdp->xdf_mstate_cv);
   1871   8863     Edward 		}
   1872   8863     Edward 	}
   1873   8863     Edward 
   1874   8863     Edward 	mutex_exit(&vdp->xdf_dev_lk);
   1875   8863     Edward 
   1876   8863     Edward 	cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", vdp->xdf_addr,
   1877   8863     Edward 	    (uint64_t)vdp->xdf_xdev_nblocks);
   1878   8863     Edward 
   1879   8863     Edward 	/* Restart any currently queued up io */
   1880   8863     Edward 	xdf_io_start(vdp);
   1881   8863     Edward 
   1882   8863     Edward 	/*
   1883   8863     Edward 	 * To get to the ready state we have to do IO to the backend device,
   1884   8863     Edward 	 * but we can't initiate IO from the other end change callback thread
   1885   8863     Edward 	 * (which is the current context we're executing in.)  This is because
   1886   8863     Edward 	 * if the other end disconnects while we're doing IO from the callback
   1887   8863     Edward 	 * thread, then we can't receive that disconnect event and we hang
   1888   8863     Edward 	 * waiting for an IO that can never complete.
   1889   8863     Edward 	 */
   1890   8863     Edward 	(void) ddi_taskq_dispatch(vdp->xdf_ready_tq, xdf_setstate_ready, vdp,
   1891   8863     Edward 	    DDI_SLEEP);
   1892   8863     Edward 
   1893   8863     Edward 	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
   1894   8863     Edward 	return (DDI_SUCCESS);
   1895   8863     Edward }
   1896   8863     Edward 
/*
 * Callback invoked when the other end (the backend) of this device changes
 * its xenbus state.  impl_data points at the new XenbusState.
 *
 * The whole callback runs with xdf_cb_lk held.  Backend state changes are
 * ignored while the frontend is suspending or suspended.  On every other
 * path, threads sleeping on xdf_dev_cv are woken before returning so that
 * they can re-examine the frontend state.
 */
/*ARGSUSED*/
static void
xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data)
{
	XenbusState new_state = *(XenbusState *)impl_data;
	xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);

	DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n",
	    vdp->xdf_addr, new_state));

	mutex_enter(&vdp->xdf_cb_lk);

	/* We assume that this callback is single threaded */
	ASSERT(vdp->xdf_oe_change_thread == NULL);
	/* Record the callback thread so other code can assert against it. */
	DEBUG_EVAL(vdp->xdf_oe_change_thread = curthread);

	/* ignore any backend state changes if we're suspending/suspended */
	if (vdp->xdf_suspending || (vdp->xdf_state == XD_SUSPEND)) {
		DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL);
		mutex_exit(&vdp->xdf_cb_lk);
		return;
	}

	switch (new_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
		/* Already in the init state: nothing to do. */
		if (vdp->xdf_state == XD_INIT)
			break;

		/* Drop whatever connection we had and start over. */
		xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
		if (xdf_setstate_init(vdp) != DDI_SUCCESS)
			break;
		ASSERT(vdp->xdf_state == XD_INIT);
		break;

	case XenbusStateConnected:
		/* Already connected (or beyond): nothing to do. */
		if ((vdp->xdf_state == XD_CONNECTED) ||
		    (vdp->xdf_state == XD_READY))
			break;

		/* We must pass through XD_INIT before connecting. */
		if (vdp->xdf_state != XD_INIT) {
			xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
			if (xdf_setstate_init(vdp) != DDI_SUCCESS)
				break;
			ASSERT(vdp->xdf_state == XD_INIT);
		}

		/* If the connect fails, fall back to the unknown state. */
		if (xdf_setstate_connected(vdp) != DDI_SUCCESS) {
			xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
			break;
		}
		ASSERT(vdp->xdf_state == XD_CONNECTED);
		break;

	case XenbusStateClosing:
		/*
		 * Refuse the hot-unplug while the device is still open.
		 * NOTE(review): the -1 argument presumably means "any
		 * partition/open type" - confirm against xdf_isopen().
		 */
		if (xdf_isopen(vdp, -1)) {
			cmn_err(CE_NOTE,
			    "xdf@%s: hot-unplug failed, still in use",
			    vdp->xdf_addr);
			break;
		}
		/*FALLTHROUGH*/
	case XenbusStateClosed:
		xdf_disconnect(vdp, XD_CLOSED, B_FALSE);
		break;
	}

	/* notify anybody waiting for oe state change */
	cv_broadcast(&vdp->xdf_dev_cv);
	DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL);
	mutex_exit(&vdp->xdf_cb_lk);
}
   1971   8863     Edward 
/*
 * Try to bring the device to the XD_READY state.
 *
 * Must be called with both xdf_cb_lk and xdf_dev_lk held.  Both locks are
 * dropped and re-acquired internally, and both are held again on return.
 *
 * If `wait' is B_FALSE at most one pass through the connection loop is
 * made.  Otherwise the caller blocks until the device becomes ready or
 * the wait is interrupted by a signal.
 *
 * Returns the value of vdp->xdf_state at the time of return.  XD_CLOSED
 * is returned immediately if the device is already closed.
 */
static int
xdf_connect_locked(xdf_t *vdp, boolean_t wait)
{
	/* `reset' is counted in 0.1 sec timeouts, so 20 == 2 seconds. */
	int	rv, timeouts = 0, reset = 20;

	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

	/* we can't connect once we're in the closed state */
	if (vdp->xdf_state == XD_CLOSED)
		return (XD_CLOSED);

	vdp->xdf_connect_req++;
	while (vdp->xdf_state != XD_READY) {
		/*
		 * Drop xdf_dev_lk for the connection attempt; xdf_cb_lk
		 * remains held.
		 */
		mutex_exit(&vdp->xdf_dev_lk);

		/* only one thread at a time can be the connection thread */
		if (vdp->xdf_connect_thread == NULL)
			vdp->xdf_connect_thread = curthread;

		if (vdp->xdf_connect_thread == curthread) {
			if ((timeouts > 0) && ((timeouts % reset) == 0)) {
				/*
				 * If we haven't established a connection
				 * within the reset time, then disconnect
				 * so we can try again, and double the reset
				 * time.  The reset time starts at 2 sec.
				 */
				(void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE);
				reset *= 2;
			}
			/* Step the state machine as far as we can. */
			if (vdp->xdf_state == XD_UNKNOWN)
				(void) xdf_setstate_init(vdp);
			if (vdp->xdf_state == XD_INIT)
				(void) xdf_setstate_connected(vdp);
		}

		mutex_enter(&vdp->xdf_dev_lk);
		if (!wait || (vdp->xdf_state == XD_READY))
			goto out;

		/*
		 * Sleep with only xdf_dev_lk held.  Non-connection threads
		 * wait for a wakeup on xdf_dev_cv; the connection thread
		 * wakes up periodically so it can retry.
		 */
		mutex_exit((&vdp->xdf_cb_lk));
		if (vdp->xdf_connect_thread != curthread) {
			rv = cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk);
		} else {
			/* delay for 0.1 sec */
			rv = cv_reltimedwait_sig(&vdp->xdf_dev_cv,
			    &vdp->xdf_dev_lk, drv_usectohz(100*1000),
			    TR_CLOCK_TICK);
			/* -1 means the wait timed out */
			if (rv == -1)
				timeouts++;
		}
		/* Re-acquire both locks in cb_lk, dev_lk order. */
		mutex_exit((&vdp->xdf_dev_lk));
		mutex_enter((&vdp->xdf_cb_lk));
		mutex_enter((&vdp->xdf_dev_lk));
		/* rv == 0: the wait was interrupted by a signal */
		if (rv == 0)
			goto out;
	}

out:
	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
	ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));

	if (vdp->xdf_connect_thread == curthread) {
		/*
		 * wake up someone else so they can become the connection
		 * thread.
		 */
		cv_signal(&vdp->xdf_dev_cv);
		vdp->xdf_connect_thread = NULL;
	}

	/* Try to lock the media */
	mutex_exit((&vdp->xdf_dev_lk));
	(void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
	mutex_enter((&vdp->xdf_dev_lk));

	vdp->xdf_connect_req--;
	return (vdp->xdf_state);
}
   2052   8863     Edward 
   2053   8863     Edward static uint_t
   2054   8863     Edward xdf_iorestart(caddr_t arg)
   2055   8863     Edward {
   2056   8863     Edward 	xdf_t *vdp = (xdf_t *)arg;
   2057   8863     Edward 
   2058   8863     Edward 	ASSERT(vdp != NULL);
   2059   8863     Edward 
   2060   8863     Edward 	mutex_enter(&vdp->xdf_dev_lk);
   2061   8863     Edward 	ASSERT(ISDMACBON(vdp));
   2062   8863     Edward 	SETDMACBOFF(vdp);
   2063   8863     Edward 	mutex_exit(&vdp->xdf_dev_lk);
   2064   8863     Edward 
   2065   8863     Edward 	xdf_io_start(vdp);
   2066   8863     Edward 
   2067   8863     Edward 	return (DDI_INTR_CLAIMED);
   2068   6318        edp }
   2069   6318        edp 
   2070   6318        edp #if defined(XPV_HVM_DRIVER)
   2071   6318        edp 
/*
 * One entry per xdf instance registered via xdf_hvm_add().  The entries
 * let a device be looked up either by its device path or by its dip.
 */
typedef struct xdf_hvm_entry {
	list_node_t	xdf_he_list;	/* linkage on xdf_hvm_list */
	char		*xdf_he_path;	/* private copy of the device path */
	dev_info_t	*xdf_he_dip;	/* the device node itself */
} xdf_hvm_entry_t;

/* List of registered entries; protected by xdf_hvm_list_lock. */
static list_t xdf_hvm_list;
static kmutex_t xdf_hvm_list_lock;
   2080   6318        edp 
   2081   6318        edp static xdf_hvm_entry_t *
   2082   8863     Edward i_xdf_hvm_find(const char *path, dev_info_t *dip)
   2083   6318        edp {
   2084   6318        edp 	xdf_hvm_entry_t	*i;
   2085   6318        edp 
   2086   6318        edp 	ASSERT((path != NULL) || (dip != NULL));
   2087   6318        edp 	ASSERT(MUTEX_HELD(&xdf_hvm_list_lock));
   2088   6318        edp 
   2089   6318        edp 	i = list_head(&xdf_hvm_list);
   2090   6318        edp 	while (i != NULL) {
   2091   6318        edp 		if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) {
   2092   6318        edp 			i = list_next(&xdf_hvm_list, i);
   2093   6318        edp 			continue;
   2094   6318        edp 		}
   2095   6318        edp 		if ((dip != NULL) && (i->xdf_he_dip != dip)) {
   2096   6318        edp 			i = list_next(&xdf_hvm_list, i);
   2097   6318        edp 			continue;
   2098   6318        edp 		}
   2099   6318        edp 		break;
   2100   6318        edp 	}
   2101   6318        edp 	return (i);
   2102   6318        edp }
   2103   6318        edp 
   2104   6318        edp dev_info_t *
   2105   8863     Edward xdf_hvm_hold(const char *path)
   2106   6318        edp {
   2107   6318        edp 	xdf_hvm_entry_t	*i;
   2108   6318        edp 	dev_info_t	*dip;
   2109   6318        edp 
   2110   6318        edp 	mutex_enter(&xdf_hvm_list_lock);
   2111   6318        edp 	i = i_xdf_hvm_find(path, NULL);
   2112   6318        edp 	if (i == NULL) {
   2113   6318        edp 		mutex_exit(&xdf_hvm_list_lock);
   2114   6318        edp 		return (B_FALSE);
   2115   6318        edp 	}
   2116   6318        edp 	ndi_hold_devi(dip = i->xdf_he_dip);
   2117   6318        edp 	mutex_exit(&xdf_hvm_list_lock);
   2118   6318        edp 	return (dip);
   2119   6318        edp }
   2120   6318        edp 
   2121   6318        edp static void
   2122   6318        edp xdf_hvm_add(dev_info_t *dip)
   2123   6318        edp {
   2124   6318        edp 	xdf_hvm_entry_t	*i;
   2125   6318        edp 	char		*path;
   2126   6318        edp 
   2127   6318        edp 	/* figure out the path for the dip */
   2128   6318        edp 	path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
   2129   6318        edp 	(void) ddi_pathname(dip, path);
   2130   6318        edp 
   2131   6318        edp 	i = kmem_alloc(sizeof (*i), KM_SLEEP);
   2132   6318        edp 	i->xdf_he_dip = dip;
   2133   6318        edp 	i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP);
   2134   6318        edp 
   2135   6318        edp 	mutex_enter(&xdf_hvm_list_lock);
   2136   6318        edp 	ASSERT(i_xdf_hvm_find(path, NULL) == NULL);
   2137   6318        edp 	ASSERT(i_xdf_hvm_find(NULL, dip) == NULL);
   2138   6318        edp 	list_insert_head(&xdf_hvm_list, i);
   2139   6318        edp 	mutex_exit(&xdf_hvm_list_lock);
   2140   6318        edp 
   2141   6318        edp 	kmem_free(path, MAXPATHLEN);
   2142   6318        edp }
   2143   6318        edp 
   2144   6318        edp static void
   2145   6318        edp xdf_hvm_rm(dev_info_t *dip)
   2146   6318        edp {
   2147   6318        edp 	xdf_hvm_entry_t	*i;
   2148   6318        edp 
   2149   6318        edp 	mutex_enter(&xdf_hvm_list_lock);
   2150   6318        edp 	VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL);
   2151   6318        edp 	list_remove(&xdf_hvm_list, i);
   2152   6318        edp 	mutex_exit(&xdf_hvm_list_lock);
   2153   6318        edp 
   2154   6318        edp 	kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1);
   2155   6318        edp 	kmem_free(i, sizeof (*i));
   2156   6318        edp }
   2157   6318        edp 
   2158   6318        edp static void
   2159   6318        edp xdf_hvm_init(void)
   2160   6318        edp {
   2161   6318        edp 	list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t),
   2162   6318        edp 	    offsetof(xdf_hvm_entry_t, xdf_he_list));
   2163   6318        edp 	mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL);
   2164   6318        edp }
   2165   6318        edp 
   2166   6318        edp static void
   2167   6318        edp xdf_hvm_fini(void)
   2168   6318        edp {
   2169   6318        edp 	ASSERT(list_head(&xdf_hvm_list) == NULL);
   2170   6318        edp 	list_destroy(&xdf_hvm_list);
   2171   6318        edp 	mutex_destroy(&xdf_hvm_list_lock);
   2172   6318        edp }
   2173   6318        edp 
   2174   8863     Edward boolean_t
   2175   6318        edp xdf_hvm_connect(dev_info_t *dip)
   2176   6318        edp {
   2177   6318        edp 	xdf_t	*vdp = (xdf_t *)ddi_get_driver_private(dip);
   2178   8863     Edward 	char	*oename, *str;
   2179   6318        edp 	int	rv;
   2180   6318        edp 
   2181   8863     Edward 	mutex_enter(&vdp->xdf_cb_lk);
   2182   8863     Edward 
   2183   8863     Edward 	/*
   2184   8863     Edward 	 * Before try to establish a connection we need to wait for the
   2185   8863     Edward 	 * backend hotplug scripts to have run.  Once they are run the
   2186   8863     Edward 	 * "<oename>/hotplug-status" property will be set to "connected".
   2187   8863     Edward 	 */
   2188   8863     Edward 	for (;;) {
   2189   8863     Edward 		ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
   2190   8863     Edward 
   2191   8863     Edward 		/*
   2192   8863     Edward 		 * Get the xenbus path to the backend device.  Note that
   2193   8863     Edward 		 * we can't cache this path (and we look it up on each pass
   2194   8863     Edward 		 * through this loop) because it could change during
   2195   8863     Edward 		 * suspend, resume, and migration operations.
   2196   8863     Edward 		 */
   2197   8863     Edward 		if ((oename = xvdi_get_oename(dip)) == NULL) {
   2198   8863     Edward 			mutex_exit(&vdp->xdf_cb_lk);
   2199   8863     Edward 			return (B_FALSE);
   2200   8863     Edward 		}
   2201   8863     Edward 
   2202   8863     Edward 		str = NULL;
   2203   8863     Edward 		if ((xenbus_read_str(oename, XBP_HP_STATUS, &str) == 0) &&
   2204   8863     Edward 		    (strcmp(str, XBV_HP_STATUS_CONN) == 0))
   2205   8863     Edward 			break;
   2206   8863     Edward 
   2207   8863     Edward 		if (str != NULL)
   2208   8863     Edward 			strfree(str);
   2209   8863     Edward 
   2210   8863     Edward 		/* wait for an update to "<oename>/hotplug-status" */
   2211   8863     Edward 		if (cv_wait_sig(&vdp->xdf_hp_status_cv, &vdp->xdf_cb_lk) == 0) {
   2212   8863     Edward 			/* we got interrupted by a signal */
   2213   8863     Edward 			mutex_exit(&vdp->xdf_cb_lk);
   2214   8863     Edward 			return (B_FALSE);
   2215   8863     Edward 		}
   2216   8863     Edward 	}
   2217   8863     Edward 
   2218   8863     Edward 	/* Good news.  The backend hotplug scripts have been run. */
   2219   8863     Edward 	ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
   2220   8863     Edward 	ASSERT(strcmp(str, XBV_HP_STATUS_CONN) == 0);
   2221   8863     Edward 	strfree(str);
   2222   8863     Edward 
   2223   8863     Edward 	/*
   2224   8863     Edward 	 * If we're emulating a cd device and if the backend doesn't support
   2225   8863     Edward 	 * media request opreations, then we're not going to bother trying
   2226   8863     Edward 	 * to establish a connection for a couple reasons.  First off, media
   2227   8863     Edward 	 * requests support is required to support operations like eject and
   2228   8863     Edward 	 * media locking.  Second, other backend platforms like Linux don't
   2229   8863     Edward 	 * support hvm pv cdrom access.  They don't even have a backend pv
   2230   8863     Edward 	 * driver for cdrom device nodes, so we don't want to block forever
   2231   8863     Edward 	 * waiting for a connection to a backend driver that doesn't exist.
   2232   8863     Edward 	 */
   2233   8863     Edward 	if (XD_IS_CD(vdp) && !xenbus_exists(oename, XBP_MEDIA_REQ_SUP)) {
   2234   8863     Edward 		mutex_exit(&vdp->xdf_cb_lk);
   2235   8863     Edward 		return (B_FALSE);
   2236   8863     Edward 	}
   2237   8863     Edward 
   2238   9471     Edward 	mutex_enter(&vdp->xdf_dev_lk);
   2239   8863     Edward 	rv = xdf_connect_locked(vdp, B_TRUE);
   2240   6318        edp 	mutex_exit(&vdp->xdf_dev_lk);
   2241   8863     Edward 	mutex_exit(&vdp->xdf_cb_lk);
   2242   8863     Edward 
   2243   8863     Edward 	return ((rv == XD_READY) ? B_TRUE : B_FALSE);
   2244   6318        edp }
   2245   6318        edp 
   2246   6318        edp int
   2247   6318        edp xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp)
   2248   6318        edp {
   2249   6318        edp 	xdf_t	*vdp = (xdf_t *)ddi_get_driver_private(dip);
   2250   6318        edp 
   2251   6318        edp 	/* sanity check the requested physical geometry */
   2252   6318        edp 	mutex_enter(&vdp->xdf_dev_lk);
   2253   6318        edp 	if ((geomp->g_secsize != XB_BSIZE) ||
   2254   6318        edp 	    (geomp->g_capacity == 0)) {
   2255   6318        edp 		mutex_exit(&vdp->xdf_dev_lk);
   2256   6318        edp 		return (EINVAL);
   2257   6318        edp 	}
   2258   6318        edp 
   2259   6318        edp 	/*
   2260   6318        edp 	 * If we've already connected to the backend device then make sure
   2261   6318        edp 	 * we're not defining a physical geometry larger than our backend
   2262   6318        edp 	 * device.
   2263   6318        edp 	 */
   2264   6318        edp 	if ((vdp->xdf_xdev_nblocks != 0) &&
   2265   6318        edp 	    (geomp->g_capacity > vdp->xdf_xdev_nblocks)) {
   2266   6318        edp 		mutex_exit(&vdp->xdf_dev_lk);
   2267   6318        edp 		return (EINVAL);
   2268   6318        edp 	}
   2269   6318        edp 
   2270   8863     Edward 	bzero(&vdp->xdf_pgeom, sizeof (vdp->xdf_pgeom));
   2271   8863     Edward 	vdp->xdf_pgeom.g_ncyl = geomp->g_ncyl;
   2272   8863     Edward 	vdp->xdf_pgeom.g_acyl = geomp->g_acyl;
   2273   8863     Edward 	vdp->xdf_pgeom.g_nhead = geomp->g_nhead;
   2274   8863     Edward 	vdp->xdf_pgeom.g_nsect = geomp->g_nsect;
   2275   8863     Edward 	vdp->xdf_pgeom.g_secsize = geomp->g_secsize;
   2276   8863     Edward 	vdp->xdf_pgeom.g_capacity = geomp->g_capacity;
   2277   8863     Edward 	vdp->xdf_pgeom.g_intrlv = geomp->g_intrlv;
   2278   8863     Edward 	vdp->xdf_pgeom.g_rpm = geomp->g_rpm;
   2279   8863     Edward 
   2280   8863     Edward 	vdp->xdf_pgeom_fixed = B_TRUE;
   2281   6318        edp 	mutex_exit(&vdp->xdf_dev_lk);
   2282   6318        edp 
   2283   6318        edp 	/* force a re-validation */
   2284   6318        edp 	cmlb_invalidate(vdp->xdf_vd_lbl, NULL);
   2285   6318        edp 
   2286   6318        edp 	return (0);
   2287   6318        edp }
   2288   6318        edp 
   2289   8863     Edward boolean_t
   2290   8863     Edward xdf_is_cd(dev_info_t *dip)
   2291   8863     Edward {
   2292   8863     Edward 	xdf_t		*vdp = (xdf_t *)ddi_get_driver_private(dip);
   2293   8863     Edward 	boolean_t	rv;
   2294   8863     Edward 
   2295   8863     Edward 	mutex_enter(&vdp->xdf_cb_lk);
   2296   8863     Edward 	rv = XD_IS_CD(vdp);
   2297   8863     Edward 	mutex_exit(&vdp->xdf_cb_lk);
   2298   8863     Edward 	return (rv);
   2299   8863     Edward }
   2300   8863     Edward 
   2301   8863     Edward boolean_t
   2302   8863     Edward xdf_is_rm(dev_info_t *dip)
   2303   8863     Edward {
   2304   8863     Edward 	xdf_t		*vdp = (xdf_t *)ddi_get_driver_private(dip);
   2305   8863     Edward 	boolean_t	rv;
   2306   8863     Edward 
   2307   8863     Edward 	mutex_enter(&vdp->xdf_cb_lk);
   2308   8863     Edward 	rv = XD_IS_RM(vdp);
   2309   8863     Edward 	mutex_exit(&vdp->xdf_cb_lk);
   2310   8863     Edward 	return (rv);
   2311   8863     Edward }
   2312   8863     Edward 
   2313   8863     Edward boolean_t
   2314   8863     Edward xdf_media_req_supported(dev_info_t *dip)
   2315   8863     Edward {
   2316   8863     Edward 	xdf_t		*vdp = (xdf_t *)ddi_get_driver_private(dip);
   2317   8863     Edward 	boolean_t	rv;
   2318   8863     Edward 
   2319   8863     Edward 	mutex_enter(&vdp->xdf_cb_lk);
   2320   8863     Edward 	rv = vdp->xdf_media_req_supported;
   2321   8863     Edward 	mutex_exit(&vdp->xdf_cb_lk);
   2322   8863     Edward 	return (rv);
   2323   8863     Edward }
   2324   8863     Edward 
   2325   6318        edp #endif /* XPV_HVM_DRIVER */
   2326   8863     Edward 
   2327   8863     Edward static int
   2328   8863     Edward xdf_lb_getcap(dev_info_t *dip, diskaddr_t *capp)
   2329   8863     Edward {
   2330   8863     Edward 	xdf_t *vdp;
   2331   8863     Edward 	vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
   2332   8863     Edward 
   2333   8863     Edward 	if (vdp == NULL)
   2334   8863     Edward 		return (ENXIO);
   2335   8863     Edward 
   2336   8863     Edward 	mutex_enter(&vdp->xdf_dev_lk);
   2337   8863     Edward 	*capp = vdp->xdf_pgeom.g_capacity;
   2338   8863     Edward 	DPRINTF(LBL_DBG, ("xdf@%s:capacity %llu\n", vdp->xdf_addr, *capp));
   2339   8863     Edward 	mutex_exit(&vdp->xdf_dev_lk);
   2340   8863     Edward 	return (0);
   2341   8863     Edward }
   2342   8863     Edward 
   2343   8863     Edward static int
   2344   8863     Edward xdf_lb_getpgeom(dev_info_t *dip, cmlb_geom_t *geomp)
   2345   8863     Edward {
   2346   8863     Edward 	xdf_t *vdp;
   2347   8863     Edward 
   2348   8863     Edward 	if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL)
   2349   8863     Edward 		return (ENXIO);
   2350   8863     Edward 	*geomp = vdp->xdf_pgeom;
   2351   8863     Edward 	return (0);
   2352   8863     Edward }
   2353   8863     Edward 
/*
 * Handler for TG_GETVIRTGEOM.
 *
 * No real HBA, no geometry available from it, so this always fails with
 * EINVAL and never touches *geomp.
 */
/*ARGSUSED*/
static int
xdf_lb_getvgeom(dev_info_t *dip, cmlb_geom_t *geomp)
{
	return (EINVAL);
}
   2363   8863     Edward 
   2364   8863     Edward static int
   2365   8863     Edward xdf_lb_getattribute(dev_info_t *dip, tg_attribute_t *tgattributep)
   2366   8863     Edward {
   2367   8863     Edward 	xdf_t *vdp;
   2368   8863     Edward 
   2369   8863     Edward 	if (!(vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))))
   2370   8863     Edward 		return (ENXIO);
   2371   8863     Edward 
   2372   8863     Edward 	if (XD_IS_RO(vdp))
   2373   8863     Edward 		tgattributep->media_is_writable = 0;
   2374   8863     Edward 	else
   2375   8863     Edward 		tgattributep->media_is_writable = 1;
   2376   8863     Edward 	return (0);
   2377   8863     Edward }
   2378   8863     Edward 
   2379   8863     Edward /* ARGSUSED3 */
   2380   8863     Edward int
   2381   8863     Edward xdf_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
   2382   8863     Edward {
   2383   9889      Larry 	int instance;
   2384   9889      Larry 	xdf_t   *vdp;
   2385   9889      Larry 
   2386   9889      Larry 	instance = ddi_get_instance(dip);
   2387   9889      Larry 
   2388   9889      Larry 	if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL)
   2389   9889      Larry 		return (ENXIO);
   2390   9889      Larry 
   2391   8863     Edward 	switch (cmd) {
   2392   8863     Edward 	case TG_GETPHYGEOM:
   2393   8863     Edward 		return (xdf_lb_getpgeom(dip, (cmlb_geom_t *)arg));
   2394   8863     Edward 	case TG_GETVIRTGEOM:
   2395   8863     Edward 		return (xdf_lb_getvgeom(dip, (cmlb_geom_t *)arg));
   2396   8863     Edward 	case TG_GETCAPACITY:
   2397   8863     Edward 		return (xdf_lb_getcap(dip, (diskaddr_t *)arg));
   2398   8863     Edward 	case TG_GETBLOCKSIZE:
   2399   9889      Larry 		mutex_enter(&vdp->xdf_cb_lk);
   2400   9889      Larry 		*(uint32_t *)arg = vdp->xdf_xdev_secsize;
   2401   9889      Larry 		mutex_exit(&vdp->xdf_cb_lk);
   2402   8863     Edward 		return (0);
   2403   8863     Edward 	case TG_GETATTR:
   2404   8863     Edward 		return (xdf_lb_getattribute(dip, (tg_attribute_t *)arg));
   2405   8863     Edward 	default:
   2406   8863     Edward 		return (ENOTTY);
   2407   8863     Edward 	}
   2408   8863     Edward }
   2409   8863     Edward 
   2410   8863     Edward /* ARGSUSED5 */
   2411   8863     Edward int
   2412   8863     Edward xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp,
   2413   8863     Edward     diskaddr_t start, size_t reqlen, void *tg_cookie)
   2414   8863     Edward {
   2415   8863     Edward 	xdf_t *vdp;
   2416   8863     Edward 	struct buf *bp;
   2417   8863     Edward 	int err = 0;
   2418   8863     Edward 
   2419   8863     Edward 	vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
   2420   8863     Edward 
   2421   8863     Edward 	/* We don't allow IO from the oe_change callback thread */
   2422   8863     Edward 	ASSERT(curthread != vdp->xdf_oe_change_thread);
   2423   8863     Edward 
   2424   9889      Larry 	if ((start + ((reqlen / (vdp->xdf_xdev_secsize / DEV_BSIZE))
   2425   9889      Larry 	    >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity)
   2426   8863     Edward 		return (EINVAL);
   2427   8863     Edward 
   2428   8863     Edward 	bp = getrbuf(KM_SLEEP);
   2429   8863     Edward 	if (cmd == TG_READ)
   2430   8863     Edward 		bp->b_flags = B_BUSY | B_READ;
   2431   8863     Edward 	else
   2432   8863     Edward 		bp->b_flags = B_BUSY | B_WRITE;
   2433   9889      Larry 
   2434   8863     Edward 	bp->b_un.b_addr = bufp;
   2435   8863     Edward 	bp->b_bcount = reqlen;
   2436   9889      Larry 	bp->b_blkno = start * (vdp->xdf_xdev_secsize / DEV_BSIZE);
   2437   8863     Edward 	bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */
   2438   8863     Edward 
   2439   8863     Edward 	mutex_enter(&vdp->xdf_dev_lk);
   2440   8863     Edward 	xdf_bp_push(vdp, bp);
   2441   8863     Edward 	mutex_exit(&vdp->xdf_dev_lk);
   2442   8863     Edward 	xdf_io_start(vdp);
   2443   8863     Edward 	if (curthread == vdp->xdf_ready_tq_thread)
   2444   8863     Edward 		(void) xdf_ring_drain(vdp);
   2445   8863     Edward 	err = biowait(bp);
   2446   8863     Edward 	ASSERT(bp->b_flags & B_DONE);
   2447   8863     Edward 	freerbuf(bp);
   2448   8863     Edward 	return (err);
   2449   8863     Edward }
   2450   8863     Edward 
   2451   8863     Edward /*
   2452   8863     Edward  * Lock the current media.  Set the media state to "lock".
   2453   8863     Edward  * (Media locks are only respected by the backend driver.)
   2454   8863     Edward  */
   2455   8863     Edward static int
   2456   8863     Edward xdf_ioctl_mlock(xdf_t *vdp)
   2457   8863     Edward {
   2458   8863     Edward 	int rv;
   2459   8863     Edward 	mutex_enter(&vdp->xdf_cb_lk);
   2460   8863     Edward 	rv = xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
   2461   8863     Edward 	mutex_exit(&vdp->xdf_cb_lk);
   2462   8863     Edward 	return (rv);
   2463   8863     Edward }
   2464   8863     Edward 
   2465   8863     Edward /*
   2466   8863     Edward  * Release a media lock.  Set the media state to "none".
   2467   8863     Edward  */
   2468   8863     Edward static int
   2469   8863     Edward xdf_ioctl_munlock(xdf_t *vdp)
   2470   8863     Edward {
   2471   8863     Edward 	int rv;
   2472   8863     Edward 	mutex_enter(&vdp->xdf_cb_lk);
   2473   8863     Edward 	rv = xdf_media_req(vdp, XBV_MEDIA_REQ_NONE, B_TRUE);
   2474   8863     Edward 	mutex_exit(&vdp->xdf_cb_lk);
   2475   8863     Edward 	return (rv);
   2476   8863     Edward }
   2477   8863     Edward 
   2478   8863     Edward /*
   2479   8863     Edward  * Eject the current media.  Ignores any media locks.  (Media locks
   2480   8863     Edward  * are only for benifit of the the backend.)
   2481   8863     Edward  */
   2482   8863     Edward static int
   2483   8863     Edward xdf_ioctl_eject(xdf_t *vdp)
   2484   8863     Edward {
   2485   8863     Edward 	int rv;
   2486   8863     Edward 
   2487   8863     Edward 	mutex_enter(&vdp->xdf_cb_lk);
   2488   8863     Edward 	if ((rv = xdf_media_req(vdp, XBV_MEDIA_REQ_EJECT, B_FALSE)) != 0) {
   2489   8863     Edward 		mutex_exit(&vdp->xdf_cb_lk);
   2490   8863     Edward 		return (rv);
   2491   8863     Edward 	}
   2492   8863     Edward 
   2493   8863     Edward 	/*
   2494   8863     Edward 	 * We've set the media requests xenbus parameter to eject, so now
   2495   8863     Edward 	 * disconnect from the backend, wait for the backend to clear
   2496   8863     Edward 	 * the media requets xenbus paramter, and then we can reconnect
   2497   8863     Edward 	 * to the backend.
   2498   8863     Edward 	 */
   2499   8863     Edward 	(void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE);
   2500   8863     Edward 	mutex_enter(&vdp->xdf_dev_lk);
   2501   8863     Edward 	if (xdf_connect_locked(vdp, B_TRUE) != XD_READY) {
   2502   8863     Edward 		mutex_exit(&vdp->xdf_dev_lk);
   2503   8863     Edward 		mutex_exit(&vdp->xdf_cb_lk);
   2504   8863     Edward 		return (EIO);
   2505   8863     Edward 	}
   2506   8863     Edward 	mutex_exit(&vdp->xdf_dev_lk);
   2507   8863     Edward 	mutex_exit(&vdp->xdf_cb_lk);
   2508   8863     Edward 	return (0);
   2509   8863     Edward }
   2510   8863     Edward 
   2511   8863     Edward /*
   2512   8863     Edward  * Watch for media state changes.  This can be an insertion of a device
   2513   8863     Edward  * (triggered by a 'xm block-configure' request in another domain) or
   2514   8863     Edward  * the ejection of a device (triggered by a local "eject" operation).
   2515   8863     Edward  * For a full description of the DKIOCSTATE ioctl behavior see dkio(7I).
   2516   8863     Edward  */
   2517   8863     Edward static int
   2518   8863     Edward xdf_dkstate(xdf_t *vdp, enum dkio_state mstate)
   2519   8863     Edward {
   2520   8863     Edward 	enum dkio_state		prev_state;
   2521   8863     Edward 
   2522   8863     Edward 	mutex_enter(&vdp->xdf_cb_lk);
   2523   8863     Edward 	prev_state = vdp->xdf_mstate;
   2524   8863     Edward 
   2525   8863     Edward 	if (vdp->xdf_mstate == mstate) {
   2526   8863     Edward 		while (vdp->xdf_mstate == prev_state) {
   2527   8863     Edward 			if (cv_wait_sig(&vdp->xdf_mstate_cv,
   2528   8863     Edward 			    &vdp->xdf_cb_lk) == 0) {
   2529   8863     Edward 				mutex_exit(&vdp->xdf_cb_lk);
   2530   8863     Edward 				return (EINTR);
   2531   8863     Edward 			}
   2532   8863     Edward 		}
   2533   8863     Edward 	}
   2534   8863     Edward 
   2535   8863     Edward 	if ((prev_state != DKIO_INSERTED) &&
   2536   8863     Edward 	    (vdp->xdf_mstate == DKIO_INSERTED)) {
   2537   8863     Edward 		(void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
   2538   8863     Edward 		mutex_exit(&vdp->xdf_cb_lk);
   2539   8863     Edward 		return (0);
   2540   8863     Edward 	}
   2541   8863     Edward 
   2542   8863     Edward 	mutex_exit(&vdp->xdf_cb_lk);
   2543   8863     Edward 	return (0);
   2544   8863     Edward }
   2545   8863     Edward 
   2546   8863     Edward /*ARGSUSED*/
   2547   8863     Edward static int
   2548   8863     Edward xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
   2549   8863     Edward     int *rvalp)
   2550   8863     Edward {
   2551   8863     Edward 	minor_t		minor = getminor(dev);
   2552   8863     Edward 	int		part = XDF_PART(minor);
   2553   8863     Edward 	xdf_t		*vdp;
   2554   8863     Edward 	int		rv;
   2555   8863     Edward 
   2556   8863     Edward 	if (((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) ||
   2557   8863     Edward 	    (!xdf_isopen(vdp, part)))
   2558   8863     Edward 		return (ENXIO);
   2559   8863     Edward 
   2560   8863     Edward 	DPRINTF(IOCTL_DBG, ("xdf@%s:ioctl: cmd %d (0x%x)\n",
   2561   8863     Edward 	    vdp->xdf_addr, cmd, cmd));
   2562   8863     Edward 
   2563   8863     Edward 	switch (cmd) {
   2564   8863     Edward 	default:
   2565   8863     Edward 		return (ENOTTY);
   2566   8863     Edward 	case DKIOCG_PHYGEOM:
   2567   8863     Edward 	case DKIOCG_VIRTGEOM:
   2568   8863     Edward 	case DKIOCGGEOM:
   2569   8863     Edward 	case DKIOCSGEOM:
   2570   8863     Edward 	case DKIOCGAPART:
   2571   8863     Edward 	case DKIOCSAPART:
   2572   8863     Edward 	case DKIOCGVTOC:
   2573   8863     Edward 	case DKIOCSVTOC:
   2574   8863     Edward 	case DKIOCPARTINFO:
   2575   8863     Edward 	case DKIOCGEXTVTOC:
   2576   8863     Edward 	case DKIOCSEXTVTOC:
   2577   8863     Edward 	case DKIOCEXTPARTINFO:
   2578   8863     Edward 	case DKIOCGMBOOT:
   2579   8863     Edward 	case DKIOCSMBOOT:
   2580   8863     Edward 	case DKIOCGETEFI:
   2581   8863     Edward 	case DKIOCSETEFI:
   2582  10021  Sheshadri 	case DKIOCSETEXTPART:
   2583   8863     Edward 	case DKIOCPARTITION:
   2584   8863     Edward 		return (cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp,
   2585   8863     Edward 		    rvalp, NULL));
   2586   8863     Edward 	case FDEJECT:
   2587   8863     Edward 	case DKIOCEJECT:
   2588   8863     Edward 	case CDROMEJECT:
   2589   8863     Edward 		return (xdf_ioctl_eject(vdp));
   2590   8863     Edward 	case DKIOCLOCK:
   2591   8863     Edward 		return (xdf_ioctl_mlock(vdp));
   2592   8863     Edward 	case DKIOCUNLOCK:
   2593   8863     Edward 		return (xdf_ioctl_munlock(vdp));
   2594   8863     Edward 	case CDROMREADOFFSET: {
   2595   8863     Edward 		int offset = 0;
   2596   8863     Edward 		if (!XD_IS_CD(vdp))
   2597   8863     Edward 			return (ENOTTY);
   2598   8863     Edward 		if (ddi_copyout(&offset, (void *)arg, sizeof (int), mode))
   2599   8863     Edward 			return (EFAULT);
   2600   8863     Edward 		return (0);
   2601   8863     Edward 	}
   2602   8863     Edward 	case DKIOCGMEDIAINFO: {
   2603   8863     Edward 		struct dk_minfo media_info;
   2604   8863     Edward 
   2605   9889      Larry 		media_info.dki_lbsize = vdp->xdf_xdev_secsize;
   2606   8863     Edward 		media_info.dki_capacity = vdp->xdf_pgeom.g_capacity;
   2607   8863     Edward 		if (XD_IS_CD(vdp))
   2608   8863     Edward 			media_info.dki_media_type = DK_CDROM;
   2609   8863     Edward 		else
   2610   8863     Edward 			media_info.dki_media_type = DK_FIXED_DISK;
   2611   8863     Edward 
   2612   8863     Edward 		if (ddi_copyout(&media_info, (void *)arg,
   2613   8863     Edward 		    sizeof (struct dk_minfo), mode))
   2614   8863     Edward 			return (EFAULT);
   2615   8863     Edward 		return (0);
   2616   8863     Edward 	}
   2617   8863     Edward 	case DKIOCINFO: {
   2618   8863     Edward 		struct dk_cinfo info;
   2619   8863     Edward 
   2620   8863     Edward 		/* controller information */
   2621   8863     Edward 		if (XD_IS_CD(vdp))
   2622   8863     Edward 			info.dki_ctype = DKC_CDROM;
   2623   8863     Edward 		else
   2624   8863     Edward 			info.dki_ctype = DKC_VBD;
   2625   8863     Edward 
   2626   8863     Edward 		info.dki_cnum = 0;
   2627   8863     Edward 		(void) strncpy((char *)(&info.dki_cname), "xdf", 8);
   2628   8863     Edward 
   2629   8863     Edward 		/* unit information */
   2630   8863     Edward 		info.dki_unit = ddi_get_instance(vdp->xdf_dip);
   2631   8863     Edward 		(void) strncpy((char *)(&info.dki_dname), "xdf", 8);
   2632   8863     Edward 		info.dki_flags = DKI_FMTVOL;
   2633   8863     Edward 		info.dki_partition = part;
   2634   8863     Edward 		info.dki_maxtransfer = maxphys / DEV_BSIZE;
   2635   8863     Edward 		info.dki_addr = 0;
   2636   8863     Edward 		info.dki_space = 0;
   2637   8863     Edward 		info.dki_prio = 0;
   2638   8863     Edward 		info.dki_vec = 0;
   2639   8863     Edward 
   2640   8863     Edward 		if (ddi_copyout(&info, (void *)arg, sizeof (info), mode))
   2641   8863     Edward 			return (EFAULT);
   2642   8863     Edward 		return (0);
   2643   8863     Edward 	}
   2644   8863     Edward 	case DKIOCSTATE: {
   2645   8863     Edward 		enum dkio_state mstate;
   2646   8863     Edward 
   2647   8863     Edward 		if (ddi_copyin((void *)arg, &mstate,
   2648   8863     Edward 		    sizeof (mstate), mode) != 0)
   2649   8863     Edward 			return (EFAULT);
   2650   8863     Edward 		if ((rv = xdf_dkstate(vdp, mstate)) != 0)
   2651   8863     Edward 			return (rv);
   2652   8863     Edward 		mstate = vdp->xdf_mstate;
   2653   8863     Edward 		if (ddi_copyout(&mstate, (void *)arg,
   2654   8863     Edward 		    sizeof (mstate), mode) != 0)
   2655   8863     Edward 			return (EFAULT);
   2656   8863     Edward 		return (0);
   2657   8863     Edward 	}
   2658   8863     Edward 	case DKIOCREMOVABLE: {
   2659   8863     Edward 		int i = BOOLEAN2VOID(XD_IS_RM(vdp));
   2660   8863     Edward 		if (ddi_copyout(&i, (caddr_t)arg, sizeof (i), mode))
   2661   8863     Edward 			return (EFAULT);
   2662   8863     Edward 		return (0);
   2663   8863     Edward 	}
   2664   8863     Edward 	case DKIOCGETWCE: {
   2665   8863     Edward 		int i = BOOLEAN2VOID(XD_IS_RM(vdp));
   2666   8863     Edward 		if (ddi_copyout(&i, (void *)arg, sizeof (i), mode))
   2667   8863     Edward 			return (EFAULT);
   2668   8863     Edward 		return (0);
   2669   8863     Edward 	}
   2670   8863     Edward 	case DKIOCSETWCE: {
   2671   8863     Edward 		int i;
   2672   8863     Edward 		if (ddi_copyin((void *)arg, &i, sizeof (i), mode))
   2673   8863     Edward 			return (EFAULT);
   2674   8863     Edward 		vdp->xdf_wce = VOID2BOOLEAN(i);
   2675   8863     Edward 		return (0);
   2676   8863     Edward 	}
   2677   8863     Edward 	case DKIOCFLUSHWRITECACHE: {
   2678   8863     Edward 		struct dk_callback *dkc = (struct dk_callback *)arg;
   2679   8863     Edward 
   2680   8863     Edward 		if (vdp->xdf_flush_supported) {
   2681   8863     Edward 			rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
   2682   8863     Edward 			    NULL, 0, 0, (void *)dev);
   2683   8863     Edward 		} else if (vdp->xdf_feature_barrier &&
   2684   8863     Edward 		    !xdf_barrier_flush_disable) {
   2685   8863     Edward 			rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
   2686   8863     Edward 			    vdp->xdf_cache_flush_block, xdf_flush_block,
   2687   9889      Larry 			    vdp->xdf_xdev_secsize, (void *)dev);
   2688   8863     Edward 		} else {
   2689   8863     Edward 			return (ENOTTY);
   2690   8863     Edward 		}
   2691   8863     Edward 		if ((mode & FKIOCTL) && (dkc != NULL) &&
   2692   8863     Edward 		    (dkc->dkc_callback != NULL)) {
   2693   8863     Edward 			(*dkc->dkc_callback)(dkc->dkc_cookie, rv);
   2694   8863     Edward 			/* need to return 0 after calling callback */
   2695   8863     Edward 			rv = 0;
   2696   8863     Edward 		}
   2697   8863     Edward 		return (rv);
   2698   8863     Edward 	}
   2699   8863     Edward 	}
   2700   8863     Edward 	/*NOTREACHED*/
   2701   8863     Edward }
   2702   8863     Edward 
   2703   8863     Edward static int
   2704   8863     Edward xdf_strategy(struct buf *bp)
   2705   8863     Edward {
   2706   8863     Edward 	xdf_t	*vdp;
   2707   8863     Edward 	minor_t minor;
   2708   8863     Edward 	diskaddr_t p_blkct, p_blkst;
   2709   9889      Larry 	daddr_t blkno;
   2710   8863     Edward 	ulong_t nblks;
   2711   8863     Edward 	int part;
   2712   8863     Edward 
   2713   8863     Edward 	minor = getminor(bp->b_edev);
   2714   8863     Edward 	part = XDF_PART(minor);
   2715   8863     Edward 	vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor));
   2716   8863     Edward 
   2717   8863     Edward 	mutex_enter(&vdp->xdf_dev_lk);
   2718   8863     Edward 	if (!xdf_isopen(vdp, part)) {
   2719   8863     Edward 		mutex_exit(&vdp->xdf_dev_lk);
   2720   8863     Edward 		xdf_io_err(bp, ENXIO, 0);
   2721   8863     Edward 		return (0);
   2722   8863     Edward 	}
   2723   8863     Edward 
   2724   8863     Edward 	/* We don't allow IO from the oe_change callback thread */
   2725   8863     Edward 	ASSERT(curthread != vdp->xdf_oe_change_thread);
   2726   8863     Edward 
   2727   8863     Edward 	/* Check for writes to a read only device */
   2728   8863     Edward 	if (!IS_READ(bp) && XD_IS_RO(vdp)) {
   2729   8863     Edward 		mutex_exit(&vdp->xdf_dev_lk);
   2730   8863     Edward 		xdf_io_err(bp, EROFS, 0);
   2731   8863     Edward 		return (0);
   2732   8863     Edward 	}
   2733   8863     Edward 
   2734   8863     Edward 	/* Check if this I/O is accessing a partition or the entire disk */
   2735   8863     Edward 	if ((long)bp->b_private == XB_SLICE_NONE) {
   2736   8863     Edward 		/* This I/O is using an absolute offset */
   2737   8863     Edward 		p_blkct = vdp->xdf_xdev_nblocks;
   2738   8863     Edward 		p_blkst = 0;
   2739   8863     Edward 	} else {
   2740   8863     Edward 		/* This I/O is using a partition relative offset */
   2741   8863     Edward 		mutex_exit(&vdp->xdf_dev_lk);
   2742   8863     Edward 		if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
   2743   8863     Edward 		    &p_blkst, NULL, NULL, NULL)) {
   2744   8863     Edward 			xdf_io_err(bp, ENXIO, 0);
   2745   8863     Edward 			return (0);
   2746   8863     Edward 		}
   2747   8863     Edward 		mutex_enter(&vdp->xdf_dev_lk);
   2748   8863     Edward 	}
   2749   8863     Edward 
   2750   9889      Larry 	/*
   2751   9889      Larry 	 * Adjust the real blkno and bcount according to the underline
   2752   9889      Larry 	 * physical sector size.
   2753   9889      Larry 	 */
   2754   9889      Larry 	blkno = bp->b_blkno / (vdp->xdf_xdev_secsize / XB_BSIZE);
   2755   9889      Larry 
   2756   8863     Edward 	/* check for a starting block beyond the disk or partition limit */
   2757   9889      Larry 	if (blkno > p_blkct) {
   2758   8863     Edward 		DPRINTF(IO_DBG, ("xdf@%s: block %lld exceeds VBD size %"PRIu64,
   2759   9889      Larry 		    vdp->xdf_addr, (longlong_t)blkno, (uint64_t)p_blkct));
   2760   9889      Larry 		mutex_exit(&vdp->xdf_dev_lk);
   2761   8863     Edward 		xdf_io_err(bp, EINVAL, 0);
   2762   8863     Edward 		return (0);
   2763   8863     Edward 	}
   2764   8863     Edward 
   2765   8863     Edward 	/* Legacy: don't set error flag at this case */
   2766   9889      Larry 	if (blkno == p_blkct) {
   2767   9889      Larry 		mutex_exit(&vdp->xdf_dev_lk);
   2768   8863     Edward 		bp->b_resid = bp->b_bcount;
   2769   8863     Edward 		biodone(bp);
   2770   8863     Edward 		return (0);
   2771   8863     Edward 	}
   2772   8863     Edward 
   2773   8863     Edward 	/* sanitize the input buf */
   2774   8863     Edward 	bioerror(bp, 0);
   2775   8863     Edward 	bp->b_resid = 0;
   2776   8863     Edward 	bp->av_back = bp->av_forw = NULL;
   2777   8863     Edward 
   2778   8863     Edward 	/* Adjust for partial transfer, this will result in an error later */
   2779   9889      Larry 	if (vdp->xdf_xdev_secsize != 0 &&
   2780   9889      Larry 	    vdp->xdf_xdev_secsize != XB_BSIZE) {
   2781   9889      Larry 		nblks = bp->b_bcount / vdp->xdf_xdev_secsize;
   2782   9889      Larry 	} else {
   2783   9889      Larry 		nblks = bp->b_bcount >> XB_BSHIFT;
   2784   9889      Larry 	}
   2785   9889      Larry 
   2786   9889      Larry 	if ((blkno + nblks) > p_blkct) {
   2787   9889      Larry 		if (vdp->xdf_xdev_secsize != 0 &&
   2788   9889      Larry 		    vdp->xdf_xdev_secsize != XB_BSIZE) {
   2789   9889      Larry 			bp->b_resid =
   2790   9889      Larry 			    ((blkno + nblks) - p_blkct) *
   2791   9889      Larry 			    vdp->xdf_xdev_secsize;
   2792   9889      Larry 		} else {
   2793   9889      Larry 			bp->b_resid =
   2794   9889      Larry 			    ((blkno + nblks) - p_blkct) <<
   2795   9889      Larry 			    XB_BSHIFT;
   2796   9889      Larry 		}
   2797   8863     Edward 		bp->b_bcount -= bp->b_resid;
   2798   8863     Edward 	}
   2799   8863     Edward 
   2800   8863     Edward 	DPRINTF(IO_DBG, ("xdf@%s: strategy blk %lld len %lu\n",
   2801   9889      Larry 	    vdp->xdf_addr, (longlong_t)blkno, (ulong_t)bp->b_bcount));
   2802   8863     Edward 
   2803   8863     Edward 	/* Fix up the buf struct */
   2804   8863     Edward 	bp->b_flags |= B_BUSY;
   2805   8863     Edward 	bp->b_private = (void *)(uintptr_t)p_blkst;
   2806   8863     Edward 
   2807   8863     Edward 	xdf_bp_push(vdp, bp);
   2808   8863     Edward 	mutex_exit(&vdp->xdf_dev_lk);
   2809   8863     Edward 	xdf_io_start(vdp);
   2810   8863     Edward 	if (do_polled_io)
   2811   8863     Edward 		(void) xdf_ring_drain(vdp);
   2812   8863     Edward 	return (0);
   2813   8863     Edward }
   2814   8863     Edward 
   2815   8863     Edward /*ARGSUSED*/
   2816   8863     Edward static int
   2817   8863     Edward xdf_read(dev_t dev, struct uio *uiop, cred_t *credp)
   2818   8863     Edward {
   2819   8863     Edward 	xdf_t	*vdp;
   2820   8863     Edward 	minor_t minor;
   2821   8863     Edward 	diskaddr_t p_blkcnt;
   2822   8863     Edward 	int part;
   2823   8863     Edward 
   2824   8863     Edward 	minor = getminor(dev);
   2825   8863     Edward 	if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
   2826   8863     Edward 		return (ENXIO);
   2827   8863     Edward 
   2828   8863     Edward 	DPRINTF(IO_DBG, ("xdf@%s: read offset 0x%"PRIx64"\n",
   2829   8863     Edward 	    vdp->xdf_addr, (int64_t)uiop->uio_offset));
   2830   8863     Edward 
   2831   8863     Edward 	part = XDF_PART(minor);
   2832   8863     Edward 	if (!xdf_isopen(vdp, part))
   2833   8863     Edward 		return (ENXIO);
   2834   8863     Edward 
   2835   8863     Edward 	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
   2836   8863     Edward 	    NULL, NULL, NULL, NULL))
   2837   8863     Edward 		return (ENXIO);
   2838   8863     Edward 
   2839   9889      Larry 	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
   2840   9889      Larry 		return (ENOSPC);
   2841   9889      Larry 
   2842   8863     Edward 	if (U_INVAL(uiop))
   2843   8863     Edward 		return (EINVAL);
   2844   8863     Edward 
   2845   8863     Edward 	return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop));
   2846   8863     Edward }
   2847   8863     Edward 
   2848   8863     Edward /*ARGSUSED*/
   2849   8863     Edward static int
   2850   8863     Edward xdf_write(dev_t dev, struct uio *uiop, cred_t *credp)
   2851   8863     Edward {
   2852   8863     Edward 	xdf_t *vdp;
   2853   8863     Edward 	minor_t minor;
   2854   8863     Edward 	diskaddr_t p_blkcnt;
   2855   8863     Edward 	int part;
   2856   8863     Edward 
   2857   8863     Edward 	minor = getminor(dev);
   2858   8863     Edward 	if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
   2859   8863     Edward 		return (ENXIO);
   2860   8863     Edward 
   2861   8863     Edward 	DPRINTF(IO_DBG, ("xdf@%s: write offset 0x%"PRIx64"\n",
   2862   8863     Edward 	    vdp->xdf_addr, (int64_t)uiop->uio_offset));
   2863   8863     Edward 
   2864   8863     Edward 	part = XDF_PART(minor);
   2865   8863     Edward 	if (!xdf_isopen(vdp, part))
   2866   8863     Edward 		return (ENXIO);
   2867   8863     Edward 
   2868   8863     Edward 	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
   2869   8863     Edward 	    NULL, NULL, NULL, NULL))
   2870   8863     Edward 		return (ENXIO);
   2871   8863     Edward 
   2872   9889      Larry 	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
   2873   8863     Edward 		return (ENOSPC);
   2874   8863     Edward 
   2875   8863     Edward 	if (U_INVAL(uiop))
   2876   8863     Edward 		return (EINVAL);
   2877   8863     Edward 
   2878   8863     Edward 	return (physio(xdf_strategy, NULL, dev, B_WRITE, xdfmin, uiop));
   2879   8863     Edward }
   2880   8863     Edward 
   2881   8863     Edward /*ARGSUSED*/
   2882   8863     Edward static int
   2883   8863     Edward xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp)
   2884   8863     Edward {
   2885   8863     Edward 	xdf_t	*vdp;
   2886   8863     Edward 	minor_t minor;
   2887   8863     Edward 	struct uio *uiop = aiop->aio_uio;
   2888   8863     Edward 	diskaddr_t p_blkcnt;
   2889   8863     Edward 	int part;
   2890   8863     Edward 
   2891   8863     Edward 	minor = getminor(dev);
   2892   8863     Edward 	if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
   2893   8863     Edward 		return (ENXIO);
   2894   8863     Edward 
   2895   8863     Edward 	part = XDF_PART(minor);
   2896   8863     Edward 	if (!xdf_isopen(vdp, part))
   2897   8863     Edward 		return (ENXIO);
   2898   8863     Edward 
   2899   8863     Edward 	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
   2900   8863     Edward 	    NULL, NULL, NULL, NULL))
   2901   8863     Edward 		return (ENXIO);
   2902   8863     Edward 
   2903   9889      Larry 	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
   2904   8863     Edward 		return (ENOSPC);
   2905   8863     Edward 
   2906   8863     Edward 	if (U_INVAL(uiop))
   2907   8863     Edward 		return (EINVAL);
   2908   8863     Edward 
   2909   8863     Edward 	return (aphysio(xdf_strategy, anocancel, dev, B_READ, xdfmin, aiop));
   2910   8863     Edward }
   2911   8863     Edward 
   2912   8863     Edward /*ARGSUSED*/
   2913   8863     Edward static int
   2914   8863     Edward xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp)
   2915   8863     Edward {
   2916   8863     Edward 	xdf_t *vdp;
   2917   8863     Edward 	minor_t minor;
   2918   8863     Edward 	struct uio *uiop = aiop->aio_uio;
   2919   8863     Edward 	diskaddr_t p_blkcnt;
   2920   8863     Edward 	int part;
   2921   8863     Edward 
   2922   8863     Edward 	minor = getminor(dev);
   2923   8863     Edward 	if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
   2924   8863     Edward 		return (ENXIO);
   2925   8863     Edward 
   2926   8863     Edward 	part = XDF_PART(minor);
   2927   8863     Edward 	if (!xdf_isopen(vdp, part))
   2928   8863     Edward 		return (ENXIO);
   2929   8863     Edward 
   2930   8863     Edward 	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
   2931   8863     Edward 	    NULL, NULL, NULL, NULL))
   2932   8863     Edward 		return (ENXIO);
   2933   8863     Edward 
   2934   9889      Larry 	if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
   2935   8863     Edward 		return (ENOSPC);
   2936   8863     Edward 
   2937   8863     Edward 	if (U_INVAL(uiop))
   2938   8863     Edward 		return (EINVAL);
   2939   8863     Edward 
   2940   8863     Edward 	return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, xdfmin, aiop));
   2941   8863     Edward }
   2942   8863     Edward 
   2943   8863     Edward static int
   2944   8863     Edward xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
   2945   8863     Edward {
   2946   8863     Edward 	struct buf dumpbuf, *dbp = &dumpbuf;
   2947   8863     Edward 	xdf_t	*vdp;
   2948   8863     Edward 	minor_t minor;
   2949   8863     Edward 	int err = 0;
   2950   8863     Edward 	int part;
   2951   8863     Edward 	diskaddr_t p_blkcnt, p_blkst;
   2952   8863     Edward 
   2953   8863     Edward 	minor = getminor(dev);
   2954   8863     Edward 	if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
   2955   8863     Edward 		return (ENXIO);
   2956   8863     Edward 
   2957   8863     Edward 	DPRINTF(IO_DBG, ("xdf@%s: dump addr (0x%p) blk (%ld) nblks (%d)\n",
   2958   8863     Edward 	    vdp->xdf_addr, (void *)addr, blkno, nblk));
   2959   8863     Edward 
   2960   8863     Edward 	/* We don't allow IO from the oe_change callback thread */
   2961   8863     Edward 	ASSERT(curthread != vdp->xdf_oe_change_thread);
   2962   8863     Edward 
   2963   8863     Edward 	part = XDF_PART(minor);
   2964   8863     Edward 	if (!xdf_isopen(vdp, part))
   2965   8863     Edward 		return (ENXIO);
   2966   8863     Edward 
   2967   8863     Edward 	if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst,
   2968   8863     Edward 	    NULL, NULL, NULL))
   2969   8863     Edward 		return (ENXIO);
   2970   8863     Edward 
   2971   9889      Larry 	if ((blkno + nblk) >
   2972   9889      Larry 	    (p_blkcnt * (vdp->xdf_xdev_secsize / XB_BSIZE))) {
   2973   8863     Edward 		cmn_err(CE_WARN, "xdf@%s: block %ld exceeds VBD size %"PRIu64,
   2974   9889      Larry 		    vdp->xdf_addr, (daddr_t)((blkno + nblk) /
   2975   9889      Larry 		    (vdp->xdf_xdev_secsize / XB_BSIZE)), (uint64_t)p_blkcnt);
   2976   8863     Edward 		return (EINVAL);
   2977   8863     Edward 	}
   2978   8863     Edward 
   2979   8863     Edward 	bioinit(dbp);
   2980   8863     Edward 	dbp->b_flags = B_BUSY;
   2981   8863     Edward 	dbp->b_un.b_addr = addr;
   2982   8863     Edward 	dbp->b_bcount = nblk << DEV_BSHIFT;
   2983   8863     Edward 	dbp->b_blkno = blkno;
   2984   8863     Edward 	dbp->b_edev = dev;
   2985   8863     Edward 	dbp->b_private = (void *)(uintptr_t)p_blkst;
   2986   8863     Edward 
   2987   8863     Edward 	mutex_enter(&vdp->xdf_dev_lk);
   2988   8863     Edward 	xdf_bp_push(vdp, dbp);
   2989   8863     Edward 	mutex_exit(&vdp->xdf_dev_lk);
   2990   8863     Edward 	xdf_io_start(vdp);
   2991   8863     Edward 	err = xdf_ring_drain(vdp);
   2992   8863     Edward 	biofini(dbp);
   2993   8863     Edward 	return (err);
   2994   8863     Edward }
   2995   8863     Edward 
   2996   8863     Edward /*ARGSUSED*/
   2997   8863     Edward static int
   2998   8863     Edward xdf_close(dev_t dev, int flag, int otyp, struct cred *credp)
   2999   8863     Edward {
   3000   8863     Edward 	minor_t	minor;
   3001   8863     Edward 	xdf_t	*vdp;
   3002   8863     Edward 	int part;
   3003   8863     Edward 	ulong_t parbit;
   3004   8863     Edward 
   3005   8863     Edward 	minor = getminor(dev);
   3006   8863     Edward 	if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
   3007   8863     Edward 		return (ENXIO);
   3008   8863     Edward 
   3009   8863     Edward 	mutex_enter(&vdp->xdf_dev_lk);
   3010   8863     Edward 	part = XDF_PART(minor);
   3011   8863     Edward 	if (!xdf_isopen(vdp, part)) {
   3012   8863