Home | History | Annotate | Download | only in io
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * Note: This is the backend part of the split PV disk driver. This driver
     29  * is not a nexus driver, nor is it a leaf driver(block/char/stream driver).
     30  * Currently, it does not create any minor node. So, although, it runs in
     31  * backend domain, it will not be used directly from within dom0.
     32  * It simply gets block I/O requests issued by frontend from a shared page
     33  * (blkif ring buffer - defined by Xen) between backend and frontend domain,
     34  * generates a buf, and push it down to underlying disk target driver via
     35  * ldi interface. When buf is done, this driver will generate a response
     36  * and put it into ring buffer to inform frontend of the status of the I/O
     37  * request issued by it. When a new virtual device entry is added in xenstore,
 * there will be a watch event sent from Xen to the xvdi framework, which will,
     39  * in turn, create the devinfo node and try to attach this driver
     40  * (see xvdi_create_dev). When frontend peer changes its state to
     41  * XenbusStateClose, an event will also be sent from Xen to xvdi framework,
     42  * who will detach and remove this devinfo node (see i_xvdi_oestate_handler).
 * I/O requests obtained from the ring buffer and events coming from xenstore
 * cannot be trusted. We verify them in xdb_get_buf() and
 * xdb_check_state_transition().
     45  *
     46  * Virtual device configuration is read/written from/to the database via
     47  * xenbus_* interfaces. Driver also use xvdi_* to interact with hypervisor.
     48  * There is an on-going effort to make xvdi_* cover all xenbus_*.
     49  */
     50 
     51 #include <sys/types.h>
     52 #include <sys/conf.h>
     53 #include <sys/ddi.h>
     54 #include <sys/dditypes.h>
     55 #include <sys/sunddi.h>
     56 #include <sys/list.h>
     57 #include <sys/dkio.h>
     58 #include <sys/cmlb.h>
     59 #include <sys/vtoc.h>
     60 #include <sys/modctl.h>
     61 #include <sys/bootconf.h>
     62 #include <sys/promif.h>
     63 #include <sys/sysmacros.h>
     64 #include <public/io/xenbus.h>
     65 #include <public/io/xs_wire.h>
     66 #include <xen/sys/xenbus_impl.h>
     67 #include <xen/sys/xendev.h>
     68 #include <sys/gnttab.h>
     69 #include <sys/scsi/generic/inquiry.h>
     70 #include <vm/seg_kmem.h>
     71 #include <vm/hat_i86.h>
     72 #include <sys/gnttab.h>
     73 #include <sys/lofi.h>
     74 #include <io/xdf.h>
     75 #include <xen/io/blkif_impl.h>
     76 #include <io/xdb.h>
     77 
/*
 * NOTE(review): xdb_statep is not referenced in the part of the file
 * reviewed here; presumably it anchors per-instance driver state -- confirm.
 */
static xdb_t *xdb_statep;
/*
 * Debug control, off by default.
 * NOTE(review): presumably consulted by XDB_DBPRINT() together with the
 * XDB_DBG_* category bits -- confirm against io/xdb.h.
 */
static int xdb_debug = 0;

/* forward declarations for routines that are used before they are defined */
static void xdb_close(dev_info_t *);
static int xdb_push_response(xdb_t *, uint64_t, uint8_t, uint16_t);
static int xdb_get_request(xdb_t *, blkif_request_t *);
static void blkif_get_x86_32_req(blkif_request_t *, blkif_x86_32_request_t *);
static void blkif_get_x86_64_req(blkif_request_t *, blkif_x86_64_request_t *);
static int xdb_biodone(buf_t *);
     87 
     88 
     89 #ifdef DEBUG
     90 /*
     91  * debug aid functions
     92  */
     93 
     94 static void
     95 logva(xdb_t *vdp, uint64_t va)
     96 {
     97 	uint64_t *page_addrs;
     98 	int i;
     99 
    100 	page_addrs = vdp->page_addrs;
    101 	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
    102 		if (page_addrs[i] == va)
    103 			debug_enter("VA remapping found!");
    104 	}
    105 
    106 	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
    107 		if (page_addrs[i] == 0) {
    108 			page_addrs[i] = va;
    109 			break;
    110 		}
    111 	}
    112 	ASSERT(i < XDB_MAX_IO_PAGES(vdp));
    113 }
    114 
    115 static void
    116 unlogva(xdb_t *vdp, uint64_t va)
    117 {
    118 	uint64_t *page_addrs;
    119 	int i;
    120 
    121 	page_addrs = vdp->page_addrs;
    122 	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
    123 		if (page_addrs[i] == va) {
    124 			page_addrs[i] = 0;
    125 			break;
    126 		}
    127 	}
    128 	ASSERT(i < XDB_MAX_IO_PAGES(vdp));
    129 }
    130 
    131 static void
    132 xdb_dump_request_oe(blkif_request_t *req)
    133 {
    134 	int i;
    135 
    136 	/*
    137 	 * Exploit the public interface definitions for BLKIF_OP_READ
    138 	 * etc..
    139 	 */
    140 	char *op_name[] = { "read", "write", "barrier", "flush" };
    141 
    142 	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "op=%s", op_name[req->operation]));
    143 	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "num of segments=%d",
    144 	    req->nr_segments));
    145 	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "handle=%d", req->handle));
    146 	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "id=%llu",
    147 	    (unsigned long long)req->id));
    148 	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "start sector=%llu",
    149 	    (unsigned long long)req->sector_number));
    150 	for (i = 0; i < req->nr_segments; i++) {
    151 		XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "gref=%d, first sec=%d,"
    152 		    "last sec=%d", req->seg[i].gref, req->seg[i].first_sect,
    153 		    req->seg[i].last_sect));
    154 	}
    155 }
    156 #endif /* DEBUG */
    157 
/*
 * Statistics.
 *
 * Names of the named kstats exported by this driver.  xdb_kstat_init()
 * creates one kstat entry per name, and xdb_kstat_update() fills the
 * values in positionally, so the order here must match the assignment
 * order in xdb_kstat_update().
 */
static char *xdb_stats[] = {
	"rd_reqs",
	"wr_reqs",
	"br_reqs",
	"fl_reqs",
	"oo_reqs"
};
    168 
    169 static int
    170 xdb_kstat_update(kstat_t *ksp, int flag)
    171 {
    172 	xdb_t *vdp;
    173 	kstat_named_t *knp;
    174 
    175 	if (flag != KSTAT_READ)
    176 		return (EACCES);
    177 
    178 	vdp = ksp->ks_private;
    179 	knp = ksp->ks_data;
    180 
    181 	/*
    182 	 * Assignment order should match that of the names in
    183 	 * xdb_stats.
    184 	 */
    185 	(knp++)->value.ui64 = vdp->xs_stat_req_reads;
    186 	(knp++)->value.ui64 = vdp->xs_stat_req_writes;
    187 	(knp++)->value.ui64 = vdp->xs_stat_req_barriers;
    188 	(knp++)->value.ui64 = vdp->xs_stat_req_flushes;
    189 	(knp++)->value.ui64 = 0; /* oo_req */
    190 
    191 	return (0);
    192 }
    193 
    194 static boolean_t
    195 xdb_kstat_init(xdb_t *vdp)
    196 {
    197 	int nstat = sizeof (xdb_stats) / sizeof (xdb_stats[0]);
    198 	char **cp = xdb_stats;
    199 	kstat_named_t *knp;
    200 
    201 	if ((vdp->xs_kstats = kstat_create("xdb",
    202 	    ddi_get_instance(vdp->xs_dip),
    203 	    "req_statistics", "block", KSTAT_TYPE_NAMED,
    204 	    nstat, 0)) == NULL)
    205 		return (B_FALSE);
    206 
    207 	vdp->xs_kstats->ks_private = vdp;
    208 	vdp->xs_kstats->ks_update = xdb_kstat_update;
    209 
    210 	knp = vdp->xs_kstats->ks_data;
    211 	while (nstat > 0) {
    212 		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
    213 		knp++;
    214 		cp++;
    215 		nstat--;
    216 	}
    217 
    218 	kstat_install(vdp->xs_kstats);
    219 
    220 	return (B_TRUE);
    221 }
    222 
    223 static char *
    224 i_pathname(dev_info_t *dip)
    225 {
    226 	char *path, *rv;
    227 
    228 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
    229 	(void) ddi_pathname(dip, path);
    230 	rv = strdup(path);
    231 	kmem_free(path, MAXPATHLEN);
    232 
    233 	return (rv);
    234 }
    235 
/*
 * Set up the buf associated with 'xreq' (XDB_XREQ2BP) for the next piece
 * of I/O belonging to that request.
 *
 * req != NULL: a new request is being started.  The request fields are
 * read through the ring access handle, the segment count and the first/last
 * sector of each segment are clamped to the limits of the blkif interface,
 * the granted pages are mapped at this request's I/O page addresses, and
 * the buf is initialised.  A request with zero segments gets an empty buf
 * and no pages are mapped.
 *
 * req == NULL: the buf is being reused to continue the same request; the
 * starting block is advanced past what the previous buf covered and the
 * read/write direction is carried over.
 *
 * In both cases the buf is then made to cover one contiguous run of
 * segments starting at xreq->xr_curseg (see the comment above the final
 * loop), and xreq->xr_curseg is advanced to the first segment not covered.
 *
 * Returns the buf, or NULL if mapping the granted pages failed (in which
 * case any pages that did get mapped have been unmapped again).
 */
static buf_t *
xdb_get_buf(xdb_t *vdp, blkif_request_t *req, xdb_request_t *xreq)
{
	buf_t *bp;
	uint8_t segs, curseg;
	int sectors;
	int i, err;
	gnttab_map_grant_ref_t mapops[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	ddi_acc_handle_t acchdl;

	acchdl = vdp->xs_ring_hdl;
	bp = XDB_XREQ2BP(xreq);
	curseg = xreq->xr_curseg;
	/* init a new xdb request */
	if (req != NULL) {
		ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
		boolean_t pagemapok = B_TRUE;
		uint8_t op = ddi_get8(acchdl, &req->operation);

		xreq->xr_vdp = vdp;
		xreq->xr_op = op;
		xreq->xr_id = ddi_get64(acchdl, &req->id);
		segs = xreq->xr_buf_pages = ddi_get8(acchdl, &req->nr_segments);
		if (segs == 0) {
			/*
			 * No data pages: hand back an empty buf.  Only a
			 * cache flush is expected to look like this, so
			 * anything else is worth a warning.
			 */
			if (op != BLKIF_OP_FLUSH_DISKCACHE)
				cmn_err(CE_WARN, "!non-BLKIF_OP_FLUSH_DISKCACHE"
				    " is seen from domain %d with zero "
				    "length data buffer!", vdp->xs_peer);
			bioinit(bp);
			bp->b_bcount = 0;
			bp->b_lblkno = 0;
			bp->b_un.b_addr = NULL;
			return (bp);
		} else if (op == BLKIF_OP_FLUSH_DISKCACHE) {
			/* warn only; the request is still processed below */
			cmn_err(CE_WARN, "!BLKIF_OP_FLUSH_DISKCACHE"
			    " is seen from domain %d with non-zero "
			    "length data buffer!", vdp->xs_peer);
		}

		/*
		 * segs should be no bigger than BLKIF_MAX_SEGMENTS_PER_REQUEST
		 * according to the definition of blk interface by Xen
		 * we do sanity check here
		 */
		if (segs > BLKIF_MAX_SEGMENTS_PER_REQUEST)
			segs = xreq->xr_buf_pages =
			    BLKIF_MAX_SEGMENTS_PER_REQUEST;

		/* build one grant-map operation per segment */
		for (i = 0; i < segs; i++) {
			uint8_t fs, ls;

			mapops[i].host_addr =
			    (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
			    vdp->xs_iopage_va, xreq->xr_idx, i);
			mapops[i].dom = vdp->xs_peer;
			mapops[i].ref = ddi_get32(acchdl, &req->seg[i].gref);
			mapops[i].flags = GNTMAP_host_map;
			/* only a read needs to write into the guest's page */
			if (op != BLKIF_OP_READ)
				mapops[i].flags |= GNTMAP_readonly;

			fs = ddi_get8(acchdl, &req->seg[i].first_sect);
			ls = ddi_get8(acchdl, &req->seg[i].last_sect);

			/*
			 * first_sect should be no bigger than last_sect and
			 * both of them should be no bigger than
			 * XB_LAST_SECTOR_IN_SEG according to definition
			 * of blk interface by Xen, so sanity check again
			 */
			if (fs > XB_LAST_SECTOR_IN_SEG)
				fs = XB_LAST_SECTOR_IN_SEG;
			if (ls > XB_LAST_SECTOR_IN_SEG)
				ls = XB_LAST_SECTOR_IN_SEG;
			if (fs > ls)
				fs = ls;

			xreq->xr_segs[i].fs = fs;
			xreq->xr_segs[i].ls = ls;
		}

		/* map in io pages */
		err = xen_map_gref(GNTTABOP_map_grant_ref, mapops, i, B_FALSE);
		if (err != 0)
			return (NULL);
		for (i = 0; i < segs; i++) {
			/*
			 * Although HYPERVISOR_grant_table_op() returned no
			 * error, mapping of each single page can fail. So,
			 * we have to do the check here and handle the error
			 * if needed
			 */
			if (mapops[i].status != GNTST_okay) {
				int j;
				/*
				 * Undo the bookkeeping done for the pages
				 * handled so far; the mappings themselves
				 * are torn down after the loop.
				 */
				for (j = 0; j < i; j++) {
#ifdef DEBUG
					unlogva(vdp, mapops[j].host_addr);
#endif
					xen_release_pfn(
					    xreq->xr_plist[j].p_pagenum);
				}
				pagemapok = B_FALSE;
				break;
			}
			/* record page mapping handle for unmapping later */
			xreq->xr_page_hdls[i] = mapops[i].handle;
#ifdef DEBUG
			logva(vdp, mapops[i].host_addr);
#endif
			/*
			 * Pass the MFNs down using the shadow list (xr_pplist)
			 *
			 * This is pretty ugly since we have implicit knowledge
			 * of how the rootnex binds buffers.
			 * The GNTTABOP_map_grant_ref op makes us do some ugly
			 * stuff since we're not allowed to touch these PTEs
			 * from the VM.
			 *
			 * Obviously, these aren't real page_t's. The rootnex
			 * only needs p_pagenum.
			 * Also, don't use btop() here or 32 bit PAE breaks.
			 */
			xreq->xr_pplist[i] = &xreq->xr_plist[i];
			xreq->xr_plist[i].p_pagenum =
			    xen_assign_pfn(mapops[i].dev_bus_addr >> PAGESHIFT);
		}

		/*
		 * not all pages mapped in successfully, unmap those mapped-in
		 * page and return failure
		 */
		if (!pagemapok) {
			gnttab_unmap_grant_ref_t unmapop;

			/* every segment is checked, not just those before i */
			for (i = 0; i < segs; i++) {
				if (mapops[i].status != GNTST_okay)
					continue;
				unmapop.host_addr =
				    (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
				    vdp->xs_iopage_va, xreq->xr_idx, i);
				unmapop.dev_bus_addr = NULL;
				unmapop.handle = mapops[i].handle;
				(void) HYPERVISOR_grant_table_op(
				    GNTTABOP_unmap_grant_ref, &unmapop, 1);
			}

			return (NULL);
		}
		bioinit(bp);
		bp->b_lblkno = ddi_get64(acchdl, &req->sector_number);
		bp->b_flags = B_BUSY | B_SHADOW | B_PHYS;
		/* anything that is not a read is issued as an async write */
		bp->b_flags |= (ddi_get8(acchdl, &req->operation) ==
		    BLKIF_OP_READ) ? B_READ : (B_WRITE | B_ASYNC);
	} else {
		uint64_t blkst;
		int isread;

		/* reuse this buf */
		/* next start block = previous start + blocks just transferred */
		blkst = bp->b_lblkno + bp->b_bcount / DEV_BSIZE;
		isread = bp->b_flags & B_READ;
		bioreset(bp);
		bp->b_lblkno = blkst;
		bp->b_flags = B_BUSY | B_SHADOW | B_PHYS;
		bp->b_flags |= isread ? B_READ : (B_WRITE | B_ASYNC);
		XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "reuse buf, xreq is %d!!",
		    xreq->xr_idx));
	}

	/* form a buf */
	/* data starts at the first used sector of the current segment's page */
	bp->b_un.b_addr = XDB_IOPAGE_VA(vdp->xs_iopage_va, xreq->xr_idx,
	    curseg) + xreq->xr_segs[curseg].fs * DEV_BSIZE;
	bp->b_shadow = &xreq->xr_pplist[curseg];
	bp->b_iodone = xdb_biodone;
	sectors = 0;

	/*
	 * Run through the segments. There are XB_NUM_SECTORS_PER_SEG sectors
	 * per segment. On some OSes (e.g. Linux), there may be empty gaps
	 * between segments. (e.g. the first segment may end on sector 6 and
	 * the second segment start on sector 4).
	 *
	 * if a segments first sector is not set to 0, and this is not the
	 * first segment in our buf, end this buf now.
	 *
	 * if a segments last sector is not set to XB_LAST_SECTOR_IN_SEG, and
	 * this is not the last segment in the request, add this segment into
	 * the buf, then end this buf (updating the pointer to point to the
	 * next segment next time around).
	 */
	for (i = curseg; i < xreq->xr_buf_pages; i++) {
		if ((xreq->xr_segs[i].fs != 0) && (i != curseg)) {
			break;
		}
		sectors += (xreq->xr_segs[i].ls - xreq->xr_segs[i].fs + 1);
		if ((xreq->xr_segs[i].ls != XB_LAST_SECTOR_IN_SEG) &&
		    (i != (xreq->xr_buf_pages - 1))) {
			i++;
			break;
		}
	}
	/* remember where the next buf for this request has to start */
	xreq->xr_curseg = i;
	bp->b_bcount = sectors * DEV_BSIZE;
	bp->b_bufsize = bp->b_bcount;

	return (bp);
}
    441 
    442 static xdb_request_t *
    443 xdb_get_req(xdb_t *vdp)
    444 {
    445 	xdb_request_t *req;
    446 	int idx;
    447 
    448 	ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
    449 	ASSERT(vdp->xs_free_req != -1);
    450 	req = &vdp->xs_req[vdp->xs_free_req];
    451 	vdp->xs_free_req = req->xr_next;
    452 	idx = req->xr_idx;
    453 	bzero(req, sizeof (xdb_request_t));
    454 	req->xr_idx = idx;
    455 	return (req);
    456 }
    457 
    458 static void
    459 xdb_free_req(xdb_request_t *req)
    460 {
    461 	xdb_t *vdp = req->xr_vdp;
    462 
    463 	ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
    464 	req->xr_next = vdp->xs_free_req;
    465 	vdp->xs_free_req = req->xr_idx;
    466 }
    467 
    468 static void
    469 xdb_response(xdb_t *vdp, blkif_request_t *req, boolean_t ok)
    470 {
    471 	ddi_acc_handle_t acchdl = vdp->xs_ring_hdl;
    472 
    473 	if (xdb_push_response(vdp, ddi_get64(acchdl, &req->id),
    474 	    ddi_get8(acchdl, &req->operation), ok))
    475 		xvdi_notify_oe(vdp->xs_dip);
    476 }
    477 
    478 static void
    479 xdb_init_ioreqs(xdb_t *vdp)
    480 {
    481 	int i;
    482 
    483 	ASSERT(vdp->xs_nentry);
    484 
    485 	if (vdp->xs_req == NULL)
    486 		vdp->xs_req = kmem_alloc(vdp->xs_nentry *
    487 		    sizeof (xdb_request_t), KM_SLEEP);
    488 #ifdef DEBUG
    489 	if (vdp->page_addrs == NULL)
    490 		vdp->page_addrs = kmem_zalloc(XDB_MAX_IO_PAGES(vdp) *
    491 		    sizeof (uint64_t), KM_SLEEP);
    492 #endif
    493 	for (i = 0; i < vdp->xs_nentry; i++) {
    494 		vdp->xs_req[i].xr_idx = i;
    495 		vdp->xs_req[i].xr_next = i + 1;
    496 	}
    497 	vdp->xs_req[vdp->xs_nentry - 1].xr_next = -1;
    498 	vdp->xs_free_req = 0;
    499 
    500 	/* alloc va in host dom for io page mapping */
    501 	vdp->xs_iopage_va = vmem_xalloc(heap_arena,
    502 	    XDB_MAX_IO_PAGES(vdp) * PAGESIZE, PAGESIZE, 0, 0, 0, 0,
    503 	    VM_SLEEP);
    504 	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++)
    505 		hat_prepare_mapping(kas.a_hat,
    506 		    vdp->xs_iopage_va + i * PAGESIZE, NULL);
    507 }
    508 
    509 static void
    510 xdb_uninit_ioreqs(xdb_t *vdp)
    511 {
    512 	int i;
    513 
    514 	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++)
    515 		hat_release_mapping(kas.a_hat,
    516 		    vdp->xs_iopage_va + i * PAGESIZE);
    517 	vmem_xfree(heap_arena, vdp->xs_iopage_va,
    518 	    XDB_MAX_IO_PAGES(vdp) * PAGESIZE);
    519 	if (vdp->xs_req != NULL) {
    520 		kmem_free(vdp->xs_req, vdp->xs_nentry * sizeof (xdb_request_t));
    521 		vdp->xs_req = NULL;
    522 	}
    523 #ifdef DEBUG
    524 	if (vdp->page_addrs != NULL) {
    525 		kmem_free(vdp->page_addrs, XDB_MAX_IO_PAGES(vdp) *
    526 		    sizeof (uint64_t));
    527 		vdp->page_addrs = NULL;
    528 	}
    529 #endif
    530 }
    531 
    532 static uint_t
    533 xdb_intr(caddr_t arg)
    534 {
    535 	xdb_t		*vdp = (xdb_t *)arg;
    536 	dev_info_t	*dip = vdp->xs_dip;
    537 	blkif_request_t	req, *reqp = &req;
    538 	xdb_request_t	*xreq;
    539 	buf_t		*bp;
    540 	uint8_t		op;
    541 	int		ret = DDI_INTR_UNCLAIMED;
    542 
    543 	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
    544 	    "xdb@%s: I/O request received from dom %d",
    545 	    ddi_get_name_addr(dip), vdp->xs_peer));
    546 
    547 	mutex_enter(&vdp->xs_iomutex);
    548 
    549 	/* shouldn't touch ring buffer if not in connected state */
    550 	if (!vdp->xs_if_connected) {
    551 		mutex_exit(&vdp->xs_iomutex);
    552 		return (DDI_INTR_UNCLAIMED);
    553 	}
    554 	ASSERT(vdp->xs_hp_connected && vdp->xs_fe_initialised);
    555 
    556 	/*
    557 	 * We'll loop till there is no more request in the ring
    558 	 * We won't stuck in this loop for ever since the size of ring buffer
    559 	 * is limited, and frontend will stop pushing requests into it when
    560 	 * the ring buffer is full
    561 	 */
    562 
    563 	/* req_event will be increased in xvdi_ring_get_request() */
    564 	while (xdb_get_request(vdp, reqp)) {
    565 		ret = DDI_INTR_CLAIMED;
    566 
    567 		op = ddi_get8(vdp->xs_ring_hdl, &reqp->operation);
    568 		if (op == BLKIF_OP_READ			||
    569 		    op == BLKIF_OP_WRITE		||
    570 		    op == BLKIF_OP_WRITE_BARRIER	||
    571 		    op == BLKIF_OP_FLUSH_DISKCACHE) {
    572 #ifdef DEBUG
    573 			xdb_dump_request_oe(reqp);
    574 #endif
    575 			xreq = xdb_get_req(vdp);
    576 			ASSERT(xreq);
    577 			switch (op) {
    578 			case BLKIF_OP_READ:
    579 				vdp->xs_stat_req_reads++;
    580 				break;
    581 			case BLKIF_OP_WRITE_BARRIER:
    582 				vdp->xs_stat_req_barriers++;
    583 				/* FALLTHRU */
    584 			case BLKIF_OP_WRITE:
    585 				vdp->xs_stat_req_writes++;
    586 				break;
    587 			case BLKIF_OP_FLUSH_DISKCACHE:
    588 				vdp->xs_stat_req_flushes++;
    589 				break;
    590 			}
    591 
    592 			xreq->xr_curseg = 0; /* start from first segment */
    593 			bp = xdb_get_buf(vdp, reqp, xreq);
    594 			if (bp == NULL) {
    595 				/* failed to form a buf */
    596 				xdb_free_req(xreq);
    597 				xdb_response(vdp, reqp, B_FALSE);
    598 				continue;
    599 			}
    600 			bp->av_forw = NULL;
    601 
    602 			XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
    603 			    " buf %p, blkno %lld, size %lu, addr %p",
    604 			    (void *)bp, (longlong_t)bp->b_blkno,
    605 			    (ulong_t)bp->b_bcount, (void *)bp->b_un.b_addr));
    606 
    607 			/* send bp to underlying blk driver */
    608 			if (vdp->xs_f_iobuf == NULL) {
    609 				vdp->xs_f_iobuf = vdp->xs_l_iobuf = bp;
    610 			} else {
    611 				vdp->xs_l_iobuf->av_forw = bp;
    612 				vdp->xs_l_iobuf = bp;
    613 			}
    614 		} else {
    615 			xdb_response(vdp, reqp, B_FALSE);
    616 			XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: "
    617 			    "Unsupported cmd received from dom %d",
    618 			    ddi_get_name_addr(dip), vdp->xs_peer));
    619 		}
    620 	}
    621 	/* notify our taskq to push buf to underlying blk driver */
    622 	if (ret == DDI_INTR_CLAIMED)
    623 		cv_broadcast(&vdp->xs_iocv);
    624 
    625 	mutex_exit(&vdp->xs_iomutex);
    626 
    627 	return (ret);
    628 }
    629 
/*
 * b_iodone callback installed by xdb_get_buf() on every buf it builds.
 *
 * If the buf completed without error and the request still has segments
 * that were not covered, the buf is rebuilt for the next run of segments
 * and resubmitted with ldi_strategy(); the function returns right away in
 * that case.  Otherwise the request is finished:
 *  - the granted I/O pages are unmapped,
 *  - for a write barrier or cache flush request the device write cache is
 *    flushed (the result of the flush is ignored),
 *  - a response carrying the I/O error (0 on success) is pushed to the
 *    frontend, but only while the interface is still connected,
 *  - the buf and the request slot are released and xs_ionum is decremented,
 *    waking up a waiter on xs_ionumcv once the last I/O has drained after
 *    a disconnect.
 *
 * Always returns DDI_SUCCESS.
 */
static int
xdb_biodone(buf_t *bp)
{
	int i, err, bioerr;
	uint8_t segs;
	gnttab_unmap_grant_ref_t unmapops[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	xdb_request_t *xreq = XDB_BP2XREQ(bp);
	xdb_t *vdp = xreq->xr_vdp;
	buf_t *nbp;

	bioerr = geterror(bp);
	if (bioerr)
		XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: I/O error %d",
		    ddi_get_name_addr(vdp->xs_dip), bioerr));

	/* check if we are done w/ this I/O request */
	if ((bioerr == 0) && (xreq->xr_curseg < xreq->xr_buf_pages)) {
		/* more segments left: continue the request with the same buf */
		nbp = xdb_get_buf(vdp, NULL, xreq);
		if (nbp) {
			err = ldi_strategy(vdp->xs_ldi_hdl, nbp);
			if (err == 0) {
				XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
				    "sent buf to backend ok"));
				return (DDI_SUCCESS);
			}
			/* could not resubmit: fail the whole request */
			bioerr = EIO;
			XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: "
			    "sent buf to backend dev failed, err=%d",
			    ddi_get_name_addr(vdp->xs_dip), err));
		} else {
			bioerr = EIO;
		}
	}

	/* unmap io pages */
	segs = xreq->xr_buf_pages;
	/*
	 * segs should be no bigger than BLKIF_MAX_SEGMENTS_PER_REQUEST
	 * according to the definition of blk interface by Xen
	 */
	ASSERT(segs <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
	for (i = 0; i < segs; i++) {
		unmapops[i].host_addr = (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
		    vdp->xs_iopage_va, xreq->xr_idx, i);
#ifdef DEBUG
		/* the VA log is updated under xs_iomutex */
		mutex_enter(&vdp->xs_iomutex);
		unlogva(vdp, unmapops[i].host_addr);
		mutex_exit(&vdp->xs_iomutex);
#endif
		unmapops[i].dev_bus_addr = NULL;
		/* handle recorded by xdb_get_buf() when the page was mapped */
		unmapops[i].handle = xreq->xr_page_hdls[i];
	}
	err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
	    unmapops, segs);
	ASSERT(!err);

	/*
	 * If we have reached a barrier write or a cache flush , then we must
	 * flush all our I/Os.
	 */
	if (xreq->xr_op == BLKIF_OP_WRITE_BARRIER ||
	    xreq->xr_op == BLKIF_OP_FLUSH_DISKCACHE) {
		/*
		 * XXX At this point the write did succeed, so I don't
		 * believe we should report an error because the flush
		 * failed. However, this is a debatable point, so
		 * maybe we need to think more carefully about this.
		 * For now, just cast to void.
		 */
		(void) ldi_ioctl(vdp->xs_ldi_hdl,
		    DKIOCFLUSHWRITECACHE, NULL, FKIOCTL, kcred, NULL);
	}

	mutex_enter(&vdp->xs_iomutex);

	/* send response back to frontend */
	if (vdp->xs_if_connected) {
		ASSERT(vdp->xs_hp_connected && vdp->xs_fe_initialised);
		/* bioerr doubles as the status: 0 on success, errno on error */
		if (xdb_push_response(vdp, xreq->xr_id, xreq->xr_op, bioerr))
			xvdi_notify_oe(vdp->xs_dip);
		XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
		    "sent resp back to frontend, id=%llu",
		    (unsigned long long)xreq->xr_id));
	}
	/* free io resources */
	biofini(bp);
	xdb_free_req(xreq);

	vdp->xs_ionum--;
	if (!vdp->xs_if_connected && (vdp->xs_ionum == 0)) {
		/* we're closing, someone is waiting for I/O clean-up */
		cv_signal(&vdp->xs_ionumcv);
	}

	mutex_exit(&vdp->xs_iomutex);

	return (DDI_SUCCESS);
}
    728 
    729 static int
    730 xdb_bindto_frontend(xdb_t *vdp)
    731 {
    732 	int err;
    733 	char *oename;
    734 	grant_ref_t gref;
    735 	evtchn_port_t evtchn;
    736 	dev_info_t *dip = vdp->xs_dip;
    737 	char protocol[64] = "";
    738 
    739 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
    740 
    741 	/*
    742 	 * Switch to the XenbusStateInitialised state.  This let's the
    743 	 * frontend know that we're about to negotiate a connection.
    744 	 */
    745 	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialised);
    746 
    747 	/*
    748 	 * Gather info from frontend
    749 	 */
    750 	oename = xvdi_get_oename(dip);
    751 	if (oename == NULL)
    752 		return (DDI_FAILURE);
    753 
    754 	err = xenbus_gather(XBT_NULL, oename,
    755 	    XBP_RING_REF, "%lu", &gref,
    756 	    XBP_EVENT_CHAN, "%u", &evtchn,
    757 	    NULL);
    758 	if (err != 0) {
    759 		xvdi_dev_error(dip, err,
    760 		    "Getting ring-ref and evtchn from frontend");
    761 		return (DDI_FAILURE);
    762 	}
    763 
    764 	vdp->xs_blk_protocol = BLKIF_PROTOCOL_NATIVE;
    765 	vdp->xs_nentry = BLKIF_RING_SIZE;
    766 	vdp->xs_entrysize = sizeof (union blkif_sring_entry);
    767 
    768 	err = xenbus_gather(XBT_NULL, oename,
    769 	    XBP_PROTOCOL, "%63s", protocol, NULL);
    770 	if (err)
    771 		(void) strcpy(protocol, "unspecified, assuming native");
    772 	else {
    773 		/*
    774 		 * We must check for NATIVE first, so that the fast path
    775 		 * is taken for copying data from the guest to the host.
    776 		 */
    777 		if (strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE) != 0) {
    778 			if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_32) == 0) {
    779 				vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_32;
    780 				vdp->xs_nentry = BLKIF_X86_32_RING_SIZE;
    781 				vdp->xs_entrysize =
    782 				    sizeof (union blkif_x86_32_sring_entry);
    783 			} else if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_64) ==
    784 			    0) {
    785 				vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_64;
    786 				vdp->xs_nentry = BLKIF_X86_64_RING_SIZE;
    787 				vdp->xs_entrysize =
    788 				    sizeof (union blkif_x86_64_sring_entry);
    789 			} else {
    790 				xvdi_fatal_error(dip, err, "unknown protocol");
    791 				return (DDI_FAILURE);
    792 			}
    793 		}
    794 	}
    795 #ifdef DEBUG
    796 	cmn_err(CE_NOTE, "!xdb@%s: blkif protocol '%s' ",
    797 	    ddi_get_name_addr(dip), protocol);
    798 #endif
    799 
    800 	/*
    801 	 * Map and init ring.  The ring parameters must match those which
    802 	 * have been allocated in the front end.
    803 	 */
    804 	if (xvdi_map_ring(dip, vdp->xs_nentry, vdp->xs_entrysize,
    805 	    gref, &vdp->xs_ring) != DDI_SUCCESS)
    806 		return (DDI_FAILURE);
    807 
    808 	/*
    809 	 * This will be removed after we use shadow I/O ring request since
    810 	 * we don't need to access the ring itself directly, thus the access
    811 	 * handle is not needed
    812 	 */
    813 	vdp->xs_ring_hdl = vdp->xs_ring->xr_acc_hdl;
    814 
    815 	/* bind event channel */
    816 	err = xvdi_bind_evtchn(dip, evtchn);
    817 	if (err != DDI_SUCCESS) {
    818 		xvdi_unmap_ring(vdp->xs_ring);
    819 		return (DDI_FAILURE);
    820 	}
    821 
    822 	return (DDI_SUCCESS);
    823 }
    824 
    825 static void
    826 xdb_unbindfrom_frontend(xdb_t *vdp)
    827 {
    828 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
    829 
    830 	xvdi_free_evtchn(vdp->xs_dip);
    831 	xvdi_unmap_ring(vdp->xs_ring);
    832 }
    833 
    834 /*
    835  * xdb_params_change() initiates a allows change to the underlying device/file
    836  * that the backend is accessing.  It does this by disconnecting from the
    837  * frontend, closing the old device, clearing a bunch of xenbus parameters,
    838  * and switching back to the XenbusStateInitialising state.  The frontend
    839  * should notice this transition to the XenbusStateInitialising state and
    840  * should attempt to reconnect to us (the backend).
    841  */
    842 static void
    843 xdb_params_change(xdb_t *vdp, char *params, boolean_t update_xs)
    844 {
    845 	xenbus_transaction_t	xbt;
    846 	dev_info_t		*dip = vdp->xs_dip;
    847 	char			*xsname;
    848 	int			err;
    849 
    850 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
    851 	ASSERT(vdp->xs_params_path != NULL);
    852 
    853 	if ((xsname = xvdi_get_xsname(dip)) == NULL)
    854 		return;
    855 	if (strcmp(vdp->xs_params_path, params) == 0)
    856 		return;
    857 
    858 	/*
    859 	 * Close the device we're currently accessing and update the
    860 	 * path which points to our backend device/file.
    861 	 */
    862 	xdb_close(dip);
    863 	vdp->xs_fe_initialised = B_FALSE;
    864 
    865 trans_retry:
    866 	if ((err = xenbus_transaction_start(&xbt)) != 0) {
    867 		xvdi_dev_error(dip, err, "params change transaction init");
    868 		goto errout;
    869 	}
    870 
    871 	/*
    872 	 * Delete all the xenbus properties that are connection dependant
    873 	 * and go back to the initializing state so that the frontend
    874 	 * driver can re-negotiate a connection.
    875 	 */
    876 	if (((err = xenbus_rm(xbt, xsname, XBP_FB)) != 0) ||
    877 	    ((err = xenbus_rm(xbt, xsname, XBP_INFO)) != 0) ||
    878 	    ((err = xenbus_rm(xbt, xsname, "sector-size")) != 0) ||
    879 	    ((err = xenbus_rm(xbt, xsname, XBP_SECTORS)) != 0) ||
    880 	    ((err = xenbus_rm(xbt, xsname, "instance")) != 0) ||
    881 	    ((err = xenbus_rm(xbt, xsname, "node")) != 0) ||
    882 	    (update_xs && ((err = xenbus_printf(xbt, xsname,
    883 	    "params", "%s", params)) != 0)) ||
    884 	    ((err = xvdi_switch_state(dip,
    885 	    xbt, XenbusStateInitialising) > 0))) {
    886 		(void) xenbus_transaction_end(xbt, 1);
    887 		xvdi_dev_error(dip, err, "params change transaction setup");
    888 		goto errout;
    889 	}
    890 
    891 	if ((err = xenbus_transaction_end(xbt, 0)) != 0) {
    892 		if (err == EAGAIN) {
    893 			/* transaction is ended, don't need to abort it */
    894 			goto trans_retry;
    895 		}
    896 		xvdi_dev_error(dip, err, "params change transaction commit");
    897 		goto errout;
    898 	}
    899 
    900 	/* Change the device that we plan to access */
    901 	strfree(vdp->xs_params_path);
    902 	vdp->xs_params_path = strdup(params);
    903 	return;
    904 
    905 errout:
    906 	(void) xvdi_switch_state(dip, xbt, XenbusStateInitialising);
    907 }
    908 
    909 /*
    910  * xdb_watch_params_cb() - This callback is invoked whenever there
    911  * is an update to the following xenbus parameter:
    912  *     /local/domain/0/backend/vbd/<domU_id>/<domU_dev>/params
    913  *
    914  * This normally happens during xm block-configure operations, which
    915  * are used to change CD device images for HVM domUs.
    916  */
    917 /*ARGSUSED*/
    918 static void
    919 xdb_watch_params_cb(dev_info_t *dip, const char *path, void *arg)
    920 {
    921 	xdb_t			*vdp = (xdb_t *)ddi_get_driver_private(dip);
    922 	char			*xsname, *oename, *str, *str2;
    923 
    924 	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
    925 	    ((oename = xvdi_get_oename(dip)) == NULL)) {
    926 		return;
    927 	}
    928 
    929 	mutex_enter(&vdp->xs_cbmutex);
    930 
    931 	if (xenbus_read_str(xsname, "params", &str) != 0) {
    932 		mutex_exit(&vdp->xs_cbmutex);
    933 		return;
    934 	}
    935 
    936 	if (strcmp(vdp->xs_params_path, str) == 0) {
    937 		/* Nothing todo */
    938 		mutex_exit(&vdp->xs_cbmutex);
    939 		strfree(str);
    940 		return;
    941 	}
    942 
    943 	/*
    944 	 * If the frontend isn't a cd device, doesn't support media
    945 	 * requests, or has locked the media, then we can't change
    946 	 * the params value.  restore the current value.
    947 	 */
    948 	str2 = NULL;
    949 	if (!XDB_IS_FE_CD(vdp) ||
    950 	    (xenbus_read_str(oename, XBP_MEDIA_REQ, &str2) != 0) ||
    951 	    (strcmp(str2, XBV_MEDIA_REQ_LOCK) == 0)) {
    952 		if (str2 != NULL)
    953 			strfree(str2);
    954 		strfree(str);
    955 
    956 		str = i_pathname(dip);
    957 		cmn_err(CE_NOTE,
    958 		    "!%s: media locked, ignoring params update", str);
    959 		strfree(str);
    960 
    961 		mutex_exit(&vdp->xs_cbmutex);
    962 		return;
    963 	}
    964 
    965 	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE,
    966 	    "block-configure params request: \"%s\"", str));
    967 
    968 	xdb_params_change(vdp, str, B_FALSE);
    969 	mutex_exit(&vdp->xs_cbmutex);
    970 	strfree(str);
    971 }
    972 
    973 /*
    974  * xdb_watch_media_req_cb() - This callback is invoked whenever there
    975  * is an update to the following xenbus parameter:
    976  *     /local/domain/<domU_id>/device/vbd/<domU_dev>/media-req
    977  *
    978  * Media requests are only supported on CD devices and are issued by
    979  * the frontend.  Currently the only supported media request operaions
    980  * are "lock" and "eject".  A "lock" prevents the backend from changing
    981  * the backing device/file (via xm block-configure).  An "eject" requests
    982  * tells the backend device that it should disconnect from the frontend
    983  * and closing the backing device/file that is currently in use.
    984  */
    985 /*ARGSUSED*/
    986 static void
    987 xdb_watch_media_req_cb(dev_info_t *dip, const char *path, void *arg)
    988 {
    989 	xdb_t			*vdp = (xdb_t *)ddi_get_driver_private(dip);
    990 	char			*oename, *str;
    991 
    992 	mutex_enter(&vdp->xs_cbmutex);
    993 
    994 	if ((oename = xvdi_get_oename(dip)) == NULL) {
    995 		mutex_exit(&vdp->xs_cbmutex);
    996 		return;
    997 	}
    998 
    999 	if (xenbus_read_str(oename, XBP_MEDIA_REQ, &str) != 0) {
   1000 		mutex_exit(&vdp->xs_cbmutex);
   1001 		return;
   1002 	}
   1003 
   1004 	if (!XDB_IS_FE_CD(vdp)) {
   1005 		xvdi_dev_error(dip, EINVAL,
   1006 		    "media-req only supported for cdrom devices");
   1007 		mutex_exit(&vdp->xs_cbmutex);
   1008 		return;
   1009 	}
   1010 
   1011 	if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) {
   1012 		mutex_exit(&vdp->xs_cbmutex);
   1013 		strfree(str);
   1014 		return;
   1015 	}
   1016 	strfree(str);
   1017 
   1018 	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "media eject request"));
   1019 
   1020 	xdb_params_change(vdp, "", B_TRUE);
   1021 	(void) xenbus_printf(XBT_NULL, oename,
   1022 	    XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE);
   1023 	mutex_exit(&vdp->xs_cbmutex);
   1024 }
   1025 
   1026 /*
   1027  * If we're dealing with a cdrom device, let the frontend know that
   1028  * we support media requests via XBP_MEDIA_REQ_SUP, and setup a watch
   1029  * to handle those frontend media request changes, which modify the
   1030  * following xenstore parameter:
   1031  *	/local/domain/<domU_id>/device/vbd/<domU_dev>/media-req
   1032  */
   1033 static boolean_t
   1034 xdb_media_req_init(xdb_t *vdp)
   1035 {
   1036 	dev_info_t		*dip = vdp->xs_dip;
   1037 	char			*xsname, *oename;
   1038 
   1039 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
   1040 
   1041 	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
   1042 	    ((oename = xvdi_get_oename(dip)) == NULL))
   1043 		return (B_FALSE);
   1044 
   1045 	if (!XDB_IS_FE_CD(vdp))
   1046 		return (B_TRUE);
   1047 
   1048 	if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ_SUP, "%d", 1) != 0)
   1049 		return (B_FALSE);
   1050 
   1051 	if (xvdi_add_xb_watch_handler(dip, oename,
   1052 	    XBP_MEDIA_REQ, xdb_watch_media_req_cb, NULL) != DDI_SUCCESS) {
   1053 		xvdi_dev_error(dip, EAGAIN,
   1054 		    "Failed to register watch for cdrom media requests");
   1055 		return (B_FALSE);
   1056 	}
   1057 
   1058 	return (B_TRUE);
   1059 }
   1060 
   1061 /*
   1062  * Get our params value.  Also, if we're using "params" then setup a
   1063  * watch to handle xm block-configure operations which modify the
   1064  * following xenstore parameter:
   1065  *	/local/domain/0/backend/vbd/<domU_id>/<domU_dev>/params
   1066  */
   1067 static boolean_t
   1068 xdb_params_init(xdb_t *vdp)
   1069 {
   1070 	dev_info_t		*dip = vdp->xs_dip;
   1071 	char			*str, *xsname;
   1072 	int			err;
   1073 
   1074 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
   1075 	ASSERT(vdp->xs_params_path == NULL);
   1076 
   1077 	if ((xsname = xvdi_get_xsname(dip)) == NULL)
   1078 		return (B_FALSE);
   1079 
   1080 	err = xenbus_read_str(xsname, "params", &str);
   1081 	if (err != 0) {
   1082 		return (B_FALSE);
   1083 	}
   1084 	vdp->xs_params_path = str;
   1085 
   1086 	if (xvdi_add_xb_watch_handler(dip, xsname, "params",
   1087 	    xdb_watch_params_cb, NULL) != DDI_SUCCESS) {
   1088 		strfree(vdp->xs_params_path);
   1089 		vdp->xs_params_path = NULL;
   1090 		return (B_FALSE);
   1091 	}
   1092 
   1093 	return (B_TRUE);
   1094 }
   1095 
   1096 #define	LOFI_CTRL_NODE	"/dev/lofictl"
   1097 #define	LOFI_DEV_NODE	"/devices/pseudo/lofi@0:"
   1098 #define	LOFI_MODE	(FREAD | FWRITE | FEXCL)
   1099 
   1100 static int
   1101 xdb_setup_node(xdb_t *vdp, char *path)
   1102 {
   1103 	dev_info_t		*dip = vdp->xs_dip;
   1104 	char			*xsname, *str;
   1105 	ldi_handle_t		ldi_hdl;
   1106 	struct lofi_ioctl	*li;
   1107 	int			minor, err;
   1108 
   1109 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
   1110 
   1111 	if ((xsname = xvdi_get_xsname(dip)) == NULL)
   1112 		return (DDI_FAILURE);
   1113 
   1114 	if ((err = xenbus_read_str(xsname, "type", &str)) != 0) {
   1115 		xvdi_dev_error(dip, err, "Getting type from backend device");
   1116 		return (DDI_FAILURE);
   1117 	}
   1118 	if (strcmp(str, "file") == 0)
   1119 		vdp->xs_type |= XDB_DEV_BE_LOFI;
   1120 	strfree(str);
   1121 
   1122 	if (!XDB_IS_BE_LOFI(vdp)) {
   1123 		(void) strlcpy(path, vdp->xs_params_path, MAXPATHLEN);
   1124 		ASSERT(vdp->xs_lofi_path == NULL);
   1125 		return (DDI_SUCCESS);
   1126 	}
   1127 
   1128 	do {
   1129 		err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred,
   1130 		    &ldi_hdl, vdp->xs_ldi_li);
   1131 	} while (err == EBUSY);
   1132 	if (err != 0) {
   1133 		return (DDI_FAILURE);
   1134 	}
   1135 
   1136 	li = kmem_zalloc(sizeof (*li), KM_SLEEP);
   1137 	(void) strlcpy(li->li_filename, vdp->xs_params_path,
   1138 	    sizeof (li->li_filename));
   1139 	err = ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li,
   1140 	    LOFI_MODE | FKIOCTL, kcred, &minor);
   1141 	(void) ldi_close(ldi_hdl, LOFI_MODE, kcred);
   1142 	kmem_free(li, sizeof (*li));
   1143 
   1144 	if (err != 0) {
   1145 		cmn_err(CE_WARN, "xdb@%s: Failed to create lofi dev for %s",
   1146 		    ddi_get_name_addr(dip), vdp->xs_params_path);
   1147 		return (DDI_FAILURE);
   1148 	}
   1149 
   1150 	/*
   1151 	 * return '/devices/...' instead of '/dev/lofi/...' since the
   1152 	 * former is available immediately after calling ldi_ioctl
   1153 	 */
   1154 	(void) snprintf(path, MAXPATHLEN, LOFI_DEV_NODE "%d", minor);
   1155 	(void) xenbus_printf(XBT_NULL, xsname, "node", "%s", path);
   1156 
   1157 	ASSERT(vdp->xs_lofi_path == NULL);
   1158 	vdp->xs_lofi_path = strdup(path);
   1159 
   1160 	return (DDI_SUCCESS);
   1161 }
   1162 
   1163 static void
   1164 xdb_teardown_node(xdb_t *vdp)
   1165 {
   1166 	dev_info_t *dip = vdp->xs_dip;
   1167 	ldi_handle_t ldi_hdl;
   1168 	struct lofi_ioctl *li;
   1169 	int err;
   1170 
   1171 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
   1172 
   1173 	if (!XDB_IS_BE_LOFI(vdp))
   1174 		return;
   1175 
   1176 	vdp->xs_type &= ~XDB_DEV_BE_LOFI;
   1177 	ASSERT(vdp->xs_lofi_path != NULL);
   1178 
   1179 	li = kmem_zalloc(sizeof (*li), KM_SLEEP);
   1180 	(void) strlcpy(li->li_filename, vdp->xs_params_path,
   1181 	    sizeof (li->li_filename));
   1182 
   1183 	do {
   1184 		err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred,
   1185 		    &ldi_hdl, vdp->xs_ldi_li);
   1186 	} while (err == EBUSY);
   1187 
   1188 	if (err != 0) {
   1189 		kmem_free(li, sizeof (*li));
   1190 		return;
   1191 	}
   1192 
   1193 	if (ldi_ioctl(ldi_hdl, LOFI_UNMAP_FILE, (intptr_t)li,
   1194 	    LOFI_MODE | FKIOCTL, kcred, NULL) != 0) {
   1195 		cmn_err(CE_WARN, "xdb@%s: Failed to delete lofi dev for %s",
   1196 		    ddi_get_name_addr(dip), li->li_filename);
   1197 	}
   1198 
   1199 	(void) ldi_close(ldi_hdl, LOFI_MODE, kcred);
   1200 	kmem_free(li, sizeof (*li));
   1201 
   1202 	strfree(vdp->xs_lofi_path);
   1203 	vdp->xs_lofi_path = NULL;
   1204 }
   1205 
   1206 static int
   1207 xdb_open_device(xdb_t *vdp)
   1208 {
   1209 	dev_info_t *dip = vdp->xs_dip;
   1210 	uint64_t devsize;
   1211 	int blksize;
   1212 	char *nodepath;
   1213 	char *xsname;
   1214 	char *str;
   1215 	int err;
   1216 
   1217 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
   1218 
   1219 	if (strlen(vdp->xs_params_path) == 0) {
   1220 		/*
   1221 		 * it's possible to have no backing device when dealing
   1222 		 * with a pv cdrom drive that has no virtual cd associated
   1223 		 * with it.
   1224 		 */
   1225 		ASSERT(XDB_IS_FE_CD(vdp));
   1226 		ASSERT(vdp->xs_sectors == 0);
   1227 		ASSERT(vdp->xs_ldi_li == NULL);
   1228 		ASSERT(vdp->xs_ldi_hdl == NULL);
   1229 		return (DDI_SUCCESS);
   1230 	}
   1231 
   1232 	/*
   1233 	 * after the hotplug scripts have "connected" the device, check to see
   1234 	 * if we're using a dynamic device.  If so, replace the params path
   1235 	 * with the dynamic one.
   1236 	 */
   1237 	xsname = xvdi_get_xsname(dip);
   1238 	err = xenbus_read_str(xsname, "dynamic-device-path", &str);
   1239 	if (err == 0) {
   1240 		strfree(vdp->xs_params_path);
   1241 		vdp->xs_params_path = str;
   1242 	}
   1243 
   1244 	if (ldi_ident_from_dip(dip, &vdp->xs_ldi_li) != 0)
   1245 		return (DDI_FAILURE);
   1246 
   1247 	nodepath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
   1248 
   1249 	/* try to open backend device */
   1250 	if (xdb_setup_node(vdp, nodepath) != DDI_SUCCESS) {
   1251 		xvdi_dev_error(dip, ENXIO,
   1252 		    "Getting device path of backend device");
   1253 		ldi_ident_release(vdp->xs_ldi_li);
   1254 		kmem_free(nodepath, MAXPATHLEN);
   1255 		return (DDI_FAILURE);
   1256 	}
   1257 
   1258 	if (ldi_open_by_name(nodepath,
   1259 	    FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE),
   1260 	    kcred, &vdp->xs_ldi_hdl, vdp->xs_ldi_li) != 0) {
   1261 		xdb_teardown_node(vdp);
   1262 		ldi_ident_release(vdp->xs_ldi_li);
   1263 		cmn_err(CE_WARN, "xdb@%s: Failed to open: %s",
   1264 		    ddi_get_name_addr(dip), nodepath);
   1265 		kmem_free(nodepath, MAXPATHLEN);
   1266 		return (DDI_FAILURE);
   1267 	}
   1268 
   1269 	if (ldi_get_size(vdp->xs_ldi_hdl, &devsize) != DDI_SUCCESS) {
   1270 		(void) ldi_close(vdp->xs_ldi_hdl,
   1271 		    FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred);
   1272 		xdb_teardown_node(vdp);
   1273 		ldi_ident_release(vdp->xs_ldi_li);
   1274 		kmem_free(nodepath, MAXPATHLEN);
   1275 		return (DDI_FAILURE);
   1276 	}
   1277 
   1278 	blksize = ldi_prop_get_int64(vdp->xs_ldi_hdl,
   1279 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
   1280 	    "blksize", DEV_BSIZE);
   1281 	if (blksize == DEV_BSIZE)
   1282 		blksize = ldi_prop_get_int(vdp->xs_ldi_hdl,
   1283 		    LDI_DEV_T_ANY | DDI_PROP_DONTPASS |
   1284 		    DDI_PROP_NOTPROM, "device-blksize", DEV_BSIZE);
   1285 
   1286 	vdp->xs_sec_size = blksize;
   1287 	vdp->xs_sectors = devsize / blksize;
   1288 
   1289 	/* check if the underlying device is a CD/DVD disc */
   1290 	if (ldi_prop_get_int(vdp->xs_ldi_hdl, LDI_DEV_T_ANY | DDI_PROP_DONTPASS,
   1291 	    INQUIRY_DEVICE_TYPE, DTYPE_DIRECT) == DTYPE_RODIRECT)
   1292 		vdp->xs_type |= XDB_DEV_BE_CD;
   1293 
   1294 	/* check if the underlying device is a removable disk */
   1295 	if (ldi_prop_exists(vdp->xs_ldi_hdl,
   1296 	    LDI_DEV_T_ANY | DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
   1297 	    "removable-media"))
   1298 		vdp->xs_type |= XDB_DEV_BE_RMB;
   1299 
   1300 	kmem_free(nodepath, MAXPATHLEN);
   1301 	return (DDI_SUCCESS);
   1302 }
   1303 
   1304 static void
   1305 xdb_close_device(xdb_t *vdp)
   1306 {
   1307 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
   1308 
   1309 	if (strlen(vdp->xs_params_path) == 0) {
   1310 		ASSERT(XDB_IS_FE_CD(vdp));
   1311 		ASSERT(vdp->xs_sectors == 0);
   1312 		ASSERT(vdp->xs_ldi_li == NULL);
   1313 		ASSERT(vdp->xs_ldi_hdl == NULL);
   1314 		return;
   1315 	}
   1316 
   1317 	(void) ldi_close(vdp->xs_ldi_hdl,
   1318 	    FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred);
   1319 	xdb_teardown_node(vdp);
   1320 	ldi_ident_release(vdp->xs_ldi_li);
   1321 	vdp->xs_type &= ~(XDB_DEV_BE_CD | XDB_DEV_BE_RMB);
   1322 	vdp->xs_sectors = 0;
   1323 	vdp->xs_ldi_li = NULL;
   1324 	vdp->xs_ldi_hdl = NULL;
   1325 }
   1326 
   1327 /*
   1328  * Kick-off connect process
   1329  * If xs_fe_initialised == B_TRUE and xs_hp_connected == B_TRUE
   1330  * the xs_if_connected will be changed to B_TRUE on success,
   1331  */
   1332 static void
   1333 xdb_start_connect(xdb_t *vdp)
   1334 {
   1335 	xenbus_transaction_t	xbt;
   1336 	dev_info_t		*dip = vdp->xs_dip;
   1337 	boolean_t		fb_exists;
   1338 	int			err, instance = ddi_get_instance(dip);
   1339 	uint64_t		sectors;
   1340 	uint_t			dinfo, ssize;
   1341 	char			*xsname;
   1342 
   1343 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
   1344 
   1345 	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
   1346 	    ((vdp->xs_peer = xvdi_get_oeid(dip)) == (domid_t)-1))
   1347 		return;
   1348 
   1349 	mutex_enter(&vdp->xs_iomutex);
   1350 	/*
   1351 	 * if the hotplug scripts haven't run or if the frontend is not
   1352 	 * initialized, then we can't try to connect.
   1353 	 */
   1354 	if (!vdp->xs_hp_connected || !vdp->xs_fe_initialised) {
   1355 		ASSERT(!vdp->xs_if_connected);
   1356 		mutex_exit(&vdp->xs_iomutex);
   1357 		return;
   1358 	}
   1359 
   1360 	/* If we're already connected then there's nothing todo */
   1361 	if (vdp->xs_if_connected) {
   1362 		mutex_exit(&vdp->xs_iomutex);
   1363 		return;
   1364 	}
   1365 	mutex_exit(&vdp->xs_iomutex);
   1366 
   1367 	/*
   1368 	 * Start connect to frontend only when backend device are ready
   1369 	 * and frontend has moved to XenbusStateInitialised, which means
   1370 	 * ready to connect.
   1371 	 */
   1372 	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE,
   1373 	    "xdb@%s: starting connection process", ddi_get_name_addr(dip)));
   1374 
   1375 	if (xdb_open_device(vdp) != DDI_SUCCESS)
   1376 		return;
   1377 
   1378 	if (xdb_bindto_frontend(vdp) != DDI_SUCCESS) {
   1379 		xdb_close_device(vdp);
   1380 		return;
   1381 	}
   1382 
   1383 	/* init i/o requests */
   1384 	xdb_init_ioreqs(vdp);
   1385 
   1386 	if (ddi_add_intr(dip, 0, NULL, NULL, xdb_intr, (caddr_t)vdp)
   1387 	    != DDI_SUCCESS) {
   1388 		xdb_uninit_ioreqs(vdp);
   1389 		xdb_unbindfrom_frontend(vdp);
   1390 		xdb_close_device(vdp);
   1391 		return;
   1392 	}
   1393 
   1394 	dinfo = 0;
   1395 	if (XDB_IS_RO(vdp))
   1396 		dinfo |= VDISK_READONLY;
   1397 	if (XDB_IS_BE_RMB(vdp))
   1398 		dinfo |= VDISK_REMOVABLE;
   1399 	if (XDB_IS_BE_CD(vdp))
   1400 		dinfo |= VDISK_CDROM;
   1401 	if (XDB_IS_FE_CD(vdp))
   1402 		dinfo |= VDISK_REMOVABLE | VDISK_CDROM;
   1403 
   1404 	/*
   1405 	 * we can recieve intr any time from now on
   1406 	 * mark that we're ready to take intr
   1407 	 */
   1408 	mutex_enter(&vdp->xs_iomutex);
   1409 	ASSERT(vdp->xs_fe_initialised);
   1410 	vdp->xs_if_connected = B_TRUE;
   1411 	mutex_exit(&vdp->xs_iomutex);
   1412 
   1413 trans_retry:
   1414 	/* write into xenstore the info needed by frontend */
   1415 	if ((err = xenbus_transaction_start(&xbt)) != 0) {
   1416 		xvdi_dev_error(dip, err, "connect transaction init");
   1417 		goto errout;
   1418 	}
   1419 
   1420 	/* If feature-barrier isn't present in xenstore, add it.  */
   1421 	fb_exists = xenbus_exists(xsname, XBP_FB);
   1422 
   1423 	ssize = (vdp->xs_sec_size == 0) ? DEV_BSIZE : vdp->xs_sec_size;
   1424 	sectors = vdp->xs_sectors;
   1425 	if (((!fb_exists &&
   1426 	    (err = xenbus_printf(xbt, xsname, XBP_FB, "%d", 1)))) ||
   1427 	    (err = xenbus_printf(xbt, xsname, XBP_INFO, "%u", dinfo)) ||
   1428 	    (err = xenbus_printf(xbt, xsname, XBP_SECTOR_SIZE, "%u", ssize)) ||
   1429 	    (err = xenbus_printf(xbt, xsname,
   1430 	    XBP_SECTORS, "%"PRIu64, sectors)) ||
   1431 	    (err = xenbus_printf(xbt, xsname, "instance", "%d", instance)) ||
   1432 	    ((err = xvdi_switch_state(dip, xbt, XenbusStateConnected)) > 0)) {
   1433 		(void) xenbus_transaction_end(xbt, 1);
   1434 		xvdi_dev_error(dip, err, "connect transaction setup");
   1435 		goto errout;
   1436 	}
   1437 
   1438 	if ((err = xenbus_transaction_end(xbt, 0)) != 0) {
   1439 		if (err == EAGAIN) {
   1440 			/* transaction is ended, don't need to abort it */
   1441 			goto trans_retry;
   1442 		}
   1443 		xvdi_dev_error(dip, err, "connect transaction commit");
   1444 		goto errout;
   1445 	}
   1446 
   1447 	return;
   1448 
   1449 errout:
   1450 	xdb_close(dip);
   1451 }
   1452 
   1453 /*
   1454  * Disconnect from frontend and close backend device
   1455  */
   1456 static void
   1457 xdb_close(dev_info_t *dip)
   1458 {
   1459 	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);
   1460 
   1461 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
   1462 	mutex_enter(&vdp->xs_iomutex);
   1463 
   1464 	/*
   1465 	 * if the hotplug scripts haven't run or if the frontend is not
   1466 	 * initialized, then we can't be connected, so there's no
   1467 	 * connection to close.
   1468 	 */
   1469 	if (!vdp->xs_hp_connected || !vdp->xs_fe_initialised) {
   1470 		ASSERT(!vdp->xs_if_connected);
   1471 		mutex_exit(&vdp->xs_iomutex);
   1472 		return;
   1473 	}
   1474 
   1475 	/* if we're not connected, there's nothing to do */
   1476 	if (!vdp->xs_if_connected) {
   1477 		cv_broadcast(&vdp->xs_iocv);
   1478 		mutex_exit(&vdp->xs_iomutex);
   1479 		return;
   1480 	}
   1481 
   1482 	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "closing while connected"));
   1483 
   1484 	vdp->xs_if_connected = B_FALSE;
   1485 	cv_broadcast(&vdp->xs_iocv);
   1486 
   1487 	mutex_exit(&vdp->xs_iomutex);
   1488 
   1489 	/* stop accepting I/O request from frontend */
   1490 	ddi_remove_intr(dip, 0, NULL);
   1491 
   1492 	/* clear all on-going I/Os, if any */
   1493 	mutex_enter(&vdp->xs_iomutex);
   1494 	while (vdp->xs_ionum > 0)
   1495 		cv_wait(&vdp->xs_ionumcv, &vdp->xs_iomutex);
   1496 	mutex_exit(&vdp->xs_iomutex);
   1497 
   1498 	/* clean up resources and close this interface */
   1499 	xdb_uninit_ioreqs(vdp);
   1500 	xdb_unbindfrom_frontend(vdp);
   1501 	xdb_close_device(vdp);
   1502 	vdp->xs_peer = (domid_t)-1;
   1503 }
   1504 
   1505 static void
   1506 xdb_send_buf(void *arg)
   1507 {
   1508 	xdb_t	*vdp = (xdb_t *)arg;
   1509 	buf_t	*bp;
   1510 	int	err;
   1511 
   1512 	mutex_enter(&vdp->xs_iomutex);
   1513 	while (vdp->xs_send_buf) {
   1514 		if ((bp = vdp->xs_f_iobuf) == NULL) {
   1515 			/* wait for some io to send */
   1516 			XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
   1517 			    "send buf waiting for io"));
   1518 			cv_wait(&vdp->xs_iocv, &vdp->xs_iomutex);
   1519 			continue;
   1520 		}
   1521 
   1522 		vdp->xs_f_iobuf = bp->av_forw;
   1523 		bp->av_forw = NULL;
   1524 		vdp->xs_ionum++;
   1525 
   1526 		mutex_exit(&vdp->xs_iomutex);
   1527 		if (bp->b_bcount == 0) {
   1528 			/* no I/O needs to be done */
   1529 			(void) xdb_biodone(bp);
   1530 			mutex_enter(&vdp->xs_iomutex);
   1531 			continue;
   1532 		}
   1533 
   1534 		err = EIO;
   1535 		if (vdp->xs_ldi_hdl != NULL)
   1536 			err = ldi_strategy(vdp->xs_ldi_hdl, bp);
   1537 		if (err != 0) {
   1538 			bp->b_flags |= B_ERROR;
   1539 			(void) xdb_biodone(bp);
   1540 			XDB_DBPRINT(XDB_DBG_IO, (CE_WARN,
   1541 			    "xdb@%s: sent buf to backend devfailed, err=%d",
   1542 			    ddi_get_name_addr(vdp->xs_dip), err));
   1543 		} else {
   1544 			XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
   1545 			    "sent buf to backend ok"));
   1546 		}
   1547 		mutex_enter(&vdp->xs_iomutex);
   1548 	}
   1549 	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "send buf finishing"));
   1550 	mutex_exit(&vdp->xs_iomutex);
   1551 }
   1552 
   1553 /*ARGSUSED*/
   1554 static void
   1555 xdb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg,
   1556     void *impl_data)
   1557 {
   1558 	xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;
   1559 	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);
   1560 
   1561 	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: "
   1562 	    "hotplug status change to %d!", ddi_get_name_addr(dip), state));
   1563 
   1564 	if (state != Connected)
   1565 		return;
   1566 
   1567 	mutex_enter(&vdp->xs_cbmutex);
   1568 
   1569 	/* If hotplug script have already run, there's nothing todo */
   1570 	if (vdp->xs_hp_connected) {
   1571 		mutex_exit(&vdp->xs_cbmutex);
   1572 		return;
   1573 	}
   1574 
   1575 	vdp->xs_hp_connected = B_TRUE;
   1576 	xdb_start_connect(vdp);
   1577 	mutex_exit(&vdp->xs_cbmutex);
   1578 }
   1579 
   1580 /*ARGSUSED*/
   1581 static void
   1582 xdb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg,
   1583     void *impl_data)
   1584 {
   1585 	XenbusState new_state = *(XenbusState *)impl_data;
   1586 	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);
   1587 
   1588 	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: "
   1589 	    "otherend state change to %d!", ddi_get_name_addr(dip), new_state));
   1590 
   1591 	mutex_enter(&vdp->xs_cbmutex);
   1592 
   1593 	/*
   1594 	 * Now it'd really be nice if there was a well defined state
   1595 	 * transition model for xen frontend drivers, but unfortunatly
   1596 	 * there isn't.  So we're stuck with assuming that all state
   1597 	 * transitions are possible, and we'll just have to deal with
   1598 	 * them regardless of what state we're in.
   1599 	 */
   1600 	switch (new_state) {
   1601 	case XenbusStateUnknown:
   1602 	case XenbusStateInitialising:
   1603 	case XenbusStateInitWait:
   1604 		/* tear down our connection to the frontend */
   1605 		xdb_close(dip);
   1606 		vdp->xs_fe_initialised = B_FALSE;
   1607 		break;
   1608 
   1609 	case XenbusStateInitialised:
   1610 		/*
   1611 		 * If we were conected, then we need to drop the connection
   1612 		 * and re-negotiate it.
   1613 		 */
   1614 		xdb_close(dip);
   1615 		vdp->xs_fe_initialised = B_TRUE;
   1616 		xdb_start_connect(vdp);
   1617 		break;
   1618 
   1619 	case XenbusStateConnected:
   1620 		/* nothing todo here other than congratulate the frontend */
   1621 		break;
   1622 
   1623 	case XenbusStateClosing:
   1624 		/* monkey see monkey do */
   1625 		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);
   1626 		break;
   1627 
   1628 	case XenbusStateClosed:
   1629 		/* tear down our connection to the frontend */
   1630 		xdb_close(dip);
   1631 		vdp->xs_fe_initialised = B_FALSE;
   1632 		(void) xvdi_switch_state(dip, XBT_NULL, new_state);
   1633 		break;
   1634 	}
   1635 
   1636 	mutex_exit(&vdp->xs_cbmutex);
   1637 }
   1638 
   1639 static int
   1640 xdb_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
   1641 {
   1642 	ddi_iblock_cookie_t	ibc;
   1643 	xdb_t			*vdp;
   1644 	int			instance = ddi_get_instance(dip);
   1645 	char			*xsname, *oename;
   1646 	char			*str;
   1647 
   1648 	switch (cmd) {
   1649 	case DDI_RESUME:
   1650 		return (DDI_FAILURE);
   1651 	case DDI_ATTACH:
   1652 		break;
   1653 	default:
   1654 		return (DDI_FAILURE);
   1655 	}
   1656 	/* DDI_ATTACH */
   1657 
   1658 	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
   1659 	    ((oename = xvdi_get_oename(dip)) == NULL))
   1660 		return (DDI_FAILURE);
   1661 
   1662 	/*
   1663 	 * Disable auto-detach.  This is necessary so that we don't get
   1664 	 * detached while we're disconnected from the front end.
   1665 	 */
   1666 	(void) ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1);
   1667 
   1668 	if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS)
   1669 		return (DDI_FAILURE);
   1670 
   1671 	if (ddi_soft_state_zalloc(xdb_statep, instance) != DDI_SUCCESS)
   1672 		return (DDI_FAILURE);
   1673 
   1674 	vdp = ddi_get_soft_state(xdb_statep, instance);
   1675 	vdp->xs_dip = dip;
   1676 	mutex_init(&vdp->xs_iomutex, NULL, MUTEX_DRIVER, (void *)ibc);
   1677 	mutex_init(&vdp->xs_cbmutex, NULL, MUTEX_DRIVER, (void *)ibc);
   1678 	cv_init(&vdp->xs_iocv, NULL, CV_DRIVER, NULL);
   1679 	cv_init(&vdp->xs_ionumcv, NULL, CV_DRIVER, NULL);
   1680 	ddi_set_driver_private(dip, vdp);
   1681 
   1682 	if (!xdb_kstat_init(vdp))
   1683 		goto errout1;
   1684 
   1685 	/* Check if the frontend device is supposed to be a cdrom */
   1686 	if (xenbus_read_str(oename, XBP_DEV_TYPE, &str) != 0)
   1687 		return (DDI_FAILURE);
   1688 	if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
   1689 		vdp->xs_type |= XDB_DEV_FE_CD;
   1690 	strfree(str);
   1691 
   1692 	/* Check if the frontend device is supposed to be read only */
   1693 	if (xenbus_read_str(xsname, "mode", &str) != 0)
   1694 		return (DDI_FAILURE);
   1695 	if ((strcmp(str, "r") == NULL) || (strcmp(str, "ro") == NULL))
   1696 		vdp->xs_type |= XDB_DEV_RO;
   1697 	strfree(str);
   1698 
   1699 	mutex_enter(&vdp->xs_cbmutex);
   1700 	if (!xdb_media_req_init(vdp) || !xdb_params_init(vdp)) {
   1701 		xvdi_remove_xb_watch_handlers(dip);
   1702 		mutex_exit(&vdp->xs_cbmutex);
   1703 		goto errout2;
   1704 	}
   1705 	mutex_exit(&vdp->xs_cbmutex);
   1706 
   1707 	vdp->xs_send_buf = B_TRUE;
   1708 	vdp->xs_iotaskq = ddi_taskq_create(dip, "xdb_iotask", 1,
   1709 	    TASKQ_DEFAULTPRI, 0);
   1710 	(void) ddi_taskq_dispatch(vdp->xs_iotaskq, xdb_send_buf, vdp,
   1711 	    DDI_SLEEP);
   1712 
   1713 	/* Watch frontend and hotplug state change */
   1714 	if ((xvdi_add_event_handler(dip, XS_OE_STATE, xdb_oe_state_change,
   1715 	    NULL) != DDI_SUCCESS) ||
   1716 	    (xvdi_add_event_handler(dip, XS_HP_STATE, xdb_hp_state_change,
   1717 	    NULL) != DDI_SUCCESS))
   1718 		goto errout3;
   1719 
   1720 	/*
   1721 	 * Kick-off hotplug script
   1722 	 */
   1723 	if (xvdi_post_event(dip, XEN_HP_ADD) != DDI_SUCCESS) {
   1724 		cmn_err(CE_WARN, "xdb@%s: failed to start hotplug script",
   1725 		    ddi_get_name_addr(dip));
   1726 		goto errout3;
   1727 	}
   1728 
   1729 	/*
   1730 	 * start waiting for hotplug event and otherend state event
   1731 	 * mainly for debugging, frontend will not take any op seeing this
   1732 	 */
   1733 	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
   1734 
   1735 	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: attached!",
   1736 	    ddi_get_name_addr(dip)));
   1737 	return (DDI_SUCCESS);
   1738 
   1739 errout3:
   1740 	ASSERT(vdp->xs_hp_connected && vdp->xs_if_connected);
   1741 
   1742 	xvdi_remove_event_handler(dip, NULL);
   1743 
   1744 	/* Disconnect from the backend */
   1745 	mutex_enter(&vdp->xs_cbmutex);
   1746 	mutex_enter(&vdp->xs_iomutex);
   1747 	vdp->xs_send_buf = B_FALSE;
   1748 	cv_broadcast(&vdp->xs_iocv);
   1749 	mutex_exit(&vdp->xs_iomutex);
   1750 	mutex_exit(&vdp->xs_cbmutex);
   1751 
   1752 	/* wait for all io to dtrain and destroy io taskq */
   1753 	ddi_taskq_destroy(vdp->xs_iotaskq);
   1754 
   1755 	/* tear down block-configure watch */
   1756 	mutex_enter(&vdp->xs_cbmutex);
   1757 	xvdi_remove_xb_watch_handlers(dip);
   1758 	mutex_exit(&vdp->xs_cbmutex);
   1759 
   1760 errout2:
   1761 	/* remove kstats */
   1762 	kstat_delete(vdp->xs_kstats);
   1763 
   1764 errout1:
   1765 	/* free up driver state */
   1766 	ddi_set_driver_private(dip, NULL);
   1767 	cv_destroy(&vdp->xs_iocv);
   1768 	cv_destroy(&vdp->xs_ionumcv);
   1769 	mutex_destroy(&vdp->xs_cbmutex);
   1770 	mutex_destroy(&vdp->xs_iomutex);
   1771 	ddi_soft_state_free(xdb_statep, instance);
   1772 
   1773 	return (DDI_FAILURE);
   1774 }
   1775 
   1776 /*ARGSUSED*/
   1777 static int
   1778 xdb_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
   1779 {
   1780 	int instance = ddi_get_instance(dip);
   1781 	xdb_t *vdp = XDB_INST2SOFTS(instance);
   1782 
   1783 	switch (cmd) {
   1784 	case DDI_SUSPEND:
   1785 		return (DDI_FAILURE);
   1786 	case DDI_DETACH:
   1787 		break;
   1788 	default:
   1789 		return (DDI_FAILURE);
   1790 	}
   1791 
   1792 	/* DDI_DETACH handling */
   1793 
   1794 	/* refuse to detach if we're still in use by the frontend */
   1795 	mutex_enter(&vdp->xs_iomutex);
   1796 	if (vdp->xs_if_connected) {
   1797 		mutex_exit(&vdp->xs_iomutex);
   1798 		return (DDI_FAILURE);
   1799 	}
   1800 	vdp->xs_send_buf = B_FALSE;
   1801 	cv_broadcast(&vdp->xs_iocv);
   1802 	mutex_exit(&vdp->xs_iomutex);
   1803 
   1804 	xvdi_remove_event_handler(dip, NULL);
   1805 	(void) xvdi_post_event(dip, XEN_HP_REMOVE);
   1806 
   1807 	ddi_taskq_destroy(vdp->xs_iotaskq);
   1808 
   1809 	mutex_enter(&vdp->xs_cbmutex);
   1810 	xvdi_remove_xb_watch_handlers(dip);
   1811 	mutex_exit(&vdp->xs_cbmutex);
   1812 
   1813 	cv_destroy(&vdp->xs_iocv);
   1814 	cv_destroy(&vdp->xs_ionumcv);
   1815 	mutex_destroy(&vdp->xs_cbmutex);
   1816 	mutex_destroy(&vdp->xs_iomutex);
   1817 	kstat_delete(vdp->xs_kstats);
   1818 	ddi_set_driver_private(dip, NULL);
   1819 	ddi_soft_state_free(xdb_statep, instance);
   1820 
   1821 	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: detached!",
   1822 	    ddi_get_name_addr(dip)));
   1823 	return (DDI_SUCCESS);
   1824 }
   1825 
/*
 * Device operations vector for this driver.  Only attach and detach
 * are driver specific; no cb_ops, bus_ops or power entry point is
 * supplied.
 */
static struct dev_ops xdb_dev_ops = {
	DEVO_REV,	/* devo_rev */
	0,		/* devo_refcnt */
	ddi_getinfo_1to1, /* devo_getinfo */
	nulldev,	/* devo_identify */
	nulldev,	/* devo_probe */
	xdb_attach,	/* devo_attach */
	xdb_detach,	/* devo_detach */
	nodev,		/* devo_reset */
	NULL,		/* devo_cb_ops */
	NULL,		/* devo_bus_ops */
	NULL,		/* power */
	ddi_quiesce_not_needed, /* quiesce */
};
   1840 
/*
 * Module linkage information for the kernel.
 *
 * Describes this module as a device driver (mod_driverops), gives it a
 * human-readable name, and points at the dev_ops vector defined above.
 */
static struct modldrv modldrv = {
	&mod_driverops,			/* Type of module. */
	"vbd backend driver",		/* Name of the module */
	&xdb_dev_ops			/* driver ops */
};
   1849 
/*
 * Linkage structure handed to mod_install(), mod_remove() and mod_info()
 * by _init(), _fini() and _info() below. It carries a single linkage
 * element (modldrv) followed by the NULL terminator.
 */
static struct modlinkage xdb_modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
   1855 
   1856 int
   1857 _init(void)
   1858 {
   1859 	int rv;
   1860 
   1861 	if ((rv = ddi_soft_state_init((void **)&xdb_statep,
   1862 	    sizeof (xdb_t), 0)) == 0)
   1863 		if ((rv = mod_install(&xdb_modlinkage)) != 0)
   1864 			ddi_soft_state_fini((void **)&xdb_statep);
   1865 	return (rv);
   1866 }
   1867 
   1868 int
   1869 _fini(void)
   1870 {
   1871 	int rv;
   1872 
   1873 	if ((rv = mod_remove(&xdb_modlinkage)) != 0)
   1874 		return (rv);
   1875 	ddi_soft_state_fini((void **)&xdb_statep);
   1876 	return (rv);
   1877 }
   1878 
   1879 int
   1880 _info(struct modinfo *modinfop)
   1881 {
   1882 	return (mod_info(&xdb_modlinkage, modinfop));
   1883 }
   1884 
   1885 static int
   1886 xdb_get_request(xdb_t *vdp, blkif_request_t *req)
   1887 {
   1888 	void *src = xvdi_ring_get_request(vdp->xs_ring);
   1889 
   1890 	if (src == NULL)
   1891 		return (0);
   1892 
   1893 	switch (vdp->xs_blk_protocol) {
   1894 	case BLKIF_PROTOCOL_NATIVE:
   1895 		(void) memcpy(req, src, sizeof (*req));
   1896 		break;
   1897 	case BLKIF_PROTOCOL_X86_32:
   1898 		blkif_get_x86_32_req(req, src);
   1899 		break;
   1900 	case BLKIF_PROTOCOL_X86_64:
   1901 		blkif_get_x86_64_req(req, src);
   1902 		break;
   1903 	default:
   1904 		cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d",
   1905 		    ddi_get_name_addr(vdp->xs_dip),
   1906 		    vdp->xs_blk_protocol);
   1907 	}
   1908 	return (1);
   1909 }
   1910 
   1911 static int
   1912 xdb_push_response(xdb_t *vdp, uint64_t id, uint8_t op, uint16_t status)
   1913 {
   1914 	ddi_acc_handle_t acchdl = vdp->xs_ring_hdl;
   1915 	blkif_response_t *rsp = xvdi_ring_get_response(vdp->xs_ring);
   1916 	blkif_x86_32_response_t *rsp_32 = (blkif_x86_32_response_t *)rsp;
   1917 	blkif_x86_64_response_t *rsp_64 = (blkif_x86_64_response_t *)rsp;
   1918 
   1919 	ASSERT(rsp);
   1920 
   1921 	switch (vdp->xs_blk_protocol) {
   1922 	case BLKIF_PROTOCOL_NATIVE:
   1923 		ddi_put64(acchdl, &rsp->id, id);
   1924 		ddi_put8(acchdl, &rsp->operation, op);
   1925 		ddi_put16(acchdl, (uint16_t *)&rsp->status,
   1926 		    status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
   1927 		break;
   1928 	case BLKIF_PROTOCOL_X86_32:
   1929 		ddi_put64(acchdl, &rsp_32->id, id);
   1930 		ddi_put8(acchdl, &rsp_32->operation, op);
   1931 		ddi_put16(acchdl, (uint16_t *)&rsp_32->status,
   1932 		    status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
   1933 		break;
   1934 	case BLKIF_PROTOCOL_X86_64:
   1935 		ddi_put64(acchdl, &rsp_64->id, id);
   1936 		ddi_put8(acchdl, &rsp_64->operation, op);
   1937 		ddi_put16(acchdl, (uint16_t *)&rsp_64->status,
   1938 		    status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
   1939 		break;
   1940 	default:
   1941 		cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d",
   1942 		    ddi_get_name_addr(vdp->xs_dip),
   1943 		    vdp->xs_blk_protocol);
   1944 	}
   1945 
   1946 	return (xvdi_ring_push_response(vdp->xs_ring));
   1947 }
   1948 
   1949 static void
   1950 blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src)
   1951 {
   1952 	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
   1953 	dst->operation = src->operation;
   1954 	dst->nr_segments = src->nr_segments;
   1955 	dst->handle = src->handle;
   1956 	dst->id = src->id;
   1957 	dst->sector_number = src->sector_number;
   1958 	if (n > src->nr_segments)
   1959 		n = src->nr_segments;
   1960 	for (i = 0; i < n; i++)
   1961 		dst->seg[i] = src->seg[i];
   1962 }
   1963 
   1964 static void
   1965 blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src)
   1966 {
   1967 	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
   1968 	dst->operation = src->operation;
   1969 	dst->nr_segments = src->nr_segments;
   1970 	dst->handle = src->handle;
   1971 	dst->id = src->id;
   1972 	dst->sector_number = src->sector_number;
   1973 	if (n > src->nr_segments)
   1974 		n = src->nr_segments;
   1975 	for (i = 0; i < n; i++)
   1976 		dst->seg[i] = src->seg[i];
   1977 }
   1978