Home | History | Annotate | Download | only in io
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  *
     29  * xenbus_xs.c
     30  *
     31  * This is the kernel equivalent of the "xs" library.  We don't need everything
     32  * and we use xenbus_comms for communication.
     33  *
     34  * Copyright (C) 2005 Rusty Russell, IBM Corporation
     35  *
     36  * This file may be distributed separately from the Linux kernel, or
     37  * incorporated into other software packages, subject to the following license:
     38  *
     39  * Permission is hereby granted, free of charge, to any person obtaining a copy
     40  * of this source file (the "Software"), to deal in the Software without
     41  * restriction, including without limitation the rights to use, copy, modify,
     42  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
     43  * and to permit persons to whom the Software is furnished to do so, subject to
     44  * the following conditions:
     45  *
     46  * The above copyright notice and this permission notice shall be included in
     47  * all copies or substantial portions of the Software.
     48  *
     49  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     50  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     51  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     52  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     53  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     54  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     55  * IN THE SOFTWARE.
     56  */
     57 
     58 /*
     59  * NOTE: To future maintainers of the Solaris version of this file:
     60  * I found the Linux version of this code to be very disgusting in
     61  * overloading pointers and error codes into void * return values.
     62  * The main difference you will find is that all such usage is changed
     63  * to pass pointers to void* to be filled in with return values and
     64  * the functions return error codes.
     65  */
     66 
     67 #include <sys/errno.h>
     68 #include <sys/types.h>
     69 #include <sys/sysmacros.h>
     70 #include <sys/uio.h>
     71 #include <sys/mutex.h>
     72 #include <sys/condvar.h>
     73 #include <sys/rwlock.h>
     74 #include <sys/disp.h>
     75 #include <sys/ddi.h>
     76 #include <sys/sunddi.h>
     77 #include <sys/avintr.h>
     78 #include <sys/cmn_err.h>
     79 #include <sys/mach_mmu.h>
     80 #include <util/sscanf.h>
     81 #define	_XSD_ERRORS_DEFINED
     82 #ifdef XPV_HVM_DRIVER
     83 #include <sys/xpv_support.h>
     84 #endif
     85 #include <sys/hypervisor.h>
     86 #include <sys/taskq.h>
     87 #include <sys/sdt.h>
     88 #include <xen/sys/xenbus_impl.h>
     89 #include <xen/sys/xenbus_comms.h>
     90 #include <xen/sys/xendev.h>
     91 #include <xen/public/io/xs_wire.h>
     92 
/* Evaluates to true when strcmp() reports the strings a and b as equal. */
#define	streq(a, b) (strcmp((a), (b)) == 0)

/* Evaluates to true when list_head() returns NULL for the given list. */
#define	list_empty(list) (list_head(list) == NULL)
     96 
/*
 * A message read from xenstore by process_msg().  Depending on hdr.type it
 * is queued either on xs_state.reply_list (replies, using un.reply) or on
 * the watch_events list (XS_WATCH_EVENT, using un.watch).
 */
struct xs_stored_msg {
	/* Linkage for whichever list the message is queued on. */
	list_node_t list;

	/* Wire header as read from xenstore. */
	struct xsd_sockmsg hdr;

	union {
		/* Queued replies. */
		struct {
			/* NUL-terminated payload, hdr.len + 1 bytes long. */
			char *body;
		} reply;

		/* Queued watch events. */
		struct {
			/* Registered watch matching the event's token. */
			struct xenbus_watch *handle;
			/* String vector produced by split(). */
			char **vec;
			/* Number of entries in vec. */
			unsigned int vec_size;
		} watch;
	} un;
};
    116 
/*
 * State shared by everything that talks to xenstore through this file;
 * there is a single instance, xs_state.
 */
static struct xs_handle {
	/* A list of replies. Currently only one will ever be outstanding. */
	list_t reply_list;
	/* Protects reply_list. */
	kmutex_t reply_lock;
	/* Signalled by process_msg() when a reply is queued. */
	kcondvar_t reply_cv;

	/* One request at a time. */
	kmutex_t request_mutex;

	/* Protect transactions against save/restore. */
	krwlock_t suspend_lock;
} xs_state;
    129 
/*
 * Source of request IDs.  Incremented, with request_mutex held, each time
 * xs_talkv() or xenbus_dev_request_and_reply() sends a request.
 */
static int last_req_id;

/*
 * List of clients wanting a xenstore up notification, and a lock to protect it
 */
static boolean_t xenstore_up;
static list_t notify_list;
static kmutex_t notify_list_lock;
static taskq_t *xenbus_taskq;

/* List of registered watches, and a lock to protect it. */
static list_t watches;
static kmutex_t watches_lock;

/* List of pending watch callback events, and a lock to protect it. */
static list_t watch_events;
static kmutex_t watch_events_lock;

/*
 * Details of the xenwatch callback kernel thread. The thread waits on the
 * watch_events_cv for work to do (queued on watch_events list). When it
 * wakes up it removes one event from the list under watch_events_lock and
 * then holds the xenwatch_mutex while running the watch callback.
 */
static kmutex_t xenwatch_mutex;
static kcondvar_t watch_events_cv;

/* Defined further down; needed early by the polled path in read_reply(). */
static int process_msg(void);
    158 
    159 static int
    160 get_error(const char *errorstring)
    161 {
    162 	unsigned int i;
    163 
    164 	for (i = 0; !streq(errorstring, xsd_errors[i].errstring); i++) {
    165 		if (i == (sizeof (xsd_errors) / sizeof (xsd_errors[0])) - 1) {
    166 			cmn_err(CE_WARN,
    167 			    "XENBUS xen store gave: unknown error %s",
    168 			    errorstring);
    169 			return (EINVAL);
    170 		}
    171 	}
    172 	return (xsd_errors[i].errnum);
    173 }
    174 
    175 /*
    176  * Read a synchronous reply from xenstore.  Since we can return early before
    177  * reading a relevant reply, we discard any messages not matching the request
    178  * ID.  Caller must free returned message on success.
    179  */
    180 static int
    181 read_reply(struct xsd_sockmsg *req_hdr, struct xs_stored_msg **reply)
    182 {
    183 	extern int do_polled_io;
    184 
    185 	mutex_enter(&xs_state.reply_lock);
    186 
    187 	for (;;) {
    188 		while (list_empty(&xs_state.reply_list)) {
    189 			if (interrupts_unleashed && !do_polled_io) {
    190 				if (cv_wait_sig(&xs_state.reply_cv,
    191 				    &xs_state.reply_lock) == 0) {
    192 					mutex_exit(&xs_state.reply_lock);
    193 					*reply = NULL;
    194 					return (EINTR);
    195 				}
    196 			} else { /* polled mode needed for early probes */
    197 				mutex_exit(&xs_state.reply_lock);
    198 				(void) HYPERVISOR_yield();
    199 				(void) process_msg();
    200 				mutex_enter(&xs_state.reply_lock);
    201 			}
    202 		}
    203 
    204 		*reply = list_head(&xs_state.reply_list);
    205 		list_remove(&xs_state.reply_list, *reply);
    206 
    207 		if ((*reply)->hdr.req_id == req_hdr->req_id)
    208 			break;
    209 	}
    210 
    211 	mutex_exit(&xs_state.reply_lock);
    212 	return (0);
    213 }
    214 
    215 /* Emergency write. */
    216 void
    217 xenbus_debug_write(const char *str, unsigned int count)
    218 {
    219 	struct xsd_sockmsg msg = { 0 };
    220 
    221 	msg.type = XS_DEBUG;
    222 	msg.len = sizeof ("print") + count + 1;
    223 
    224 	mutex_enter(&xs_state.request_mutex);
    225 	(void) xb_write(&msg, sizeof (msg));
    226 	(void) xb_write("print", sizeof ("print"));
    227 	(void) xb_write(str, count);
    228 	(void) xb_write("", 1);
    229 	mutex_exit(&xs_state.request_mutex);
    230 }
    231 
    232 /*
    233  * This is pretty unpleasant.  First off, there's the horrible logic around
    234  * suspend_lock and transactions.  Also, we can be interrupted either before we
    235  * write a message, or before we receive a reply.  A client that wants to
    236  * survive this can't know which case happened.  Luckily all clients don't care
    237  * about signals currently, and the alternative (a hard wait on a userspace
    238  * daemon) isn't exactly preferable.  Caller must free 'reply' on success.
    239  */
    240 int
    241 xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **reply)
    242 {
    243 	struct xsd_sockmsg req_msg = *msg;
    244 	struct xs_stored_msg *reply_msg = NULL;
    245 	int err;
    246 
    247 	if (req_msg.type == XS_TRANSACTION_START)
    248 		rw_enter(&xs_state.suspend_lock, RW_READER);
    249 
    250 	mutex_enter(&xs_state.request_mutex);
    251 
    252 	msg->req_id = last_req_id++;
    253 
    254 	err = xb_write(msg, sizeof (*msg) + msg->len);
    255 	if (err) {
    256 		if (req_msg.type == XS_TRANSACTION_START)
    257 			rw_exit(&xs_state.suspend_lock);
    258 		msg->type = XS_ERROR;
    259 		*reply = NULL;
    260 		goto out;
    261 	}
    262 
    263 	err = read_reply(msg, &reply_msg);
    264 
    265 	if (err) {
    266 		if (msg->type == XS_TRANSACTION_START)
    267 			rw_exit(&xs_state.suspend_lock);
    268 		*reply = NULL;
    269 		goto out;
    270 	}
    271 
    272 	*reply = reply_msg->un.reply.body;
    273 	*msg = reply_msg->hdr;
    274 
    275 	if (reply_msg->hdr.type == XS_TRANSACTION_END)
    276 		rw_exit(&xs_state.suspend_lock);
    277 
    278 out:
    279 	if (reply_msg != NULL)
    280 		kmem_free(reply_msg, sizeof (*reply_msg));
    281 
    282 	mutex_exit(&xs_state.request_mutex);
    283 	return (err);
    284 }
    285 
    286 /*
    287  * Send message to xs, return errcode, rval filled in with pointer
    288  * to kmem_alloc'ed reply.
    289  */
    290 static int
    291 xs_talkv(xenbus_transaction_t t,
    292 		    enum xsd_sockmsg_type type,
    293 		    const iovec_t *iovec,
    294 		    unsigned int num_vecs,
    295 		    void **rval,
    296 		    unsigned int *len)
    297 {
    298 	struct xsd_sockmsg msg;
    299 	struct xs_stored_msg *reply_msg;
    300 	char *reply;
    301 	unsigned int i;
    302 	int err;
    303 
    304 	msg.tx_id = (uint32_t)(unsigned long)t;
    305 	msg.type = type;
    306 	msg.len = 0;
    307 	for (i = 0; i < num_vecs; i++)
    308 		msg.len += iovec[i].iov_len;
    309 
    310 	mutex_enter(&xs_state.request_mutex);
    311 
    312 	msg.req_id = last_req_id++;
    313 
    314 	err = xb_write(&msg, sizeof (msg));
    315 	if (err) {
    316 		mutex_exit(&xs_state.request_mutex);
    317 		return (err);
    318 	}
    319 
    320 	for (i = 0; i < num_vecs; i++) {
    321 		err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
    322 		if (err) {
    323 			mutex_exit(&xs_state.request_mutex);
    324 			return (err);
    325 		}
    326 	}
    327 
    328 	err = read_reply(&msg, &reply_msg);
    329 
    330 	mutex_exit(&xs_state.request_mutex);
    331 
    332 	if (err)
    333 		return (err);
    334 
    335 	reply = reply_msg->un.reply.body;
    336 
    337 	if (reply_msg->hdr.type == XS_ERROR) {
    338 		err = get_error(reply);
    339 		kmem_free(reply, reply_msg->hdr.len + 1);
    340 		goto out;
    341 	}
    342 
    343 	if (len != NULL)
    344 		*len = reply_msg->hdr.len + 1;
    345 
    346 	ASSERT(reply_msg->hdr.type == type);
    347 
    348 	if (rval != NULL)
    349 		*rval = reply;
    350 	else
    351 		kmem_free(reply, reply_msg->hdr.len + 1);
    352 
    353 out:
    354 	kmem_free(reply_msg, sizeof (*reply_msg));
    355 	return (err);
    356 }
    357 
    358 /* Simplified version of xs_talkv: single message. */
    359 static int
    360 xs_single(xenbus_transaction_t t,
    361 			enum xsd_sockmsg_type type,
    362 			const char *string, void **ret,
    363 			unsigned int *len)
    364 {
    365 	iovec_t iovec;
    366 
    367 	iovec.iov_base = (char *)string;
    368 	iovec.iov_len = strlen(string) + 1;
    369 	return (xs_talkv(t, type, &iovec, 1, ret, len));
    370 }
    371 
    372 static unsigned int
    373 count_strings(const char *strings, unsigned int len)
    374 {
    375 	unsigned int num;
    376 	const char *p;
    377 
    378 	for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1)
    379 		num++;
    380 
    381 	return (num);
    382 }
    383 
    384 /* Return the path to dir with /name appended. Buffer must be kmem_free()'ed */
    385 static char *
    386 join(const char *dir, const char *name)
    387 {
    388 	char *buffer;
    389 	size_t slashlen;
    390 
    391 	slashlen = streq(name, "") ? 0 : 1;
    392 	buffer = kmem_alloc(strlen(dir) + slashlen + strlen(name) + 1,
    393 	    KM_SLEEP);
    394 
    395 	(void) strcpy(buffer, dir);
    396 	if (slashlen != 0) {
    397 		(void) strcat(buffer, "/");
    398 		(void) strcat(buffer, name);
    399 	}
    400 	return (buffer);
    401 }
    402 
    403 static char **
    404 split(char *strings, unsigned int len, unsigned int *num)
    405 {
    406 	char *p, **ret;
    407 
    408 	/* Count the strings. */
    409 	if ((*num = count_strings(strings, len - 1)) == 0)
    410 		return (NULL);
    411 
    412 	/* Transfer to one big alloc for easy freeing. */
    413 	ret = kmem_alloc(*num * sizeof (char *) + (len - 1), KM_SLEEP);
    414 	(void) memcpy(&ret[*num], strings, len - 1);
    415 	kmem_free(strings, len);
    416 
    417 	strings = (char *)&ret[*num];
    418 	for (p = strings, *num = 0; p < strings + (len - 1);
    419 	    p += strlen(p) + 1) {
    420 		ret[(*num)++] = p;
    421 	}
    422 
    423 	return (ret);
    424 }
    425 
    426 char **
    427 xenbus_directory(xenbus_transaction_t t,
    428 			const char *dir, const char *node, unsigned int *num)
    429 {
    430 	char *strings, *path;
    431 	unsigned int len;
    432 	int err;
    433 
    434 	path = join(dir, node);
    435 	err = xs_single(t, XS_DIRECTORY, path, (void **)&strings, &len);
    436 	kmem_free(path, strlen(path) + 1);
    437 	if (err != 0 || strings == NULL) {
    438 		/* sigh, we lose error code info here */
    439 		*num = 0;
    440 		return (NULL);
    441 	}
    442 
    443 	return (split(strings, len, num));
    444 }
    445 
    446 /* Check if a path exists. */
    447 boolean_t
    448 xenbus_exists(const char *dir, const char *node)
    449 {
    450 	void	*p;
    451 	uint_t	n;
    452 
    453 	if (xenbus_read(XBT_NULL, dir, node, &p, &n) != 0)
    454 		return (B_FALSE);
    455 	kmem_free(p, n);
    456 	return (B_TRUE);
    457 }
    458 
    459 /* Check if a directory path exists. */
    460 boolean_t
    461 xenbus_exists_dir(const char *dir, const char *node)
    462 {
    463 	char **d;
    464 	unsigned int dir_n;
    465 	int i, len;
    466 
    467 	d = xenbus_directory(XBT_NULL, dir, node, &dir_n);
    468 	if (d == NULL)
    469 		return (B_FALSE);
    470 	for (i = 0, len = 0; i < dir_n; i++)
    471 		len += strlen(d[i]) + 1 + sizeof (char *);
    472 	kmem_free(d, len);
    473 	return (B_TRUE);
    474 }
    475 
    476 /*
    477  * Get the value of a single file.
    478  * Returns a kmem_alloced value in retp: call kmem_free() on it after use.
    479  * len indicates length in bytes.
    480  */
    481 int
    482 xenbus_read(xenbus_transaction_t t,
    483 	    const char *dir, const char *node, void **retp, unsigned int *len)
    484 {
    485 	char *path;
    486 	int err;
    487 
    488 	path = join(dir, node);
    489 	err = xs_single(t, XS_READ, path, retp, len);
    490 	kmem_free(path, strlen(path) + 1);
    491 	return (err);
    492 }
    493 
    494 int
    495 xenbus_read_str(const char *dir, const char *node, char **retp)
    496 {
    497 	uint_t	n;
    498 	int	err;
    499 	char	*str;
    500 
    501 	/*
    502 	 * Since we access the xenbus value immediatly we can't be
    503 	 * part of a transaction.
    504 	 */
    505 	if ((err = xenbus_read(XBT_NULL, dir, node, (void **)&str, &n)) != 0)
    506 		return (err);
    507 	ASSERT((str != NULL) && (n > 0));
    508 
    509 	/*
    510 	 * Why bother with this?  Because xenbus is truly annoying in the
    511 	 * fact that when it returns a string, it doesn't guarantee that
    512 	 * the memory that holds the string is of size strlen() + 1.
    513 	 * This forces callers to keep track of the size of the memory
    514 	 * containing the string.  Ugh.  We'll work around this by
    515 	 * re-allocate strings to always be of size strlen() + 1.
    516 	 */
    517 	*retp = strdup(str);
    518 	kmem_free(str, n);
    519 	return (0);
    520 }
    521 
    522 /*
    523  * Write the value of a single file.
    524  * Returns err on failure.
    525  */
    526 int
    527 xenbus_write(xenbus_transaction_t t,
    528 		const char *dir, const char *node, const char *string)
    529 {
    530 	char *path;
    531 	iovec_t iovec[2];
    532 	int ret;
    533 
    534 	path = join(dir, node);
    535 
    536 	iovec[0].iov_base = (void *)path;
    537 	iovec[0].iov_len = strlen(path) + 1;
    538 	iovec[1].iov_base = (void *)string;
    539 	iovec[1].iov_len = strlen(string);
    540 
    541 	ret = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
    542 	kmem_free(path, iovec[0].iov_len);
    543 	return (ret);
    544 }
    545 
    546 /* Create a new directory. */
    547 int
    548 xenbus_mkdir(xenbus_transaction_t t, const char *dir, const char *node)
    549 {
    550 	char *path;
    551 	int ret;
    552 
    553 	path = join(dir, node);
    554 	ret = xs_single(t, XS_MKDIR, path, NULL, NULL);
    555 	kmem_free(path, strlen(path) + 1);
    556 	return (ret);
    557 }
    558 
    559 /* Destroy a file or directory (directories must be empty). */
    560 int
    561 xenbus_rm(xenbus_transaction_t t, const char *dir, const char *node)
    562 {
    563 	char *path;
    564 	int ret;
    565 
    566 	path = join(dir, node);
    567 	ret = xs_single(t, XS_RM, path, NULL, NULL);
    568 	kmem_free(path, strlen(path) + 1);
    569 	return (ret);
    570 }
    571 
    572 /*
    573  * Start a transaction: changes by others will not be seen during this
    574  * transaction, and changes will not be visible to others until end.
    575  */
    576 int
    577 xenbus_transaction_start(xenbus_transaction_t *t)
    578 {
    579 	void *id_str;
    580 	unsigned long id;
    581 	int err;
    582 	unsigned int len;
    583 
    584 	rw_enter(&xs_state.suspend_lock, RW_READER);
    585 
    586 	err = xs_single(XBT_NULL, XS_TRANSACTION_START, "", &id_str, &len);
    587 	if (err) {
    588 		rw_exit(&xs_state.suspend_lock);
    589 		return (err);
    590 	}
    591 
    592 	(void) ddi_strtoul((char *)id_str, NULL, 0, &id);
    593 	*t = (xenbus_transaction_t)id;
    594 	kmem_free(id_str, len);
    595 
    596 	return (0);
    597 }
    598 
    599 /*
    600  * End a transaction.
    601  * If abandon is true, transaction is discarded instead of committed.
    602  */
    603 int
    604 xenbus_transaction_end(xenbus_transaction_t t, int abort)
    605 {
    606 	char abortstr[2];
    607 	int err;
    608 
    609 	if (abort)
    610 		(void) strcpy(abortstr, "F");
    611 	else
    612 		(void) strcpy(abortstr, "T");
    613 
    614 	err = xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL);
    615 
    616 	rw_exit(&xs_state.suspend_lock);
    617 
    618 	return (err);
    619 }
    620 
    621 /*
    622  * Single read and scanf: returns errno or 0.  This can only handle a single
    623  * conversion specifier.
    624  */
    625 /* SCANFLIKE4 */
    626 int
    627 xenbus_scanf(xenbus_transaction_t t,
    628 		const char *dir, const char *node, const char *fmt, ...)
    629 {
    630 	va_list ap;
    631 	int ret;
    632 	char *val;
    633 	unsigned int len;
    634 
    635 	ret = xenbus_read(t, dir, node, (void **)&val, &len);
    636 	if (ret)
    637 		return (ret);
    638 
    639 	va_start(ap, fmt);
    640 	if (vsscanf(val, fmt, ap) != 1)
    641 		ret = ERANGE;
    642 	va_end(ap);
    643 	kmem_free(val, len);
    644 	return (ret);
    645 }
    646 
    647 /* Single printf and write: returns errno or 0. */
    648 /* PRINTFLIKE4 */
    649 int
    650 xenbus_printf(xenbus_transaction_t t,
    651 		const char *dir, const char *node, const char *fmt, ...)
    652 {
    653 	va_list ap;
    654 	int ret;
    655 #define	PRINTF_BUFFER_SIZE 4096
    656 	char *printf_buffer;
    657 
    658 	printf_buffer = kmem_alloc(PRINTF_BUFFER_SIZE, KM_SLEEP);
    659 
    660 	va_start(ap, fmt);
    661 	ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
    662 	va_end(ap);
    663 
    664 	ASSERT(ret <= PRINTF_BUFFER_SIZE-1);
    665 	ret = xenbus_write(t, dir, node, printf_buffer);
    666 
    667 	kmem_free(printf_buffer, PRINTF_BUFFER_SIZE);
    668 
    669 	return (ret);
    670 }
    671 
    672 
    673 /* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
    674 int
    675 xenbus_gather(xenbus_transaction_t t, const char *dir, ...)
    676 {
    677 	va_list ap;
    678 	const char *name;
    679 	int ret = 0;
    680 	unsigned int len;
    681 
    682 	va_start(ap, dir);
    683 	while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
    684 		const char *fmt = va_arg(ap, char *);
    685 		void *result = va_arg(ap, void *);
    686 		char *p;
    687 
    688 		ret = xenbus_read(t, dir, name, (void **)&p, &len);
    689 		if (ret)
    690 			break;
    691 		if (fmt) {
    692 			ASSERT(result != NULL);
    693 			if (sscanf(p, fmt, result) != 1)
    694 				ret = EINVAL;
    695 			kmem_free(p, len);
    696 		} else
    697 			*(char **)result = p;
    698 	}
    699 	va_end(ap);
    700 	return (ret);
    701 }
    702 
    703 static int
    704 xs_watch(const char *path, const char *token)
    705 {
    706 	iovec_t iov[2];
    707 
    708 	iov[0].iov_base = (void *)path;
    709 	iov[0].iov_len = strlen(path) + 1;
    710 	iov[1].iov_base = (void *)token;
    711 	iov[1].iov_len = strlen(token) + 1;
    712 
    713 	return (xs_talkv(XBT_NULL, XS_WATCH, iov, 2, NULL, NULL));
    714 }
    715 
    716 static int
    717 xs_unwatch(const char *path, const char *token)
    718 {
    719 	iovec_t iov[2];
    720 
    721 	iov[0].iov_base = (char *)path;
    722 	iov[0].iov_len = strlen(path) + 1;
    723 	iov[1].iov_base = (char *)token;
    724 	iov[1].iov_len = strlen(token) + 1;
    725 
    726 	return (xs_talkv(XBT_NULL, XS_UNWATCH, iov, 2, NULL, NULL));
    727 }
    728 
    729 static struct xenbus_watch *
    730 find_watch(const char *token)
    731 {
    732 	struct xenbus_watch *i, *cmp;
    733 
    734 	(void) ddi_strtoul(token, NULL, 16, (unsigned long *)&cmp);
    735 
    736 	for (i = list_head(&watches); i != NULL; i = list_next(&watches, i))
    737 		if (i == cmp)
    738 			break;
    739 
    740 	return (i);
    741 }
    742 
    743 /* Register a xenstore state notify callback */
    744 int
    745 xs_register_xenbus_callback(void (*callback)(int))
    746 {
    747 	struct xenbus_notify *xbn, *xnp;
    748 
    749 	xbn = kmem_alloc(sizeof (struct xenbus_notify), KM_SLEEP);
    750 	xbn->notify_func = callback;
    751 	mutex_enter(&notify_list_lock);
    752 	/*
    753 	 * Make sure not already on the list
    754 	 */
    755 	xnp = list_head(&notify_list);
    756 	for (; xnp != NULL; xnp = list_next(&notify_list, xnp)) {
    757 		if (xnp->notify_func == callback) {
    758 			kmem_free(xbn, sizeof (struct xenbus_notify));
    759 			mutex_exit(&notify_list_lock);
    760 			return (EEXIST);
    761 		}
    762 	}
    763 	xnp = xbn;
    764 	list_insert_tail(&notify_list, xbn);
    765 done:
    766 	if (xenstore_up)
    767 		xnp->notify_func(XENSTORE_UP);
    768 	mutex_exit(&notify_list_lock);
    769 	return (0);
    770 }
    771 
    772 /*
    773  * Notify clients of xenstore state
    774  */
    775 static void
    776 do_notify_callbacks(void *arg)
    777 {
    778 	struct xenbus_notify *xnp;
    779 
    780 	mutex_enter(&notify_list_lock);
    781 	xnp = list_head(&notify_list);
    782 	for (; xnp != NULL; xnp = list_next(&notify_list, xnp)) {
    783 		xnp->notify_func((int)((uintptr_t)arg));
    784 	}
    785 	mutex_exit(&notify_list_lock);
    786 }
    787 
    788 void
    789 xs_notify_xenstore_up(void)
    790 {
    791 	xenstore_up = B_TRUE;
    792 	(void) taskq_dispatch(xenbus_taskq, do_notify_callbacks,
    793 	    (void *)XENSTORE_UP, 0);
    794 }
    795 
    796 void
    797 xs_notify_xenstore_down(void)
    798 {
    799 	xenstore_up = B_FALSE;
    800 	(void) taskq_dispatch(xenbus_taskq, do_notify_callbacks,
    801 	    (void *)XENSTORE_DOWN, 0);
    802 }
    803 
    804 /* Register callback to watch this node. */
    805 int
    806 register_xenbus_watch(struct xenbus_watch *watch)
    807 {
    808 	/* Pointer in ascii is the token. */
    809 	char token[sizeof (watch) * 2 + 1];
    810 	int err;
    811 
    812 	ASSERT(xenstore_up);
    813 	(void) snprintf(token, sizeof (token), "%lX", (long)watch);
    814 
    815 	rw_enter(&xs_state.suspend_lock, RW_READER);
    816 
    817 	mutex_enter(&watches_lock);
    818 	/*
    819 	 * May be re-registering a watch if xenstore daemon was restarted
    820 	 */
    821 	if (find_watch(token) == NULL)
    822 		list_insert_tail(&watches, watch);
    823 	mutex_exit(&watches_lock);
    824 
    825 	DTRACE_XPV3(xenbus__register__watch, const char *, watch->node,
    826 	    uintptr_t, watch->callback, struct xenbus_watch *, watch);
    827 
    828 	err = xs_watch(watch->node, token);
    829 
    830 	/* Ignore errors due to multiple registration. */
    831 	if ((err != 0) && (err != EEXIST)) {
    832 		mutex_enter(&watches_lock);
    833 		list_remove(&watches, watch);
    834 		mutex_exit(&watches_lock);
    835 	}
    836 
    837 	rw_exit(&xs_state.suspend_lock);
    838 
    839 	return (err);
    840 }
    841 
    842 static void
    843 free_stored_msg(struct xs_stored_msg *msg)
    844 {
    845 	int i, len = 0;
    846 
    847 	for (i = 0; i < msg->un.watch.vec_size; i++)
    848 		len += strlen(msg->un.watch.vec[i]) + 1 + sizeof (char *);
    849 	kmem_free(msg->un.watch.vec, len);
    850 	kmem_free(msg, sizeof (*msg));
    851 }
    852 
    853 void
    854 unregister_xenbus_watch(struct xenbus_watch *watch)
    855 {
    856 	struct xs_stored_msg *msg;
    857 	char token[sizeof (watch) * 2 + 1];
    858 	int err;
    859 
    860 	(void) snprintf(token, sizeof (token), "%lX", (long)watch);
    861 
    862 	rw_enter(&xs_state.suspend_lock, RW_READER);
    863 
    864 	mutex_enter(&watches_lock);
    865 	ASSERT(find_watch(token));
    866 	list_remove(&watches, watch);
    867 	mutex_exit(&watches_lock);
    868 
    869 	DTRACE_XPV3(xenbus__unregister__watch, const char *, watch->node,
    870 	    uintptr_t, watch->callback, struct xenbus_watch *, watch);
    871 
    872 	err = xs_unwatch(watch->node, token);
    873 	if (err)
    874 		cmn_err(CE_WARN, "XENBUS Failed to release watch %s: %d",
    875 		    watch->node, err);
    876 
    877 	rw_exit(&xs_state.suspend_lock);
    878 
    879 	/* Cancel pending watch events. */
    880 	mutex_enter(&watch_events_lock);
    881 	msg = list_head(&watch_events);
    882 
    883 	while (msg != NULL) {
    884 		struct xs_stored_msg *tmp = list_next(&watch_events, msg);
    885 		if (msg->un.watch.handle == watch) {
    886 			list_remove(&watch_events, msg);
    887 			free_stored_msg(msg);
    888 		}
    889 		msg = tmp;
    890 	}
    891 
    892 	mutex_exit(&watch_events_lock);
    893 
    894 	/* Flush any currently-executing callback, unless we are it. :-) */
    895 	if (mutex_owner(&xenwatch_mutex) != curthread) {
    896 		mutex_enter(&xenwatch_mutex);
    897 		mutex_exit(&xenwatch_mutex);
    898 	}
    899 }
    900 
/*
 * Prepare xenbus for a suspend.  Takes suspend_lock as writer and then
 * request_mutex before calling xb_suspend().  Both are still held when
 * this function returns; xenbus_resume() is the function that releases
 * them.
 */
void
xenbus_suspend(void)
{
	rw_enter(&xs_state.suspend_lock, RW_WRITER);
	mutex_enter(&xs_state.request_mutex);

	xb_suspend();
}
    909 
/*
 * Bring xenbus back after a suspend.  Releases the request_mutex taken by
 * xenbus_suspend(), re-initialises the transport with xb_init() and
 * xb_setup_intr(), registers every watch on the watches list with
 * xenstore again (errors are ignored), and finally releases suspend_lock.
 */
void
xenbus_resume(void)
{
	struct xenbus_watch *watch;
	char token[sizeof (watch) * 2 + 1];

	/* Must be dropped before xs_watch() below, which sends requests. */
	mutex_exit(&xs_state.request_mutex);

	xb_init();
	xb_setup_intr();

	/* No need for watches_lock: the suspend_lock is sufficient. */
	for (watch = list_head(&watches); watch != NULL;
	    watch = list_next(&watches, watch)) {
		/* Same token format as register_xenbus_watch(). */
		(void) snprintf(token, sizeof (token), "%lX", (long)watch);
		(void) xs_watch(watch->node, token);
	}

	rw_exit(&xs_state.suspend_lock);
}
    930 
/*
 * Body of the xenwatch thread; never returns.  Repeatedly waits for a
 * watch event to appear on watch_events, removes it from the list, and
 * runs the watch's callback with xenwatch_mutex held.  The event is freed
 * once the callback returns.
 */
static void
xenwatch_thread(void)
{
	struct xs_stored_msg *msg;
	struct xenbus_watch *watch;

	for (;;) {
		/* Dequeue one event under the list lock only. */
		mutex_enter(&watch_events_lock);
		while (list_empty(&watch_events))
			cv_wait(&watch_events_cv, &watch_events_lock);
		msg = list_head(&watch_events);
		ASSERT(msg != NULL);
		list_remove(&watch_events, msg);
		watch = msg->un.watch.handle;
		mutex_exit(&watch_events_lock);

		/*
		 * Held across the callback; unregister_xenbus_watch() takes
		 * and drops this mutex to wait for a running callback.
		 */
		mutex_enter(&xenwatch_mutex);

		DTRACE_XPV4(xenbus__fire__watch,
		    const char *, watch->node,
		    uintptr_t, watch->callback,
		    struct xenbus_watch *, watch,
		    const char *, msg->un.watch.vec[XS_WATCH_PATH]);

		watch->callback(watch, (const char **)msg->un.watch.vec,
		    msg->un.watch.vec_size);

		free_stored_msg(msg);
		mutex_exit(&xenwatch_mutex);
	}
}
    962 
/*
 * Read one message from xenstore and queue it for its consumer.
 *
 * The header and then hdr.len bytes of body are read with xb_read(); the
 * body buffer is one byte larger than the payload so that it can be
 * NUL-terminated.  Watch events are split into a string vector and, if
 * the token matches a registered watch, queued on watch_events; all other
 * messages are queued on xs_state.reply_list as replies.
 *
 * Returns 0 on success, the xb_read() error, or EIO for a watch event
 * that contains no strings.
 */
static int
process_msg(void)
{
	struct xs_stored_msg *msg;
	char *body;
	int err, mlen;

	msg = kmem_alloc(sizeof (*msg), KM_SLEEP);

	err = xb_read(&msg->hdr, sizeof (msg->hdr));
	if (err) {
		kmem_free(msg, sizeof (*msg));
		return (err);
	}

	/* One extra byte for the terminator added below. */
	mlen = msg->hdr.len + 1;
	body = kmem_alloc(mlen, KM_SLEEP);

	err = xb_read(body, msg->hdr.len);
	if (err) {
		kmem_free(body, mlen);
		kmem_free(msg, sizeof (*msg));
		return (err);
	}

	body[mlen - 1] = '\0';

	if (msg->hdr.type == XS_WATCH_EVENT) {
		const char *token;
		/* On success split() frees body and returns a new vector. */
		msg->un.watch.vec = split(body, msg->hdr.len + 1,
		    &msg->un.watch.vec_size);
		if (msg->un.watch.vec == NULL) {
			/*
			 * NOTE(review): only msg is freed on this path; body
			 * is not freed here, so it must be released by
			 * split() when it returns NULL -- confirm.
			 */
			kmem_free(msg, sizeof (*msg));
			return (EIO);
		}

		/* watches_lock keeps the watch registered while we queue. */
		mutex_enter(&watches_lock);
		token = msg->un.watch.vec[XS_WATCH_TOKEN];
		if ((msg->un.watch.handle = find_watch(token)) != NULL) {
			mutex_enter(&watch_events_lock);

			DTRACE_XPV4(xenbus__enqueue__watch,
			    const char *, msg->un.watch.handle->node,
			    uintptr_t, msg->un.watch.handle->callback,
			    struct xenbus_watch *, msg->un.watch.handle,
			    const char *, msg->un.watch.vec[XS_WATCH_PATH]);

			list_insert_tail(&watch_events, msg);
			cv_broadcast(&watch_events_cv);
			mutex_exit(&watch_events_lock);
		} else {
			/* No such watch registered: drop the event. */
			free_stored_msg(msg);
		}
		mutex_exit(&watches_lock);
	} else {
		/* A reply: hand it to whoever is waiting in read_reply(). */
		msg->un.reply.body = body;
		mutex_enter(&xs_state.reply_lock);
		list_insert_tail(&xs_state.reply_list, msg);
		mutex_exit(&xs_state.reply_lock);
		cv_signal(&xs_state.reply_cv);
	}

	return (0);
}
   1027 
   1028 static void
   1029 xenbus_thread(void)
   1030 {
   1031 	int err;
   1032 
   1033 	/*
   1034 	 * We have to wait for interrupts to be ready, so we don't clash
   1035 	 * with the polled-IO code in read_reply().
   1036 	 */
   1037 	while (!interrupts_unleashed)
   1038 		delay(10);
   1039 
   1040 	for (;;) {
   1041 		err = process_msg();
   1042 		if (err)
   1043 			cmn_err(CE_WARN, "XENBUS error %d while reading "
   1044 			    "message", err);
   1045 	}
   1046 }
   1047 
   1048 /*
   1049  * When setting up xenbus, dom0 and domU have to take different paths, which
   1050  * makes this code a little confusing. For dom0:
   1051  *
   1052  * xs_early_init - mutex init only
   1053  * xs_dom0_init - called on xenbus dev attach: set up our xenstore page and
   1054  * event channel; start xenbus threads for responding to interrupts.
   1055  *
   1056  * And for domU:
   1057  *
   1058  * xs_early_init - mutex init; set up our xenstore page and event channel
   1059  * xs_domu_init - installation of IRQ handler; start xenbus threads.
   1060  *
   1061  * We need an early init on domU so we can use xenbus in polled mode to
   1062  * discover devices, VCPUs etc.
   1063  *
   1064  * On resume, we use xb_init() and xb_setup_intr() to restore xenbus to a
   1065  * working state.
   1066  */
   1067 
   1068 void
   1069 xs_early_init(void)
   1070 {
   1071 	list_create(&xs_state.reply_list, sizeof (struct xs_stored_msg),
   1072 	    offsetof(struct xs_stored_msg, list));
   1073 	list_create(&watch_events, sizeof (struct xs_stored_msg),
   1074 	    offsetof(struct xs_stored_msg, list));
   1075 	list_create(&watches, sizeof (struct xenbus_watch),
   1076 	    offsetof(struct xenbus_watch, list));
   1077 	list_create(&notify_list, sizeof (struct xenbus_notify),
   1078 	    offsetof(struct xenbus_notify, list));
   1079 	mutex_init(&xs_state.reply_lock, NULL, MUTEX_DEFAULT, NULL);
   1080 	mutex_init(&xs_state.request_mutex, NULL, MUTEX_DEFAULT, NULL);
   1081 	mutex_init(&notify_list_lock, NULL, MUTEX_DEFAULT, NULL);
   1082 	rw_init(&xs_state.suspend_lock, NULL, RW_DEFAULT, NULL);
   1083 	cv_init(&xs_state.reply_cv, NULL, CV_DEFAULT, NULL);
   1084 
   1085 	if (DOMAIN_IS_INITDOMAIN(xen_info))
   1086 		return;
   1087 
   1088 	xb_init();
   1089 	xenstore_up = B_TRUE;
   1090 }
   1091 
   1092 static void
   1093 xs_thread_init(void)
   1094 {
   1095 	(void) thread_create(NULL, 0, xenwatch_thread, NULL, 0, &p0,
   1096 	    TS_RUN, minclsyspri);
   1097 	(void) thread_create(NULL, 0, xenbus_thread, NULL, 0, &p0,
   1098 	    TS_RUN, minclsyspri);
   1099 	xenbus_taskq = taskq_create("xenbus_taskq", 1,
   1100 	    maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
   1101 	ASSERT(xenbus_taskq != NULL);
   1102 }
   1103 
   1104 void
   1105 xs_domu_init(void)
   1106 {
   1107 	if (DOMAIN_IS_INITDOMAIN(xen_info))
   1108 		return;
   1109 
   1110 	/*
   1111 	 * Add interrupt handler for xenbus now, must wait till after
   1112 	 * psm module is loaded.  All use of xenbus is in polled mode
   1113 	 * until xs_init is called since it is what kicks off the xs
   1114 	 * server threads.
   1115 	 */
   1116 	xs_thread_init();
   1117 	xb_setup_intr();
   1118 }
   1119 
   1120 
   1121 void
   1122 xs_dom0_init(void)
   1123 {
   1124 	static boolean_t initialized = B_FALSE;
   1125 
   1126 	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
   1127 
   1128 	/*
   1129 	 * The xenbus driver might be re-attaching.
   1130 	 */
   1131 	if (initialized)
   1132 		return;
   1133 
   1134 	xb_init();
   1135 	xs_thread_init();
   1136 	xb_setup_intr();
   1137 
   1138 	initialized = B_TRUE;
   1139 }
   1140