Home | History | Annotate | Download | only in diskomizer
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 #pragma ident	"@(#)daio_posix.c	1.21	09/05/26 SMI"
     23 
     24 /*
     25  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     26  * Use is subject to license terms.
     27  */
     28 
     29 #include <diskomizer/daio.h>
     30 #include <aio.h>
     31 #include <errno.h>
     32 #include <stdlib.h>
     33 #include <signal.h>
     34 #include <poll.h>
     35 #include <sys/types.h>
     36 #include <sys/stat.h>
     37 #include <unistd.h>
     38 #include <fcntl.h>
     39 #include <diskomizer/tnf.h>
     40 #include <sys/systeminfo.h>
     41 #include <strings.h>
     42 #include <diskomizer/daio_dev.h>
     43 #include <diskomizer/assert.h>
     44 #include "args.h"
     45 #include "findap.h"
     46 
#if defined(__SunOS_5_8) || defined(__SunOS_5_9)

/*
 * Various definitions required to allow the event port code to compile
 * but not run on 5.8 and 5.9.
 *
 * Shamelessly stolen from the 5.10 header files.
 */

#ifndef PORT_SOURCE_AIO
#define	PORT_SOURCE_AIO		1
#endif

#ifndef SIGEV_PORT
#define	SIGEV_PORT		4
#endif

/* Event record filled in by port_get(); see daio_posix_portwait(). */
typedef struct port_event {
	int		portev_events;	/* event data is source specific */
	ushort_t	portev_source;	/* event source */
	ushort_t	portev_pad;	/* port internal use */
	uintptr_t	portev_object;	/* source specific object */
	void		*portev_user;	/* user cookie */
} port_event_t;

/* Notification request attached to an aiocb; see set_notification(). */
typedef	struct	port_notify {
	int		portnfy_port;	/* bind request(s) to port */
	void		*portnfy_user;	/* user defined */
} port_notify_t;

int	port_create(void);
int	port_get(int, port_event_t *, struct timespec *);
int	port_getn(int, port_event_t [], uint_t, uint_t *, struct timespec *);

/*
 * The port functions are declared weak so that their addresses can be
 * compared against NULL at run time; daio_posix_opts() does exactly that
 * and leaves ports disabled when any of them is missing.
 */
#pragma weak port_create
#pragma weak port_get
#pragma weak port_getn

#else
#include <port.h>
#endif
     88 
     89 
     90 struct daio_posix {
     91 	int next;
     92 	int error;
     93 	/* return count is only useful for debugging */
     94 	int return_count:4;
     95 	int is_read:1;
     96 	hrtime_t start_time;
     97 	hrtime_t end_time;
     98 	daio_result_t *resultp;
     99 	struct daio_id *id;
    100 	port_notify_t   pn;
    101 	struct aiocb aiocb;
    102 };
    103 
    104 /*
 * XOR this value into the sival_int so that the sival_int is never zero. This
 * allows diskomizer to spot bug 4520934 as a valid sival_int will never be zero.
    107  */
    108 #define	SIGVAL_INTMASK active_list_max
    109 
/*
 * TNF record describing a struct daio_posix.  Only four aiocb members are
 * listed; the record is referenced as daio_posix_str by the TNF_PROBE calls
 * in the read, write and wait routines.
 */
TNF_DECLARE_RECORD(struct daio_posix, daio_posix_str);
TNF_DEFINE_RECORD_4(struct daio_posix, daio_posix_str,
		tnf_uint, aiocb.aio_fildes,
		tnf_uint, aiocb.aio_nbytes,
		tnf_opaque, aiocb.aio_buf,
		tnf_opaque, aiocb.aio_offset)
    116 
/* Array of request slots, allocated by daio_posix_init(). */
static struct daio_posix *daio_posix_base;
/*
 * Parallel array indexed by slot: points at the slot's aiocb while the I/O
 * is outstanding and is NULL otherwise.
 */
static struct aiocb const **active_list;
/* Number of slots in both arrays; also used as SIGVAL_INTMASK. */
static int active_list_max;
/* Number of non-NULL entries in active_list. */
static int active_count = 0;
    121 
/*
 * Problems noticed by the signal handler.  The handler only counts them;
 * check_exit() reports them later from normal context.
 */
struct error {
	int sig;	/* signals received whose si_code was not SI_ASYNCIO */
	int exit;	/* signals naming an invalid or inactive slot */
	int entry;	/* the slot index from the last such signal */
} error;
    127 /*
    128  * There are three lists of io structures.
    129  * 1: The free_list containing unused io structures.
    130  * 2: The siglist, containing ios that have been reaped by the signal
    131  *	handler. This list is protected by calls to sigprocmask, when
    132  *	accessed from outside of the signal handler. This is not used when
    133  *	using ports.
 * 3: The done_list.  This contains ios reaped by the signal handler but
 * 	not yet reaped by the wait routine.  When this list is empty it is
 *	filled again by moving the siglist to it, reducing the number of times
 *	the signal mask has to be changed. Fast as changes to the signal
 *	mask are, this still buys quite a bit of performance when you are
 *	on the edge. This is also not used when using ports.
    140  */
/*
 * A queue of slot indices threaded through daio_posix.next.  Entries are
 * pushed at head and popped from tail; both are -1 when the list is empty.
 */
struct list {
	sigset_t mask;	/* signals blocked by push()/pop() around updates */
	int head;	/* most recently pushed slot */
	int tail;	/* next slot to be popped */
};
static struct list siglist;
static struct list done_list;
static struct list free_list;

/* SIGRTMIN sampled once in daio_posix_init(); the aio completion signal. */
static int sigrtmin;
/* Event port descriptor returned by port_create(), used in port mode. */
static int port;
    152 
/*
 * How completions are collected.  NEVER must stay zero: callers test the
 * value for truth to choose between event ports and signals.
 */
typedef enum {
	NEVER = 0,	/* use signals, not event ports */
	USE_PORTS_CHILD, /* create each port in the child processes. */
	USE_PORTS_PARENT /* create one shared port in the master process */
} use_ports_t;

/* Selected mode; set by daio_posix_opts(), read through use_ports(). */
static use_ports_t use_ports_enum;
    160 
/*
 * Forward declarations for the routines placed in DAIO_OPS below, plus the
 * two wait implementations that daio_posix_wait() dispatches to.
 */
static void *daio_posix_init(int count);
static void daio_posix_init_checker(const char *checker,
	off64_t max_block_size);
static data_checker_t daio_posix_what_checker(void);
static ssize_t daio_posix_pwrite(int fildes, const uchar_t *bufp, size_t bufs,
		off_t off, struct daio_id *id);
static ssize_t daio_posix_pread(int fildes, uchar_t *bufp, size_t bufs,
		off_t off, struct daio_id *id);
static int daio_posix_awrite(int fildes, uchar_t *bufp, int bufs, off_t offset,
	daio_result_t *resultp, struct daio_id *id);
static int daio_posix_aread(int fildes, uchar_t *bufp, int bufs, off_t offset,
	daio_result_t *resultp, struct daio_id *id);
static daio_status_t daio_posix_status(daio_result_t *resultp);
static hrtime_t daio_posix_start_time(daio_result_t *resultp);
static hrtime_t daio_posix_end_time(daio_result_t *resultp);
static daio_result_t *daio_posix_wait(const struct timeval *timeout);
static daio_result_t *daio_posix_sigwait(const struct timeval *timeout);
static daio_result_t *daio_posix_portwait(const struct timeval *timeout);
static int daio_posix_cancel(daio_result_t *resultp);
static void daio_posix_fini(void *handle);
/* Data checker chosen by daio_posix_init_checker(); run on completed reads. */
static int (*data_check)(uchar_t *buf, size_t bufs, struct daio_id *id);
/* Wait implementation chosen by daio_posix_init(): signal or port based. */
static daio_result_t *(*do_daio_posix_wait)(const struct timeval *timeout);
    183 
/*
 * Operations vector for this back end.  The initialiser is positional, so
 * the entries must stay in the member order of struct daio_ops, which is
 * declared in a header not visible from this file.
 */
struct daio_ops DAIO_OPS = {
	daio_posix_init,
	daio_posix_init_checker,
	daio_posix_what_checker,
	findap,
	(daio_open_t)daio_dev_open,
	daio_dev_close,
	unlink,
	stat64,
	fstat64,
	ioctl,
	ftruncate64,
	daio_dev_directio,
	read_disko_vtoc,
	daio_posix_pwrite,
	daio_posix_pread,
	daio_posix_awrite,
	daio_posix_aread,
	daio_posix_status,
	daio_posix_start_time,
	daio_posix_end_time,
	daio_posix_wait,
	daio_posix_cancel,
	daio_posix_fini,
	daio_dev_dd
};
    210 
    211 static int
    212 is56()
    213 {
    214 	char buf[257];
    215 
    216 	if (sysinfo(SI_RELEASE, buf, sizeof (buf)) == -1) {
    217 		return (1);
    218 	} else if (strcmp(buf, "5.6") == 0) {
    219 		return (1);
    220 	} else {
    221 		return (0);
    222 	}
    223 }
    224 
    225 static int
    226 none_active_port(void)
    227 {
    228 	return (active_count == 0 ? 1 : 0);
    229 }
    230 
    231 static int
    232 none_active_sig(void)
    233 {
    234 	int i;
    235 	int ret = 1;
    236 
    237 	if (done_list.head == -1) {
    238 		sigprocmask(SIG_BLOCK, &siglist.mask, NULL);
    239 		if (siglist.head == -1) {
    240 			for (i = 0; i < active_list_max; i++) {
    241 				if (active_list[i]) {
    242 					ret = 0;
    243 					break;
    244 				}
    245 			}
    246 		} else {
    247 			done_list.head = siglist.head;
    248 			done_list.tail = siglist.tail;
    249 			siglist.head = siglist.tail = -1;
    250 			ret = 0;
    251 		}
    252 		sigprocmask(SIG_UNBLOCK, &siglist.mask, NULL);
    253 	} else {
    254 		ret = 0;
    255 	}
    256 	return (ret);
    257 }
    258 static int
    259 bounds_check(daio_result_t *res)
    260 {
    261 	long x;
    262 
    263 	if ((x = (long)res->private_data) >= 0 && x < active_list_max &&
    264 	    daio_posix_base[x].resultp == res) {
    265 		return (x);
    266 	} else {
    267 		return (-1);
    268 	}
    269 }
    270 static void
    271 push_nosig(struct list *l, int entry)
    272 {
    273 	assert(daio_posix_base[entry].next == -1);
    274 	if (l->tail == -1) {
    275 		assert(l->head == -1);
    276 		l->tail = entry;
    277 	} else {
    278 		daio_posix_base[l->head].next = entry;
    279 	}
    280 	daio_posix_base[entry].next = l->head;
    281 	l->head = entry;
    282 }
    283 static void
    284 push(struct list *l, int entry, int inhandler)
    285 {
    286 	if (!inhandler) {
    287 		sigprocmask(SIG_BLOCK, &l->mask, NULL);
    288 		push_nosig(l, entry);
    289 		sigprocmask(SIG_UNBLOCK, &l->mask, NULL);
    290 	} else {
    291 		push_nosig(l, entry);
    292 	}
    293 }
    294 static int
    295 pop_nosig(struct list *l)
    296 {
    297 	int x;
    298 	x = l->tail;
    299 	if (x == l->head) {
    300 		l->tail = l->head = -1;
    301 	} else {
    302 		l->tail = daio_posix_base[x].next;
    303 	}
    304 	daio_posix_base[x].next = -1;
    305 	return (x);
    306 }
    307 static int
    308 pop(struct list *l)
    309 {
    310 	int x;
    311 
    312 	sigprocmask(SIG_BLOCK, &l->mask, NULL);
    313 	x = pop_nosig(l);
    314 	sigprocmask(SIG_UNBLOCK, &l->mask, NULL);
    315 	return (x);
    316 }
    317 #define	ADD_TO_SIG(X, Y) \
    318 		assert(daio_posix_base[X].error != EINPROGRESS); \
    319 		assert(siglist.head == -1 || \
    320 		daio_posix_base[siglist.head].aiocb.aio_buf != NULL); \
    321 		push(&siglist, X, Y)
    322 /*
    323  * The free list is only manipulated in the read, write and wait routines
    324  * so does not need protecting from signals
    325  */
    326 #define	ADD_TO_FREE(X) push_nosig(&free_list, X)
    327 
    328 static int
    329 get_free(void)
    330 {
    331 	return (pop_nosig(&free_list));
    332 }
    333 static int
    334 pop_from_done(void)
    335 {
    336 	int i;
    337 
    338 
    339 	i = done_list.head;
    340 	if (i != -1) {
    341 		assert(daio_posix_base[i].error != EINPROGRESS);
    342 		assert(daio_posix_base[done_list.head].aiocb.aio_buf !=
    343 		    NULL);
    344 		i = pop_nosig(&done_list);
    345 		daio_posix_base[i].next = -1;
    346 	} else {
    347 		sigprocmask(SIG_BLOCK, &siglist.mask, NULL);
    348 		done_list.head = siglist.head;
    349 		done_list.tail = siglist.tail;
    350 		siglist.head = siglist.tail = -1;
    351 		sigprocmask(SIG_UNBLOCK, &siglist.mask, NULL);
    352 		if (-1 != done_list.head) {
    353 			i = pop_nosig(&done_list);
    354 		}
    355 	}
    356 	return (i);
    357 }
    358 static void
    359 add_to_active(int entry)
    360 {
    361 	assert(daio_posix_base[entry].next == -1);
    362 	active_list[entry] = &daio_posix_base[entry].aiocb;
    363 	daio_posix_base[entry].error = EINPROGRESS;
    364 	daio_posix_base[entry].start_time = gethrtime();
    365 	active_count++;
    366 }
    367 static int
    368 check_error(int i)
    369 {
    370 	if (daio_posix_base[i].error == EINPROGRESS) {
    371 		daio_posix_base[i].error =
    372 		    aio_error(&daio_posix_base[i].aiocb);
    373 	}
    374 	return (daio_posix_base[i].error);
    375 }
/* True while slot `entry' has an I/O outstanding (see add_to_active()). */
#define	is_active(entry) (active_list[entry] != NULL)
    377 
    378 static void
    379 remove_from_active(int entry)
    380 {
    381 	assert(active_list[entry] != NULL);
    382 	daio_posix_base[entry].end_time = gethrtime();
    383 	active_list[entry] = NULL;
    384 	active_count--;
    385 }
    386 
    387 static use_ports_t
    388 use_ports(void)
    389 {
    390 	return (use_ports_enum);
    391 }
    392 
/*
 * Signal handler for the aio completion signal (signal mode only).
 *
 * Decodes the slot index from the signal value, and if that slot's I/O has
 * finished, takes it off the active list and queues it on the signal list.
 * Anything unexpected is only counted in `error'; check_exit() reports it
 * later from normal context.
 */
/*ARGSUSED*/
static void
daio_posix_handler(int signo, siginfo_t *siginfo, void *context)
{
	/* Undo the XOR applied by set_notification(). */
	int entry = siginfo->si_value.sival_int ^ SIGVAL_INTMASK;
	int x;

	/* Have to work around 4120502 */
	if (siginfo->si_code != SI_ASYNCIO) {
		error.sig++;
		return;
	}

	/* Reject indices outside the slot array and slots with no I/O. */
	if (entry < 0 || entry >= SIGVAL_INTMASK ||
	    active_list[entry] == NULL) {
		error.exit++;
		error.entry = entry;
		return;
	}
	x = check_error(entry);
	/* Slots still in progress or cancelled are left on the active list. */
	if (x != EINPROGRESS && x != ECANCELED) {
		daio_posix_base[entry].return_count++;
		remove_from_active(entry);
		ADD_TO_SIG(entry, 1);
	}
}
    419 
    420 static void
    421 daio_posix_opts(void)
    422 {
    423 	const struct option_ops *ops;
    424 	char x;
    425 
    426 #if defined(__SunOS_5_8) || defined(__SunOS_5_9)
    427 	/*
    428 	 * Check the weak symbols are actually available.
    429 	 */
    430 	if (port_create == NULL || port_get == NULL || port_getn == NULL) {
    431 		return;
    432 	}
    433 #endif
    434 
    435 	ops = opts_init();
    436 	if (ops->opt_bool("DAIO_POSIX_USE_PORTS", &x) == OPT_OK && x == 1) {
    437 		use_ports_enum = USE_PORTS_CHILD;
    438 		if (ops->opt_bool("DAIO_POSIX_SHARE_PORT", &x) == OPT_OK &&
    439 		    x == 1) {
    440 			use_ports_enum = USE_PORTS_PARENT;
    441 			port = port_create();
    442 		}
    443 	} else {
    444 		use_ports_enum = USE_PORTS_CHILD;
    445 	}
    446 	opts_fini();
    447 }
    448 
    449 static void *
    450 daio_posix_init(int count)
    451 {
    452 	int i;
    453 	struct sigaction action;
    454 	sigrtmin = SIGRTMIN;
    455 
    456 	free_list.head = free_list.tail = -1;
    457 	done_list.head = done_list.tail = -1;
    458 	siglist.head = siglist.tail = -1;
    459 
    460 	if (!use_ports()) {
    461 
    462 		sigemptyset(&free_list.mask);
    463 		sigemptyset(&done_list.mask);
    464 		sigemptyset(&siglist.mask);
    465 		sigaddset(&siglist.mask, sigrtmin);
    466 
    467 		action.sa_flags = SA_SIGINFO;
    468 		do_daio_posix_wait = daio_posix_sigwait;
    469 		action.sa_sigaction = daio_posix_handler;
    470 		sigemptyset(&action.sa_mask);
    471 		(void) sigaction(sigrtmin, &action, NULL);
    472 	} else {
    473 		do_daio_posix_wait = daio_posix_portwait;
    474 		if (use_ports() != 2) {
    475 			port = port_create();
    476 		}
    477 		if (port == -1) {
    478 			return (NULL);
    479 		}
    480 	}
    481 
    482 
    483 	if ((daio_posix_base =
    484 		calloc(count, sizeof (struct daio_posix))) == NULL) {
    485 		return (NULL);
    486 	}
    487 	if ((active_list = calloc(count, sizeof (struct aiocb *))) == NULL) {
    488 		free(daio_posix_base);
    489 		return (NULL);
    490 	}
    491 	for (i = 0; i < count; i++) {
    492 		daio_posix_base[i].next = -1;
    493 		ADD_TO_FREE(i);
    494 	}
    495 
    496 	active_list_max = count;
    497 
    498 	return (daio_posix_base);
    499 }
    500 
/*
 * Select the data checker named by `checker' and, before that, read the
 * event port options.  `max_block_size' is not used by this back end.
 */
/*ARGSUSED*/
static void
daio_posix_init_checker(const char *checker, off64_t max_block_size)
{
	/* Decides between event ports and signals; see daio_posix_opts(). */
	daio_posix_opts();

	data_check = choose_data_checker(checker);
}
    509 static data_checker_t
    510 daio_posix_what_checker(void)
    511 {
    512 	return (data_check);
    513 }
    514 /*ARGSUSED*/
    515 static ssize_t
    516 daio_posix_pwrite(int fildes, const uchar_t *bufp, size_t bufs, off_t off,
    517 		struct daio_id *id)
    518 {
    519 	return (pwrite(fildes, bufp, bufs, off));
    520 }
    521 static ssize_t
    522 daio_posix_pread(int fildes, uchar_t *bufp, size_t bufs, off_t off,
    523 		struct daio_id *id)
    524 {
    525 	ssize_t ret;
    526 
    527 	ret = pread(fildes, bufp, bufs, off);
    528 
    529 	if (id != NULL && ret == bufs) {
    530 		if (data_check(bufp, ret, id) < 0)
    531 			ret = DAIO_CORRUPT;
    532 	}
    533 	return (ret);
    534 }
    535 static void
    536 set_notification(int this_one)
    537 {
    538 	if (use_ports()) {
    539 
    540 		daio_posix_base[this_one].pn.portnfy_port = port;
    541 		daio_posix_base[this_one].pn.portnfy_user = (void *)this_one;
    542 		daio_posix_base[this_one].aiocb.aio_sigevent.sigev_notify =
    543 		    SIGEV_PORT;
    544 		daio_posix_base[this_one
    545 		    ].aiocb.aio_sigevent.sigev_value.sival_ptr =
    546 		    &daio_posix_base[this_one].pn;
    547 
    548 	} else {
    549 		daio_posix_base[this_one].aiocb.aio_sigevent.sigev_notify =
    550 		    SIGEV_SIGNAL;
    551 		daio_posix_base[this_one].aiocb.aio_sigevent.sigev_signo =
    552 		    sigrtmin;
    553 		daio_posix_base[this_one
    554 		    ].aiocb.aio_sigevent.sigev_value.sival_int =
    555 		    this_one ^ SIGVAL_INTMASK;
    556 	}
    557 }
    558 
    559 static int
    560 daio_posix_aread(int fildes, uchar_t *bufp, int bufs, off_t offset,
    561 	daio_result_t *resultp, struct daio_id *id)
    562 {
    563 	int this_one;
    564 	int status;
    565 
    566 	if (free_list.head == -1) {
    567 		return (-1);
    568 	}
    569 	this_one = get_free();
    570 
    571 	daio_posix_base[this_one].id = id;
    572 	daio_posix_base[this_one].return_count = 0;
    573 	daio_posix_base[this_one].is_read = 1;
    574 	daio_posix_base[this_one].aiocb.aio_buf = bufp;
    575 	daio_posix_base[this_one].aiocb.aio_fildes = fildes;
    576 	daio_posix_base[this_one].aiocb.aio_nbytes = bufs;
    577 	daio_posix_base[this_one].aiocb.aio_offset = offset;
    578 
    579 	set_notification(this_one);
    580 	assert(bufp);
    581 	add_to_active(this_one);
    582 	TNF_PROBE_2(aio_read, "aio_read", "sunw%cte%diskomizer%posix_aio read",
    583 	    tnf_uint, this_one, this_one,
    584 	    daio_posix_str, &daio_posix_base[this_one],
    585 	    &daio_posix_base[this_one]);
    586 	if ((status = aio_read(&daio_posix_base[this_one].aiocb)) == -1) {
    587 		remove_from_active(this_one);
    588 		ADD_TO_FREE(this_one);
    589 	} else {
    590 		resultp->private_data = (void *) ((long)this_one);
    591 		daio_posix_base[this_one].resultp = resultp;
    592 	}
    593 	return (status);
    594 }
    595 static int
    596 daio_posix_awrite(int fildes, uchar_t *bufp, int bufs, off_t offset,
    597 	daio_result_t *resultp, struct daio_id *id)
    598 {
    599 	int this_one;
    600 	int status;
    601 
    602 	if (free_list.head == -1) {
    603 		return (-1);
    604 	}
    605 	this_one = get_free();
    606 
    607 	assert(bufp);
    608 	daio_posix_base[this_one].id = id;
    609 	daio_posix_base[this_one].return_count = 0;
    610 	daio_posix_base[this_one].is_read = 0;
    611 	daio_posix_base[this_one].aiocb.aio_buf = bufp;
    612 	daio_posix_base[this_one].aiocb.aio_fildes = fildes;
    613 	daio_posix_base[this_one].aiocb.aio_nbytes = bufs;
    614 	daio_posix_base[this_one].aiocb.aio_offset = offset;
    615 
    616 	set_notification(this_one);
    617 
    618 	add_to_active(this_one);
    619 	TNF_PROBE_2(aio_write, "aio_write",
    620 	    "sunw%cte%diskomizer%posix_aio write",
    621 	    tnf_uint, this_one, this_one,
    622 	    daio_posix_str, &daio_posix_base[this_one],
    623 	    &daio_posix_base[this_one]);
    624 	if ((status = aio_write(&daio_posix_base[this_one].aiocb)) == -1) {
    625 		remove_from_active(this_one);
    626 		ADD_TO_FREE(this_one);
    627 	} else {
    628 		resultp->private_data = (void *)((long)this_one);
    629 		daio_posix_base[this_one].resultp = resultp;
    630 	}
    631 	return (status);
    632 }
    633 static daio_status_t
    634 daio_posix_status(daio_result_t *resultp)
    635 {
    636 	int x;
    637 
    638 	if ((x = bounds_check(resultp)) == -1) {
    639 		return (DAIO_COMPLETE);
    640 	}
    641 
    642 	if (is_active(x)) {
    643 		return (DAIO_INPROGRESS);
    644 	}
    645 	return (DAIO_COMPLETE);
    646 }
    647 static void
    648 check_exit(void)
    649 {
    650 	/*
    651 	 * No need to fiddle with the signal mask unless there is an error.
    652 	 * If there are errors to report they will get caught next time round.
    653 	 */
    654 	if (error.exit || error.sig) {
    655 		sigset_t mask;
    656 		sigemptyset(&mask);
    657 		sigaddset(&mask, sigrtmin);
    658 		sigprocmask(SIG_BLOCK, &mask, NULL);
    659 		if (error.sig)  {
    660 			plog(LOG_WARNING,
    661 			    "%d SIGRTMIN signal%s received not from aio,"
    662 			    " ignored.\n",
    663 			    error.sig, error.sig > 1 ? "s" : "");
    664 			error.sig = 0;
    665 		}
    666 		if (error.exit) {
    667 			plog(LOG_ERR, "invalid aio signal for entry %d\n",
    668 			    error.entry);
    669 			sigprocmask(SIG_UNBLOCK, &mask, NULL);
    670 			exit(1);
    671 		}
    672 		sigprocmask(SIG_UNBLOCK, &mask, NULL);
    673 	}
    674 }
    675 
    676 daio_result_t *
    677 daio_posix_wait(const struct timeval *timeout)
    678 {
    679 	return (do_daio_posix_wait(timeout));
    680 }
    681 
    682 daio_result_t *
    683 daio_posix_sigwait(const struct timeval *timeout)
    684 {
    685 	int this_one;
    686 	daio_result_t *resultp;
    687 
    688 	if (daio_posix_base == NULL) {
    689 		errno = EINVAL;
    690 		return ((daio_result_t *)-1);
    691 	}
    692 
    693 	check_exit();
    694 
    695 	if ((this_one = pop_from_done()) == -1) {
    696 		if (timeout && !none_active_sig()) {
    697 			(void) poll(NULL, 0, (timeout->tv_sec * 1000) +
    698 			    timeout->tv_usec/1000);
    699 			this_one = pop_from_done();
    700 		}
    701 		if (this_one == -1) {
    702 			if (none_active_sig()) {
    703 				errno = EINVAL;
    704 				return ((daio_result_t *)-1);
    705 			} else {
    706 				return ((daio_result_t *)NULL);
    707 			}
    708 		}
    709 	}
    710 	resultp = daio_posix_base[this_one].resultp;
    711 	daio_posix_base[this_one].resultp = NULL;
    712 	resultp->result.daio_errno = daio_posix_base[this_one].error;
    713 	resultp->result.daio_return =
    714 	    aio_return(&daio_posix_base[this_one].aiocb);
    715 	resultp->result.daio_start_time = daio_posix_base[this_one].start_time;
    716 	resultp->result.daio_end_time = daio_posix_base[this_one].end_time;
    717 	if (daio_posix_base[this_one].is_read &&
    718 	    resultp->result.daio_return ==
    719 	    daio_posix_base[this_one].aiocb.aio_nbytes &&
    720 	    data_check((uchar_t *)daio_posix_base[this_one].aiocb.aio_buf,
    721 	    resultp->result.daio_return, daio_posix_base[this_one].id) < 0) {
    722 		resultp->result.daio_return = DAIO_CORRUPT;
    723 	}
    724 	assert(daio_posix_base[this_one].error != EINPROGRESS);
    725 
    726 	TNF_PROBE_4(aio_wait, "aio_wait",
    727 	    "sunw%cte%diskomizer%posix_aio pwait",
    728 	    tnf_uint, this_one, this_one,
    729 	    tnf_uint, daio_return, resultp->result.daio_return,
    730 	    tnf_uint, daio_errno, resultp->result.daio_errno,
    731 	    daio_posix_str, &daio_posix_base[this_one],
    732 	    &daio_posix_base[this_one]);
    733 
    734 	ADD_TO_FREE(this_one);
    735 
    736 	return (resultp);
    737 }
    738 
    739 daio_result_t *
    740 daio_posix_portwait(const struct timeval *timeout)
    741 {
    742 	port_event_t ev;
    743 	timespec_t *tsp;
    744 	timespec_t ts;
    745 	daio_result_t *resultp;
    746 	int this_one;
    747 
    748 	if (none_active_port()) {
    749 		errno = EINVAL;
    750 		return ((daio_result_t *)-1);
    751 	}
    752 
    753 	if (timeout != NULL) {
    754 		ts.tv_sec = timeout->tv_sec;
    755 		ts.tv_nsec = timeout->tv_usec * 1000;
    756 		tsp = &ts;
    757 	} else {
    758 		tsp = NULL;
    759 	}
    760 
    761 	if (port_get(port, &ev, tsp) == -1) {
    762 		return (NULL);
    763 	}
    764 	if (ev.portev_source != PORT_SOURCE_AIO) {
    765 		return (NULL); /* can't happen */
    766 	}
    767 
    768 	this_one = (int)ev.portev_user;
    769 
    770 	if ((check_error(this_one) == EINPROGRESS)) {
    771 		return (NULL); /* can't happen */
    772 	}
    773 	daio_posix_base[this_one].return_count++;
    774 	remove_from_active(this_one);
    775 
    776 	resultp = daio_posix_base[this_one].resultp;
    777 	daio_posix_base[this_one].resultp = NULL;
    778 	resultp->result.daio_errno = daio_posix_base[this_one].error;
    779 	resultp->result.daio_return =
    780 	    aio_return(&daio_posix_base[this_one].aiocb);
    781 	resultp->result.daio_start_time = daio_posix_base[this_one].start_time;
    782 	resultp->result.daio_end_time = daio_posix_base[this_one].end_time;
    783 	if (daio_posix_base[this_one].is_read &&
    784 	    resultp->result.daio_return ==
    785 	    daio_posix_base[this_one].aiocb.aio_nbytes &&
    786 	    data_check((uchar_t *)daio_posix_base[this_one].aiocb.aio_buf,
    787 	    resultp->result.daio_return, daio_posix_base[this_one].id) < 0) {
    788 		resultp->result.daio_return = DAIO_CORRUPT;
    789 	}
    790 	assert(daio_posix_base[this_one].error != EINPROGRESS);
    791 
    792 	TNF_PROBE_4(aio_wait, "aio_wait",
    793 	    "sunw%cte%diskomizer%posix_aio pwait",
    794 	    tnf_uint, this_one, this_one,
    795 	    tnf_uint, daio_return, resultp->result.daio_return,
    796 	    tnf_uint, daio_errno, resultp->result.daio_errno,
    797 	    daio_posix_str, &daio_posix_base[this_one],
    798 	    &daio_posix_base[this_one]);
    799 
    800 	ADD_TO_FREE(this_one);
    801 
    802 	return (resultp);
    803 }
    804 
    805 static int
    806 daio_posix_cancel(daio_result_t *resultp)
    807 {
    808 	int io;
    809 	int x;
    810 
    811 	if ((io = bounds_check(resultp)) == -1 || !is_active(io)) {
    812 		return (-1);
    813 	}
    814 	x = aio_cancel(daio_posix_base[io].aiocb.aio_fildes,
    815 	    &daio_posix_base[io].aiocb);
    816 	if (x == AIO_CANCELED) {
    817 		assert(check_error(io) == ECANCELED);
    818 	}
    819 	return (x == AIO_CANCELED ? 0 : -1);
    820 }
    821 
    822 /*ARGSUSED*/
    823 static void
    824 daio_posix_fini(void *handle)
    825 {
    826 	if (use_ports()) {
    827 		close(port);
    828 	}
    829 	free(active_list);
    830 	free(daio_posix_base);
    831 }
    832 
    833 static hrtime_t
    834 daio_posix_start_time(daio_result_t *resultp)
    835 {
    836 	int x;
    837 
    838 	if ((x = bounds_check(resultp)) == -1) {
    839 		return (DAIO_NOT_STARTED);
    840 	}
    841 	return (daio_posix_base[x].start_time);
    842 }
    843 static hrtime_t
    844 daio_posix_end_time(daio_result_t *resultp)
    845 {
    846 	int x;
    847 
    848 	if ((x = bounds_check(resultp)) == -1) {
    849 		return (DAIO_NOT_STARTED);
    850 	}
    851 	return (daio_posix_base[x].end_time);
    852 }
    853