1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 #pragma ident "@(#)daio_posix.c 1.21 09/05/26 SMI" 23 24 /* 25 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms. 27 */ 28 29 #include <diskomizer/daio.h> 30 #include <aio.h> 31 #include <errno.h> 32 #include <stdlib.h> 33 #include <signal.h> 34 #include <poll.h> 35 #include <sys/types.h> 36 #include <sys/stat.h> 37 #include <unistd.h> 38 #include <fcntl.h> 39 #include <diskomizer/tnf.h> 40 #include <sys/systeminfo.h> 41 #include <strings.h> 42 #include <diskomizer/daio_dev.h> 43 #include <diskomizer/assert.h> 44 #include "args.h" 45 #include "findap.h" 46 47 #if defined(__SunOS_5_8) || defined(__SunOS_5_9) 48 49 /* 50 * Various definitions required to allow the event port code to compile 51 * but not run on 5.8 and 5.9. 52 * 53 * Shamelessly stolen from the 5.10 header files. 54 */ 55 56 #ifndef PORT_SOURCE_AIO 57 #define PORT_SOURCE_AIO 1 58 #endif 59 60 #ifndef SIGEV_PORT 61 #define SIGEV_PORT 4 62 #endif 63 64 typedef struct port_event { 65 int portev_events; /* event data is source specific */ 66 ushort_t portev_source; /* event source */ 67 ushort_t portev_pad; /* port internal use */ 68 uintptr_t portev_object; /* source specific object */ 69 void *portev_user; /* user cookie */ 70 } port_event_t; 71 72 typedef struct port_notify { 73 int portnfy_port; /* bind request(s) to port */ 74 void *portnfy_user; /* user defined */ 75 } port_notify_t; 76 77 int port_create(void); 78 int port_get(int, port_event_t *, struct timespec *); 79 int port_getn(int, port_event_t [], uint_t, uint_t *, struct timespec *); 80 81 #pragma weak port_create 82 #pragma weak port_get 83 #pragma weak port_getn 84 85 #else 86 #include <port.h> 87 #endif 88 89 90 struct daio_posix { 91 int next; 92 int error; 93 /* return count is only useful for debugging */ 94 int return_count:4; 95 int is_read:1; 96 hrtime_t start_time; 97 hrtime_t end_time; 98 daio_result_t *resultp; 99 struct daio_id *id; 100 port_notify_t pn; 101 struct aiocb aiocb; 102 }; 103 104 /* 105 * XOR this value into the sival_int so that this sival_int is zero. This 106 * allows diskomizer to spot bug 4520934 as the sival_int will never be zero. 107 */ 108 #define SIGVAL_INTMASK active_list_max 109 110 TNF_DECLARE_RECORD(struct daio_posix, daio_posix_str); 111 TNF_DEFINE_RECORD_4(struct daio_posix, daio_posix_str, 112 tnf_uint, aiocb.aio_fildes, 113 tnf_uint, aiocb.aio_nbytes, 114 tnf_opaque, aiocb.aio_buf, 115 tnf_opaque, aiocb.aio_offset) 116 117 static struct daio_posix *daio_posix_base; 118 static struct aiocb const **active_list; 119 static int active_list_max; 120 static int active_count = 0; 121 122 struct error { 123 int sig; 124 int exit; 125 int entry; 126 } error; 127 /* 128 * There are three lists of io structures. 129 * 1: The free_list containing unused io structures. 130 * 2: The siglist, containing ios that have been reaped by the signal 131 * handler. This list is protected by calls to sigprocmask, when 132 * accessed from outside of the signal handler. This is not used when 133 * using ports. 134 * 3: The done_list. This contains ios reaped by the signal handler but 135 * not yet reaped by the wait routine. When this list is empty this 136 * filled again by copying the siglist to it, saving the number of times 137 * the signal mask has to be changed. fast as changes to the signal 138 * handler are, this still buys quite a bit or performance when you are 139 * on the edge. This is also not used when using ports. 140 */ 141 struct list { 142 sigset_t mask; 143 int head; 144 int tail; 145 }; 146 static struct list siglist; 147 static struct list done_list; 148 static struct list free_list; 149 150 static int sigrtmin; 151 static int port; 152 153 typedef enum { 154 NEVER = 0, 155 USE_PORTS_CHILD, /* create each port in the child processes. */ 156 USE_PORTS_PARENT /* create one shared port in the master process */ 157 } use_ports_t; 158 159 static use_ports_t use_ports_enum; 160 161 static void *daio_posix_init(int count); 162 static void daio_posix_init_checker(const char *checker, 163 off64_t max_block_size); 164 static data_checker_t daio_posix_what_checker(void); 165 static ssize_t daio_posix_pwrite(int fildes, const uchar_t *bufp, size_t bufs, 166 off_t off, struct daio_id *id); 167 static ssize_t daio_posix_pread(int fildes, uchar_t *bufp, size_t bufs, 168 off_t off, struct daio_id *id); 169 static int daio_posix_awrite(int fildes, uchar_t *bufp, int bufs, off_t offset, 170 daio_result_t *resultp, struct daio_id *id); 171 static int daio_posix_aread(int fildes, uchar_t *bufp, int bufs, off_t offset, 172 daio_result_t *resultp, struct daio_id *id); 173 static daio_status_t daio_posix_status(daio_result_t *resultp); 174 static hrtime_t daio_posix_start_time(daio_result_t *resultp); 175 static hrtime_t daio_posix_end_time(daio_result_t *resultp); 176 static daio_result_t *daio_posix_wait(const struct timeval *timeout); 177 static daio_result_t *daio_posix_sigwait(const struct timeval *timeout); 178 static daio_result_t *daio_posix_portwait(const struct timeval *timeout); 179 static int daio_posix_cancel(daio_result_t *resultp); 180 static void daio_posix_fini(void *handle); 181 static int (*data_check)(uchar_t *buf, size_t bufs, struct daio_id *id); 182 static daio_result_t *(*do_daio_posix_wait)(const struct timeval *timeout); 183 184 struct daio_ops DAIO_OPS = { 185 daio_posix_init, 186 daio_posix_init_checker, 187 daio_posix_what_checker, 188 findap, 189 (daio_open_t)daio_dev_open, 190 daio_dev_close, 191 unlink, 192 stat64, 193 fstat64, 194 ioctl, 195 ftruncate64, 196 daio_dev_directio, 197 read_disko_vtoc, 198 daio_posix_pwrite, 199 daio_posix_pread, 200 daio_posix_awrite, 201 daio_posix_aread, 202 daio_posix_status, 203 daio_posix_start_time, 204 daio_posix_end_time, 205 daio_posix_wait, 206 daio_posix_cancel, 207 daio_posix_fini, 208 daio_dev_dd 209 }; 210 211 static int 212 is56() 213 { 214 char buf[257]; 215 216 if (sysinfo(SI_RELEASE, buf, sizeof (buf)) == -1) { 217 return (1); 218 } else if (strcmp(buf, "5.6") == 0) { 219 return (1); 220 } else { 221 return (0); 222 } 223 } 224 225 static int 226 none_active_port(void) 227 { 228 return (active_count == 0 ? 1 : 0); 229 } 230 231 static int 232 none_active_sig(void) 233 { 234 int i; 235 int ret = 1; 236 237 if (done_list.head == -1) { 238 sigprocmask(SIG_BLOCK, &siglist.mask, NULL); 239 if (siglist.head == -1) { 240 for (i = 0; i < active_list_max; i++) { 241 if (active_list[i]) { 242 ret = 0; 243 break; 244 } 245 } 246 } else { 247 done_list.head = siglist.head; 248 done_list.tail = siglist.tail; 249 siglist.head = siglist.tail = -1; 250 ret = 0; 251 } 252 sigprocmask(SIG_UNBLOCK, &siglist.mask, NULL); 253 } else { 254 ret = 0; 255 } 256 return (ret); 257 } 258 static int 259 bounds_check(daio_result_t *res) 260 { 261 long x; 262 263 if ((x = (long)res->private_data) >= 0 && x < active_list_max && 264 daio_posix_base[x].resultp == res) { 265 return (x); 266 } else { 267 return (-1); 268 } 269 } 270 static void 271 push_nosig(struct list *l, int entry) 272 { 273 assert(daio_posix_base[entry].next == -1); 274 if (l->tail == -1) { 275 assert(l->head == -1); 276 l->tail = entry; 277 } else { 278 daio_posix_base[l->head].next = entry; 279 } 280 daio_posix_base[entry].next = l->head; 281 l->head = entry; 282 } 283 static void 284 push(struct list *l, int entry, int inhandler) 285 { 286 if (!inhandler) { 287 sigprocmask(SIG_BLOCK, &l->mask, NULL); 288 push_nosig(l, entry); 289 sigprocmask(SIG_UNBLOCK, &l->mask, NULL); 290 } else { 291 push_nosig(l, entry); 292 } 293 } 294 static int 295 pop_nosig(struct list *l) 296 { 297 int x; 298 x = l->tail; 299 if (x == l->head) { 300 l->tail = l->head = -1; 301 } else { 302 l->tail = daio_posix_base[x].next; 303 } 304 daio_posix_base[x].next = -1; 305 return (x); 306 } 307 static int 308 pop(struct list *l) 309 { 310 int x; 311 312 sigprocmask(SIG_BLOCK, &l->mask, NULL); 313 x = pop_nosig(l); 314 sigprocmask(SIG_UNBLOCK, &l->mask, NULL); 315 return (x); 316 } 317 #define ADD_TO_SIG(X, Y) \ 318 assert(daio_posix_base[X].error != EINPROGRESS); \ 319 assert(siglist.head == -1 || \ 320 daio_posix_base[siglist.head].aiocb.aio_buf != NULL); \ 321 push(&siglist, X, Y) 322 /* 323 * The free list is only manipulated in the read, write and wait routines 324 * so does not need protecting from signals 325 */ 326 #define ADD_TO_FREE(X) push_nosig(&free_list, X) 327 328 static int 329 get_free(void) 330 { 331 return (pop_nosig(&free_list)); 332 } 333 static int 334 pop_from_done(void) 335 { 336 int i; 337 338 339 i = done_list.head; 340 if (i != -1) { 341 assert(daio_posix_base[i].error != EINPROGRESS); 342 assert(daio_posix_base[done_list.head].aiocb.aio_buf != 343 NULL); 344 i = pop_nosig(&done_list); 345 daio_posix_base[i].next = -1; 346 } else { 347 sigprocmask(SIG_BLOCK, &siglist.mask, NULL); 348 done_list.head = siglist.head; 349 done_list.tail = siglist.tail; 350 siglist.head = siglist.tail = -1; 351 sigprocmask(SIG_UNBLOCK, &siglist.mask, NULL); 352 if (-1 != done_list.head) { 353 i = pop_nosig(&done_list); 354 } 355 } 356 return (i); 357 } 358 static void 359 add_to_active(int entry) 360 { 361 assert(daio_posix_base[entry].next == -1); 362 active_list[entry] = &daio_posix_base[entry].aiocb; 363 daio_posix_base[entry].error = EINPROGRESS; 364 daio_posix_base[entry].start_time = gethrtime(); 365 active_count++; 366 } 367 static int 368 check_error(int i) 369 { 370 if (daio_posix_base[i].error == EINPROGRESS) { 371 daio_posix_base[i].error = 372 aio_error(&daio_posix_base[i].aiocb); 373 } 374 return (daio_posix_base[i].error); 375 } 376 #define is_active(entry) (active_list[entry] != NULL) 377 378 static void 379 remove_from_active(int entry) 380 { 381 assert(active_list[entry] != NULL); 382 daio_posix_base[entry].end_time = gethrtime(); 383 active_list[entry] = NULL; 384 active_count--; 385 } 386 387 static use_ports_t 388 use_ports(void) 389 { 390 return (use_ports_enum); 391 } 392 393 /*ARGSUSED*/ 394 static void 395 daio_posix_handler(int signo, siginfo_t *siginfo, void *context) 396 { 397 int entry = siginfo->si_value.sival_int ^ SIGVAL_INTMASK; 398 int x; 399 400 /* Have to work around 4120502 */ 401 if (siginfo->si_code != SI_ASYNCIO) { 402 error.sig++; 403 return; 404 } 405 406 if (entry < 0 || entry >= SIGVAL_INTMASK || 407 active_list[entry] == NULL) { 408 error.exit++; 409 error.entry = entry; 410 return; 411 } 412 x = check_error(entry); 413 if (x != EINPROGRESS && x != ECANCELED) { 414 daio_posix_base[entry].return_count++; 415 remove_from_active(entry); 416 ADD_TO_SIG(entry, 1); 417 } 418 } 419 420 static void 421 daio_posix_opts(void) 422 { 423 const struct option_ops *ops; 424 char x; 425 426 #if defined(__SunOS_5_8) || defined(__SunOS_5_9) 427 /* 428 * Check the weak symbols are actually available. 429 */ 430 if (port_create == NULL || port_get == NULL || port_getn == NULL) { 431 return; 432 } 433 #endif 434 435 ops = opts_init(); 436 if (ops->opt_bool("DAIO_POSIX_USE_PORTS", &x) == OPT_OK && x == 1) { 437 use_ports_enum = USE_PORTS_CHILD; 438 if (ops->opt_bool("DAIO_POSIX_SHARE_PORT", &x) == OPT_OK && 439 x == 1) { 440 use_ports_enum = USE_PORTS_PARENT; 441 port = port_create(); 442 } 443 } else { 444 use_ports_enum = USE_PORTS_CHILD; 445 } 446 opts_fini(); 447 } 448 449 static void * 450 daio_posix_init(int count) 451 { 452 int i; 453 struct sigaction action; 454 sigrtmin = SIGRTMIN; 455 456 free_list.head = free_list.tail = -1; 457 done_list.head = done_list.tail = -1; 458 siglist.head = siglist.tail = -1; 459 460 if (!use_ports()) { 461 462 sigemptyset(&free_list.mask); 463 sigemptyset(&done_list.mask); 464 sigemptyset(&siglist.mask); 465 sigaddset(&siglist.mask, sigrtmin); 466 467 action.sa_flags = SA_SIGINFO; 468 do_daio_posix_wait = daio_posix_sigwait; 469 action.sa_sigaction = daio_posix_handler; 470 sigemptyset(&action.sa_mask); 471 (void) sigaction(sigrtmin, &action, NULL); 472 } else { 473 do_daio_posix_wait = daio_posix_portwait; 474 if (use_ports() != 2) { 475 port = port_create(); 476 } 477 if (port == -1) { 478 return (NULL); 479 } 480 } 481 482 483 if ((daio_posix_base = 484 calloc(count, sizeof (struct daio_posix))) == NULL) { 485 return (NULL); 486 } 487 if ((active_list = calloc(count, sizeof (struct aiocb *))) == NULL) { 488 free(daio_posix_base); 489 return (NULL); 490 } 491 for (i = 0; i < count; i++) { 492 daio_posix_base[i].next = -1; 493 ADD_TO_FREE(i); 494 } 495 496 active_list_max = count; 497 498 return (daio_posix_base); 499 } 500 501 /*ARGSUSED*/ 502 static void 503 daio_posix_init_checker(const char *checker, off64_t max_block_size) 504 { 505 daio_posix_opts(); 506 507 data_check = choose_data_checker(checker); 508 } 509 static data_checker_t 510 daio_posix_what_checker(void) 511 { 512 return (data_check); 513 } 514 /*ARGSUSED*/ 515 static ssize_t 516 daio_posix_pwrite(int fildes, const uchar_t *bufp, size_t bufs, off_t off, 517 struct daio_id *id) 518 { 519 return (pwrite(fildes, bufp, bufs, off)); 520 } 521 static ssize_t 522 daio_posix_pread(int fildes, uchar_t *bufp, size_t bufs, off_t off, 523 struct daio_id *id) 524 { 525 ssize_t ret; 526 527 ret = pread(fildes, bufp, bufs, off); 528 529 if (id != NULL && ret == bufs) { 530 if (data_check(bufp, ret, id) < 0) 531 ret = DAIO_CORRUPT; 532 } 533 return (ret); 534 } 535 static void 536 set_notification(int this_one) 537 { 538 if (use_ports()) { 539 540 daio_posix_base[this_one].pn.portnfy_port = port; 541 daio_posix_base[this_one].pn.portnfy_user = (void *)this_one; 542 daio_posix_base[this_one].aiocb.aio_sigevent.sigev_notify = 543 SIGEV_PORT; 544 daio_posix_base[this_one 545 ].aiocb.aio_sigevent.sigev_value.sival_ptr = 546 &daio_posix_base[this_one].pn; 547 548 } else { 549 daio_posix_base[this_one].aiocb.aio_sigevent.sigev_notify = 550 SIGEV_SIGNAL; 551 daio_posix_base[this_one].aiocb.aio_sigevent.sigev_signo = 552 sigrtmin; 553 daio_posix_base[this_one 554 ].aiocb.aio_sigevent.sigev_value.sival_int = 555 this_one ^ SIGVAL_INTMASK; 556 } 557 } 558 559 static int 560 daio_posix_aread(int fildes, uchar_t *bufp, int bufs, off_t offset, 561 daio_result_t *resultp, struct daio_id *id) 562 { 563 int this_one; 564 int status; 565 566 if (free_list.head == -1) { 567 return (-1); 568 } 569 this_one = get_free(); 570 571 daio_posix_base[this_one].id = id; 572 daio_posix_base[this_one].return_count = 0; 573 daio_posix_base[this_one].is_read = 1; 574 daio_posix_base[this_one].aiocb.aio_buf = bufp; 575 daio_posix_base[this_one].aiocb.aio_fildes = fildes; 576 daio_posix_base[this_one].aiocb.aio_nbytes = bufs; 577 daio_posix_base[this_one].aiocb.aio_offset = offset; 578 579 set_notification(this_one); 580 assert(bufp); 581 add_to_active(this_one); 582 TNF_PROBE_2(aio_read, "aio_read", "sunw%cte%diskomizer%posix_aio read", 583 tnf_uint, this_one, this_one, 584 daio_posix_str, &daio_posix_base[this_one], 585 &daio_posix_base[this_one]); 586 if ((status = aio_read(&daio_posix_base[this_one].aiocb)) == -1) { 587 remove_from_active(this_one); 588 ADD_TO_FREE(this_one); 589 } else { 590 resultp->private_data = (void *) ((long)this_one); 591 daio_posix_base[this_one].resultp = resultp; 592 } 593 return (status); 594 } 595 static int 596 daio_posix_awrite(int fildes, uchar_t *bufp, int bufs, off_t offset, 597 daio_result_t *resultp, struct daio_id *id) 598 { 599 int this_one; 600 int status; 601 602 if (free_list.head == -1) { 603 return (-1); 604 } 605 this_one = get_free(); 606 607 assert(bufp); 608 daio_posix_base[this_one].id = id; 609 daio_posix_base[this_one].return_count = 0; 610 daio_posix_base[this_one].is_read = 0; 611 daio_posix_base[this_one].aiocb.aio_buf = bufp; 612 daio_posix_base[this_one].aiocb.aio_fildes = fildes; 613 daio_posix_base[this_one].aiocb.aio_nbytes = bufs; 614 daio_posix_base[this_one].aiocb.aio_offset = offset; 615 616 set_notification(this_one); 617 618 add_to_active(this_one); 619 TNF_PROBE_2(aio_write, "aio_write", 620 "sunw%cte%diskomizer%posix_aio write", 621 tnf_uint, this_one, this_one, 622 daio_posix_str, &daio_posix_base[this_one], 623 &daio_posix_base[this_one]); 624 if ((status = aio_write(&daio_posix_base[this_one].aiocb)) == -1) { 625 remove_from_active(this_one); 626 ADD_TO_FREE(this_one); 627 } else { 628 resultp->private_data = (void *)((long)this_one); 629 daio_posix_base[this_one].resultp = resultp; 630 } 631 return (status); 632 } 633 static daio_status_t 634 daio_posix_status(daio_result_t *resultp) 635 { 636 int x; 637 638 if ((x = bounds_check(resultp)) == -1) { 639 return (DAIO_COMPLETE); 640 } 641 642 if (is_active(x)) { 643 return (DAIO_INPROGRESS); 644 } 645 return (DAIO_COMPLETE); 646 } 647 static void 648 check_exit(void) 649 { 650 /* 651 * No need to fiddle with the signal mask unless there is an error. 652 * If there are errors to report they will get caught next time round. 653 */ 654 if (error.exit || error.sig) { 655 sigset_t mask; 656 sigemptyset(&mask); 657 sigaddset(&mask, sigrtmin); 658 sigprocmask(SIG_BLOCK, &mask, NULL); 659 if (error.sig) { 660 plog(LOG_WARNING, 661 "%d SIGRTMIN signal%s received not from aio," 662 " ignored.\n", 663 error.sig, error.sig > 1 ? "s" : ""); 664 error.sig = 0; 665 } 666 if (error.exit) { 667 plog(LOG_ERR, "invalid aio signal for entry %d\n", 668 error.entry); 669 sigprocmask(SIG_UNBLOCK, &mask, NULL); 670 exit(1); 671 } 672 sigprocmask(SIG_UNBLOCK, &mask, NULL); 673 } 674 } 675 676 daio_result_t * 677 daio_posix_wait(const struct timeval *timeout) 678 { 679 return (do_daio_posix_wait(timeout)); 680 } 681 682 daio_result_t * 683 daio_posix_sigwait(const struct timeval *timeout) 684 { 685 int this_one; 686 daio_result_t *resultp; 687 688 if (daio_posix_base == NULL) { 689 errno = EINVAL; 690 return ((daio_result_t *)-1); 691 } 692 693 check_exit(); 694 695 if ((this_one = pop_from_done()) == -1) { 696 if (timeout && !none_active_sig()) { 697 (void) poll(NULL, 0, (timeout->tv_sec * 1000) + 698 timeout->tv_usec/1000); 699 this_one = pop_from_done(); 700 } 701 if (this_one == -1) { 702 if (none_active_sig()) { 703 errno = EINVAL; 704 return ((daio_result_t *)-1); 705 } else { 706 return ((daio_result_t *)NULL); 707 } 708 } 709 } 710 resultp = daio_posix_base[this_one].resultp; 711 daio_posix_base[this_one].resultp = NULL; 712 resultp->result.daio_errno = daio_posix_base[this_one].error; 713 resultp->result.daio_return = 714 aio_return(&daio_posix_base[this_one].aiocb); 715 resultp->result.daio_start_time = daio_posix_base[this_one].start_time; 716 resultp->result.daio_end_time = daio_posix_base[this_one].end_time; 717 if (daio_posix_base[this_one].is_read && 718 resultp->result.daio_return == 719 daio_posix_base[this_one].aiocb.aio_nbytes && 720 data_check((uchar_t *)daio_posix_base[this_one].aiocb.aio_buf, 721 resultp->result.daio_return, daio_posix_base[this_one].id) < 0) { 722 resultp->result.daio_return = DAIO_CORRUPT; 723 } 724 assert(daio_posix_base[this_one].error != EINPROGRESS); 725 726 TNF_PROBE_4(aio_wait, "aio_wait", 727 "sunw%cte%diskomizer%posix_aio pwait", 728 tnf_uint, this_one, this_one, 729 tnf_uint, daio_return, resultp->result.daio_return, 730 tnf_uint, daio_errno, resultp->result.daio_errno, 731 daio_posix_str, &daio_posix_base[this_one], 732 &daio_posix_base[this_one]); 733 734 ADD_TO_FREE(this_one); 735 736 return (resultp); 737 } 738 739 daio_result_t * 740 daio_posix_portwait(const struct timeval *timeout) 741 { 742 port_event_t ev; 743 timespec_t *tsp; 744 timespec_t ts; 745 daio_result_t *resultp; 746 int this_one; 747 748 if (none_active_port()) { 749 errno = EINVAL; 750 return ((daio_result_t *)-1); 751 } 752 753 if (timeout != NULL) { 754 ts.tv_sec = timeout->tv_sec; 755 ts.tv_nsec = timeout->tv_usec * 1000; 756 tsp = &ts; 757 } else { 758 tsp = NULL; 759 } 760 761 if (port_get(port, &ev, tsp) == -1) { 762 return (NULL); 763 } 764 if (ev.portev_source != PORT_SOURCE_AIO) { 765 return (NULL); /* can't happen */ 766 } 767 768 this_one = (int)ev.portev_user; 769 770 if ((check_error(this_one) == EINPROGRESS)) { 771 return (NULL); /* can't happen */ 772 } 773 daio_posix_base[this_one].return_count++; 774 remove_from_active(this_one); 775 776 resultp = daio_posix_base[this_one].resultp; 777 daio_posix_base[this_one].resultp = NULL; 778 resultp->result.daio_errno = daio_posix_base[this_one].error; 779 resultp->result.daio_return = 780 aio_return(&daio_posix_base[this_one].aiocb); 781 resultp->result.daio_start_time = daio_posix_base[this_one].start_time; 782 resultp->result.daio_end_time = daio_posix_base[this_one].end_time; 783 if (daio_posix_base[this_one].is_read && 784 resultp->result.daio_return == 785 daio_posix_base[this_one].aiocb.aio_nbytes && 786 data_check((uchar_t *)daio_posix_base[this_one].aiocb.aio_buf, 787 resultp->result.daio_return, daio_posix_base[this_one].id) < 0) { 788 resultp->result.daio_return = DAIO_CORRUPT; 789 } 790 assert(daio_posix_base[this_one].error != EINPROGRESS); 791 792 TNF_PROBE_4(aio_wait, "aio_wait", 793 "sunw%cte%diskomizer%posix_aio pwait", 794 tnf_uint, this_one, this_one, 795 tnf_uint, daio_return, resultp->result.daio_return, 796 tnf_uint, daio_errno, resultp->result.daio_errno, 797 daio_posix_str, &daio_posix_base[this_one], 798 &daio_posix_base[this_one]); 799 800 ADD_TO_FREE(this_one); 801 802 return (resultp); 803 } 804 805 static int 806 daio_posix_cancel(daio_result_t *resultp) 807 { 808 int io; 809 int x; 810 811 if ((io = bounds_check(resultp)) == -1 || !is_active(io)) { 812 return (-1); 813 } 814 x = aio_cancel(daio_posix_base[io].aiocb.aio_fildes, 815 &daio_posix_base[io].aiocb); 816 if (x == AIO_CANCELED) { 817 assert(check_error(io) == ECANCELED); 818 } 819 return (x == AIO_CANCELED ? 0 : -1); 820 } 821 822 /*ARGSUSED*/ 823 static void 824 daio_posix_fini(void *handle) 825 { 826 if (use_ports()) { 827 close(port); 828 } 829 free(active_list); 830 free(daio_posix_base); 831 } 832 833 static hrtime_t 834 daio_posix_start_time(daio_result_t *resultp) 835 { 836 int x; 837 838 if ((x = bounds_check(resultp)) == -1) { 839 return (DAIO_NOT_STARTED); 840 } 841 return (daio_posix_base[x].start_time); 842 } 843 static hrtime_t 844 daio_posix_end_time(daio_result_t *resultp) 845 { 846 int x; 847 848 if ((x = bounds_check(resultp)) == -1) { 849 return (DAIO_NOT_STARTED); 850 } 851 return (daio_posix_base[x].end_time); 852 } 853