1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 #pragma ident "@(#)daio_fs.c 1.11 09/05/26 SMI" 23 24 /* 25 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms. 27 */ 28 29 #include <diskomizer/daio.h> 30 #include <diskomizer/log.h> 31 #include "findap.h" 32 #include <sys/types.h> 33 #include <errno.h> 34 #include <sys/stat.h> 35 #include <unistd.h> 36 #include <fcntl.h> 37 #include <stdlib.h> 38 #include <string.h> 39 #include <limits.h> 40 #include <dirent.h> 41 #include "args.h" 42 #include "daio_async.h" 43 44 /* These are required so this will build, but not run, on 5.6, 5.7 and 5.8 */ 45 #if defined(__SunOS_5_6) || defined(__SunOS_5_7) || defined(__SunOS_5_8) 46 47 extern int openat(int fildes, const char *path, int oflag, mode_t mode); 48 extern int unlinkat(int dirfd, const char *path, int flag); 49 extern int renameat(int fromfd, const char *old, int tofd, const 50 char *new); 51 extern DIR* fdopendir(int fd); 52 #define O_XATTR 0x4000 53 54 #pragma weak openat 55 #pragma weak unlinkat 56 #pragma weak renameat 57 #pragma weak fdopendir 58 59 #endif 60 61 62 static int daio_fsrw_write(int fildes, uchar_t *bufp, int bufs, off_t 63 offset, daio_result_t *resultp, struct daio_id *id); 64 static int daio_fsrw_read(int fildes, uchar_t *bufp, int bufs, off_t 65 offset, daio_result_t *resultp, struct daio_id *id); 66 static ssize_t do_pwrite(int fildes, void *bufp, size_t bufs, off_t off); 67 static ssize_t do_unlinkat_and_pwrite(int fildes, void *bufp, size_t bufs, 68 off_t off); 69 static ssize_t do_unlinkpath_and_pwrite(int fildes, void *bufp, size_t bufs, 70 off_t off); 71 static ssize_t do_renameat_and_pwrite_and_unlink(int fildes, void *bufp, 72 size_t bufs, off_t off); 73 static ssize_t do_renamepath_and_pwrite_and_unlink(int fildes, void *bufp, 74 size_t bufs, off_t off); 75 static ssize_t (*do_pwritep)(int fildes, void *bufp, size_t bufs, off_t off); 76 static int fs_close(int); 77 static int fd_openat_blk(int fildes, int flag, off_t off); 78 static int fd_openpath_blk(int fildes, int flag, off_t off); 79 static int (*fd_open_blk)(int fildes, int flag, off_t off) = fd_openat_blk; 80 static int (*fd_directio_open_blk)(int fildes, int flag, off_t off); 81 82 #define USEOPENAT (fd_open_blk == fd_openat_blk) 83 84 static struct { 85 mode_t mode; 86 off64_t obj_size; /* the size of the "device" */ 87 off64_t file_size; /* the size of each file */ 88 off64_t block_size; /* the daio_fs block size */ 89 off64_t max_block_size; /* the diskomizer block_size */ 90 off64_t blocks_per_file; /* number of blocks to store per file */ 91 uint_t wflags; 92 uint_t attr; 93 } fs_opts; 94 95 struct fs_fds { 96 char *path; 97 int pathlen; 98 int fd; 99 }; 100 101 static struct fs_fds *fs_fdsp; 102 static int fd_nfds; 103 104 static char * 105 fd2pathname(int fildes, offset_t off) 106 { 107 char *buf = malloc(fs_fdsp[fildes].pathlen + 19); 108 int ret; 109 110 if (buf == NULL) { 111 return (NULL); 112 } 113 114 sprintf(buf, "%s/%llx", 115 fs_fdsp[fildes].path, off/fs_opts.file_size); 116 return (buf); 117 } 118 119 static void 120 fs_init(const char *checker, off64_t max_block_size) 121 { 122 const struct option_ops *ops; 123 uint64_t ll; 124 long i; 125 char c; 126 127 ops = opts_init(); 128 129 fs_opts.max_block_size = max_block_size; 130 131 if (ops->opt_long_long("FILE_SIZE", (long long *)&ll) != OPT_OK || 132 ll == 0) { 133 plog(LOG_ERR, "DAIO_FS requires FILE_SIZE to be set\n"); 134 exit(-1); 135 } 136 fs_opts.obj_size = ll; 137 if (ops->opt_long("DAIO_FS_BLOCKS_PER_FILE", &i) != OPT_OK || i == 0) { 138 i = 1; 139 } 140 fs_opts.blocks_per_file = i; 141 if (ops->opt_long_long("DAIO_FS_BLOCK_SIZE", 142 (long long *)&fs_opts.block_size) != OPT_OK || 143 fs_opts.block_size == 0) { 144 fs_opts.block_size = fs_opts.max_block_size; 145 } 146 if (fs_opts.block_size < fs_opts.max_block_size) { 147 plog(LOG_WARNING, "DAIO_FS_BLOCK_SIZE must be greater than or " 148 "equal to the maximum block size\n"); 149 exit(-1); 150 } 151 152 if (ops->opt_int("DAIO_FS_MODE", (int *)&fs_opts.mode) != OPT_OK) { 153 fs_opts.mode = 0600; 154 } 155 fs_opts.file_size = i * fs_opts.max_block_size; 156 if ((ops->opt_bool("DAIO_FS_NOT_XXXAT", &c) == OPT_OK && c == 1) || 157 openat == NULL) { 158 fd_open_blk = fd_openpath_blk; 159 } 160 161 if (ops->opt_bool("DAIO_FS_O_XATTR", &c) == OPT_OK && c == 1) { 162 if (!USEOPENAT) { 163 if (openat == NULL) { 164 plog(LOG_ERR, "DAIO_FS_O_XATTR not available " 165 "on this OS"); 166 } else { 167 plog(LOG_ERR, "DAIO_FS_O_XATTR and " 168 "DAIO_FS_NOT_XXXAT are mutually exclusive"); 169 } 170 exit(-1); 171 } 172 fs_opts.attr = O_XATTR; 173 } 174 if (ops->opt_bool("DAIO_FS_TRUNC", &c) == OPT_OK && c == 1 && i == 1) { 175 fs_opts.wflags = O_TRUNC; 176 } 177 178 if (ops->opt_bool("DAIO_FS_UNLINK", &c) == OPT_OK && c == 1 && i == 1) { 179 if (!USEOPENAT) { 180 do_pwritep = do_unlinkpath_and_pwrite; 181 } else { 182 do_pwritep = do_unlinkat_and_pwrite; 183 } 184 } else { 185 do_pwritep = do_pwrite; 186 } 187 if (ops->opt_bool("DAIO_FS_RENAME_AND_UNLINK", &c) == OPT_OK && 188 c == 1 && i == 1) { 189 if (do_pwritep != do_pwrite) { 190 plog(LOG_WARNING, "DAIO_FS_UNLINK and " 191 "DAIO_FS_RENAME_AND_UNLINK are mutually exclusive, " 192 "using DAIO_FS_UNLINK"); 193 } else { 194 if (!USEOPENAT) { 195 do_pwritep = 196 do_renamepath_and_pwrite_and_unlink; 197 } else { 198 do_pwritep = do_renameat_and_pwrite_and_unlink; 199 } 200 } 201 } 202 203 opts_fini(); 204 daio_async_init_checker(checker, max_block_size); 205 } 206 207 static off_t 208 fd_off2real_off(int fildes, off_t off) 209 { 210 off_t x = fs_opts.block_size * 211 ((off % fs_opts.file_size)/fs_opts.max_block_size); 212 return (x); 213 } 214 215 static int 216 fd_openpath_blk(int fildes, int flag, off_t off) 217 { 218 char *buf = fd2pathname(fildes, off); 219 int ret; 220 221 if (buf == NULL) { 222 return (-1); 223 } 224 225 ret = open(buf, flag, fs_opts.mode); 226 free(buf); 227 return (ret); 228 } 229 230 static int 231 fd_openat_blk(int fildes, int flag, off_t off) 232 { 233 char buf[17]; /* big enough to hold the largest 64 bit quantity */ 234 235 if (snprintf(buf, sizeof (buf), "%llx", off/fs_opts.file_size) > 236 sizeof (buf) - 1) { 237 return (-1); 238 } 239 240 return (openat(fs_fdsp[fildes].fd, buf, flag, fs_opts.mode)); 241 } 242 243 static int 244 fs_ioctl(int fdes, int a, void *b) 245 { 246 errno = EINVAL; 247 return (-1); 248 } 249 250 static int 251 do_open(const char *path, int flag, mode_t mode) 252 { 253 int fd; 254 255 if ((fd = open(path, O_RDONLY|(O_EXCL & flag), mode)) == -1) { 256 return (-1); 257 } 258 if (fs_opts.attr) { 259 int fd2; 260 if (!USEOPENAT || 261 (fd2 = openat(fd, ".", fs_opts.attr, 0)) == -1) { 262 close(fd); 263 return (-1); 264 } 265 (void) close(fd); 266 fd = fd2; 267 } 268 return (fd); 269 } 270 271 static int 272 fs_open(const char *path, int flag, mode_t mode) 273 { 274 struct stat buf; 275 struct fs_fds *tmp; 276 int fd; 277 278 if (stat(path, &buf) == -1) { 279 if ((mode & O_RDONLY) != 0) { 280 errno = EACCES; 281 return (-1); 282 } else if (mkdir(path, fs_opts.mode | S_IXUSR) == -1) { 283 return (-1); 284 } 285 } 286 287 if ((fd = do_open(path, flag, mode)) == -1) { 288 return (-1); 289 } 290 291 tmp = realloc(fs_fdsp, (fd_nfds + 1) * sizeof (struct fs_fds)); 292 293 if (tmp == NULL) { 294 return (-1); 295 } 296 fs_fdsp = tmp; 297 if (path[0] != '/') { 298 char *b = getcwd(NULL, PATH_MAX); 299 300 if (b == NULL) { 301 fs_close(fd); 302 return (-1); 303 } 304 fs_fdsp[fd_nfds].path = malloc(strlen(b) + strlen(path) + 2); 305 if (fs_fdsp[fd_nfds].path == NULL) { 306 free(b); 307 fs_close(fd); 308 return (-1); 309 } 310 311 sprintf(fs_fdsp[fd_nfds].path, "%s/%s", b, path); 312 free(b); 313 } else { 314 fs_fdsp[fd_nfds].path = strdup(path); 315 } 316 fs_fdsp[fd_nfds].pathlen = strlen(fs_fdsp[fd_nfds].path); 317 fs_fdsp[fd_nfds++].fd = fd; 318 319 return (fd_nfds - 1); 320 } 321 322 static int 323 fs_close(int fildes) 324 { 325 int ret; 326 327 if (fildes >= fd_nfds) { 328 errno = EINVAL; 329 return (-1); 330 } 331 if ((ret = close(fs_fdsp[fildes].fd)) == 0) { 332 fs_fdsp[fildes].fd = -1; 333 free(fs_fdsp[fildes].path); 334 fs_fdsp[fildes].path = NULL; 335 } 336 return (ret); 337 } 338 339 static int 340 fs_ftruncate64(int fildes, off64_t len) 341 { 342 fs_opts.obj_size = len; 343 return (0); /* this is a lie */ 344 } 345 346 static int 347 fd_do_open_and_directio(int fildes, int flag, off_t off) 348 { 349 int fd; 350 if ((fd = fd_directio_open_blk(fildes, flag, off)) != -1) { 351 (void) directio(fd, DIRECTIO_ON); 352 } 353 return (fd); 354 } 355 static int 356 fs_directio(int fildes, int advice) 357 { 358 if (ftruncate == NULL) { 359 return (-1); 360 } else { 361 if (advice) { 362 fd_directio_open_blk = fd_open_blk; 363 fd_open_blk = fd_do_open_and_directio; 364 } 365 } 366 return (0); 367 } 368 369 static int 370 fs_stat64(const char *path, struct stat64 *bp) 371 { 372 int ret; 373 374 ret = stat64(path, bp); 375 return (ret); 376 } 377 378 static int 379 fs_fstat64(int fildes, struct stat64 *bp) 380 { 381 int ret; 382 ret = fstat64(fs_fdsp[fildes].fd, bp); 383 bp->st_mode &= ~S_IFMT; 384 bp->st_mode |= S_IFREG; /* pretend to be an ordinary file */ 385 bp->st_size = fs_opts.obj_size; 386 return (ret); 387 } 388 #if defined(__SunOS_5_6) || defined(__SunOS_5_7) || defined(__SunOS_5_8) 389 static DIR * 390 do_opendir(int fd, const char *path) 391 { 392 if (fdopendir != NULL) { 393 return (fdopendir(fd)); 394 } else { 395 return (opendir(path)); 396 } 397 } 398 #else 399 #define do_opendir(FD, PATH) fdopendir(FD) 400 #endif 401 402 static int 403 fs_unlink(const char *path) 404 { 405 int fd; 406 if (rmdir(path) == 0) { 407 return (0); 408 } 409 410 if (errno != EEXIST) { 411 return (-1); 412 } 413 414 if (!USEOPENAT) { 415 off64_t off; 416 off64_t end = fs_opts.obj_size/fs_opts.file_size; 417 DIR *dirp; 418 void *scratch; 419 int len = strlen(path); 420 char *buf; 421 422 if ((buf = malloc(len + 18)) == NULL) { 423 return (-1); 424 } 425 426 strcpy(buf, path); 427 428 if ((scratch = malloc(sizeof (struct dirent) + 429 pathconf(path, PATH_MAX) + 1)) != NULL && 430 (dirp = opendir(path)) != NULL) { 431 struct dirent *dep; 432 433 while (readdir_r(dirp, scratch, &dep) == 0 && 434 dep != NULL) { 435 off64_t off; 436 437 if (dep->d_name[0] == '.') { 438 continue; 439 } 440 off = strtoll(dep->d_name, NULL, 16); 441 442 if (off >= 0 && off < end) { 443 sprintf(&buf[len], "/%s", dep->d_name); 444 unlink(buf); 445 } 446 } 447 closedir(dirp); 448 } 449 free(scratch); 450 free(buf); 451 } else { 452 if ((fd = do_open(path, O_RDONLY, 0)) != -1) { 453 DIR *dirp; 454 void *scratch; 455 off64_t end = fs_opts.obj_size/fs_opts.file_size; 456 457 if ((scratch = malloc(sizeof (struct dirent) + 458 fpathconf(fd, PATH_MAX) + 1)) != NULL && 459 (dirp = do_opendir(fd, path)) != NULL) { 460 struct dirent *dep; 461 462 while (readdir_r(dirp, scratch, &dep) == 0 && 463 dep != NULL) { 464 off64_t off; 465 466 if (dep->d_name[0] == '.') { 467 continue; 468 } 469 off = strtoll(dep->d_name, NULL, 16); 470 471 if (off >= 0 && off < end) { 472 unlinkat(fd, dep->d_name, 0); 473 } 474 } 475 closedir(dirp); 476 } 477 free(scratch); 478 close(fd); 479 } 480 } 481 return (rmdir(path)); 482 } 483 484 static ssize_t 485 do_pwrite(int fildes, void *bufp, size_t bufs, off_t off) 486 { 487 int fd; 488 ssize_t ret; 489 int err; 490 491 if ((fd = fd_open_blk(fildes, O_WRONLY|O_CREAT|fs_opts.wflags, off)) == 492 -1) { 493 return (-1); 494 } 495 ret = pwrite(fd, (char *)bufp, bufs, fd_off2real_off(fildes, off)); 496 err = errno; 497 if (close(fd) < 0) { 498 ret = -1; 499 } else { 500 errno = err; 501 } 502 return (ret); 503 } 504 505 static ssize_t 506 do_renameat_and_pwrite_and_unlink( 507 int fildes, void *bufp, size_t bufs, off_t off) 508 { 509 int ret; 510 char buf[6 + 17]; 511 512 snprintf(buf, sizeof (buf), "To_go.%llx", off/fs_opts.file_size); 513 if (renameat(fs_fdsp[fildes].fd, &buf[6], fs_fdsp[fildes].fd, buf) == 514 -1) { 515 if (errno != ENOENT) { 516 return (-1); 517 } else { 518 ret = do_pwrite(fildes, bufp, bufs, off); 519 } 520 } else { 521 ret = do_pwrite(fildes, bufp, bufs, off); 522 unlinkat(fs_fdsp[fildes].fd, buf, 0); 523 } 524 return (ret); 525 } 526 527 static ssize_t 528 do_renamepath_and_pwrite_and_unlink( 529 int fildes, void *bufp, size_t bufs, off_t off) 530 { 531 int ret; 532 char *bufo; 533 char *bufn; 534 535 if ((bufo = fd2pathname(fildes, off)) == NULL) { 536 return (-1); 537 } 538 539 bufn = malloc(fs_fdsp[fildes].pathlen + 16 + 6 + 2); 540 if (bufn != NULL) { 541 sprintf(bufn, "%s/To_go.%llx", 542 fs_fdsp[fildes].path, off/fs_opts.file_size); 543 544 if (rename(bufo, bufn) == -1) { 545 if (errno != ENOENT) { 546 ret = -1; 547 } else { 548 ret = do_pwrite(fildes, bufp, bufs, off); 549 } 550 } else { 551 ret = do_pwrite(fildes, bufp, bufs, off); 552 unlink(bufn); 553 } 554 free(bufn); 555 } else { 556 ret = -1; 557 } 558 free(bufo); 559 return (ret); 560 } 561 562 static ssize_t 563 do_unlinkat_and_pwrite(int fildes, void *bufp, size_t bufs, off_t off) 564 { 565 char buf[17]; 566 567 snprintf(buf, sizeof (buf), "%llx", off/fs_opts.file_size); 568 unlinkat(fs_fdsp[fildes].fd, buf, 0); 569 return (do_pwrite(fildes, bufp, bufs, off)); 570 } 571 572 static ssize_t 573 do_unlinkpath_and_pwrite(int fildes, void *bufp, size_t bufs, off_t off) 574 { 575 char *buf = fd2pathname(fildes, off); 576 577 if (buf == NULL) { 578 return (-1); 579 } 580 unlink(buf); 581 free(buf); 582 583 return (do_pwrite(fildes, bufp, bufs, off)); 584 } 585 586 ssize_t 587 fs_pwrite(int fildes, const uchar_t *bufp, size_t bufs, off_t off, 588 struct daio_id *id) 589 { 590 return (do_pwritep(fildes, (void *)bufp, bufs, off)); 591 } 592 593 ssize_t 594 do_pread(int fildes, void *bufp, size_t bufs, off_t off) 595 { 596 int fd; 597 int ret; 598 int err; 599 600 if ((fd = fd_open_blk(fildes, O_RDONLY, off)) == -1) { 601 return (-1); 602 } 603 ret = pread64(fd, bufp, bufs, fd_off2real_off(fildes, off)); 604 err = errno; 605 close(fd); 606 errno = err; 607 return (ret); 608 } 609 610 ssize_t 611 fs_pread(int fildes, void *bufp, size_t bufs, off_t off, 612 struct daio_id *id) 613 { 614 return (do_pread(fildes, bufp, bufs, off)); 615 } 616 617 ssize_t 618 fs_pread_and_check(int fildes, uchar_t *bufp, size_t bufs, off_t off, 619 struct daio_id *id) 620 { 621 int ret; 622 623 ret = do_pread(fildes, bufp, bufs, off); 624 if (id != NULL && ret >= 0) { 625 if ((daio_async_get_checker())(bufp, ret, id) != 0) { 626 ret = DAIO_CORRUPT; 627 } 628 } 629 return (ret); 630 } 631 632 static int 633 daio_fsrw_read(int fildes, uchar_t *bufp, int bufs, off_t offset, 634 daio_result_t *resultp, struct daio_id *id) 635 { 636 return (daio_async_rw(fildes, bufp, bufs, offset, resultp, id, 637 do_pread, IS_READ)); 638 } 639 640 static int 641 daio_fsrw_write(int fildes, uchar_t *bufp, int bufs, off_t offset, 642 daio_result_t *resultp, struct daio_id *id) 643 { 644 return (daio_async_rw(fildes, bufp, bufs, offset, resultp, id, 645 do_pwritep, IS_WRITE)); 646 } 647 648 static void * 649 fs_read_disko_vtoc(int filedes) 650 { 651 return (NULL); 652 } 653 654 struct paths * 655 fs_findap(const char *inpath, const char *devices) 656 { 657 return (NULL); 658 } 659 660 static void 661 fs_dd(int pri, int fd, size_t len, size_t bs, off_t off) 662 { 663 if (USEOPENAT) { 664 plog(pri, "runat %s dd if=%llx bs=%d iseek=%d\n", 665 fs_fdsp[fd].path, off/fs_opts.file_size, 666 len, fd_off2real_off(fd, off)/bs); 667 } else { 668 plog(pri, "dd if=%s/%llx bs=%d iseek=%d\n", 669 fs_fdsp[fd].path, off/fs_opts.file_size, 670 len, fd_off2real_off(fd, off)/bs); 671 } 672 } 673 674 struct daio_ops DAIO_OPS = { 675 daio_async_init, 676 fs_init, 677 daio_async_get_checker, 678 fs_findap, 679 (int (*)(const char *, int, ...))fs_open, 680 fs_close, 681 fs_unlink, 682 fs_stat64, 683 fs_fstat64, 684 (int (*)(int, int, ...))fs_ioctl, 685 fs_ftruncate64, 686 fs_directio, 687 fs_read_disko_vtoc, 688 fs_pwrite, 689 fs_pread_and_check, 690 daio_fsrw_write, 691 daio_fsrw_read, 692 daio_async_status, 693 daio_async_start_time, 694 daio_async_end_time, 695 daio_async_wait, 696 daio_async_cancel, 697 daio_async_fini, 698 fs_dd 699 }; 700