1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * University Copyright- Copyright (c) 1982, 1986, 1988 31 * The Regents of the University of California 32 * All Rights Reserved 33 * 34 * University Acknowledgment- Portions of this document are derived from 35 * software developed by the University of California, Berkeley, and its 36 * contributors. 37 */ 38 39 #include <sys/types.h> 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/cpuvar.h> 43 #include <sys/errno.h> 44 #include <sys/cred.h> 45 #include <sys/user.h> 46 #include <sys/uio.h> 47 #include <sys/vfs.h> 48 #include <sys/vnode.h> 49 #include <sys/pathname.h> 50 #include <sys/proc.h> 51 #include <sys/vtrace.h> 52 #include <sys/sysmacros.h> 53 #include <sys/debug.h> 54 #include <sys/dirent.h> 55 #include <c2/audit.h> 56 #include <sys/zone.h> 57 #include <sys/dnlc.h> 58 #include <sys/fs/snode.h> 59 60 /* Controls whether paths are stored with vnodes. */ 61 int vfs_vnode_path = 1; 62 63 int 64 lookupname( 65 char *fnamep, 66 enum uio_seg seg, 67 enum symfollow followlink, 68 vnode_t **dirvpp, 69 vnode_t **compvpp) 70 { 71 return (lookupnameat(fnamep, seg, followlink, dirvpp, compvpp, NULL)); 72 } 73 74 75 /* 76 * Lookup the user file name, 77 * Handle allocation and freeing of pathname buffer, return error. 78 */ 79 int 80 lookupnameat( 81 char *fnamep, /* user pathname */ 82 enum uio_seg seg, /* addr space that name is in */ 83 enum symfollow followlink, /* follow sym links */ 84 vnode_t **dirvpp, /* ret for ptr to parent dir vnode */ 85 vnode_t **compvpp, /* ret for ptr to component vnode */ 86 vnode_t *startvp) /* start path search from vp */ 87 { 88 char namebuf[TYPICALMAXPATHLEN]; 89 struct pathname lookpn; 90 int error; 91 92 error = pn_get_buf(fnamep, seg, &lookpn, namebuf, sizeof (namebuf)); 93 if (error == 0) { 94 if (audit_active) 95 audit_lookupname(); 96 error = lookuppnat(&lookpn, NULL, followlink, 97 dirvpp, compvpp, startvp); 98 } 99 if (error == ENAMETOOLONG) { 100 /* 101 * This thread used a pathname > TYPICALMAXPATHLEN bytes long. 102 */ 103 if (error = pn_get(fnamep, seg, &lookpn)) 104 return (error); 105 error = lookuppnat(&lookpn, NULL, followlink, 106 dirvpp, compvpp, startvp); 107 pn_free(&lookpn); 108 } 109 110 return (error); 111 } 112 113 /* 114 * Lookup the user file name from a given vp, 115 */ 116 int 117 lookuppn( 118 struct pathname *pnp, 119 struct pathname *rpnp, 120 enum symfollow followlink, 121 vnode_t **dirvpp, 122 vnode_t **compvpp) 123 { 124 return (lookuppnat(pnp, rpnp, followlink, dirvpp, compvpp, NULL)); 125 } 126 127 int 128 lookuppnat( 129 struct pathname *pnp, /* pathname to lookup */ 130 struct pathname *rpnp, /* if non-NULL, return resolved path */ 131 enum symfollow followlink, /* (don't) follow sym links */ 132 vnode_t **dirvpp, /* ptr for parent vnode */ 133 vnode_t **compvpp, /* ptr for entry vnode */ 134 vnode_t *startvp) /* start search from this vp */ 135 { 136 vnode_t *vp; /* current directory vp */ 137 vnode_t *rootvp; 138 proc_t *p = curproc; 139 140 if (pnp->pn_pathlen == 0) 141 return (ENOENT); 142 143 mutex_enter(&p->p_lock); /* for u_rdir and u_cdir */ 144 if ((rootvp = PTOU(p)->u_rdir) == NULL) 145 rootvp = rootdir; 146 else if (rootvp != rootdir) /* no need to VN_HOLD rootdir */ 147 VN_HOLD(rootvp); 148 149 if (pnp->pn_path[0] == '/') { 150 vp = rootvp; 151 } else { 152 vp = (startvp == NULL) ? PTOU(p)->u_cdir : startvp; 153 } 154 VN_HOLD(vp); 155 mutex_exit(&p->p_lock); 156 157 /* 158 * Skip over leading slashes 159 */ 160 if (pnp->pn_path[0] == '/') { 161 do { 162 pnp->pn_path++; 163 pnp->pn_pathlen--; 164 } while (pnp->pn_path[0] == '/'); 165 } 166 167 return (lookuppnvp(pnp, rpnp, followlink, dirvpp, 168 compvpp, rootvp, vp, CRED())); 169 } 170 171 /* Private flag to do our getcwd() dirty work */ 172 #define LOOKUP_CHECKREAD 0x10 173 #define LOOKUP_MASK (~LOOKUP_CHECKREAD) 174 175 /* 176 * Starting at current directory, translate pathname pnp to end. 177 * Leave pathname of final component in pnp, return the vnode 178 * for the final component in *compvpp, and return the vnode 179 * for the parent of the final component in dirvpp. 180 * 181 * This is the central routine in pathname translation and handles 182 * multiple components in pathnames, separating them at /'s. It also 183 * implements mounted file systems and processes symbolic links. 184 * 185 * vp is the vnode where the directory search should start. 186 * 187 * Reference counts: vp must be held prior to calling this function. rootvp 188 * should only be held if rootvp != rootdir. 189 */ 190 int 191 lookuppnvp( 192 struct pathname *pnp, /* pathname to lookup */ 193 struct pathname *rpnp, /* if non-NULL, return resolved path */ 194 int flags, /* follow symlinks */ 195 vnode_t **dirvpp, /* ptr for parent vnode */ 196 vnode_t **compvpp, /* ptr for entry vnode */ 197 vnode_t *rootvp, /* rootvp */ 198 vnode_t *vp, /* directory to start search at */ 199 cred_t *cr) /* user's credential */ 200 { 201 vnode_t *cvp; /* current component vp */ 202 vnode_t *tvp; /* addressable temp ptr */ 203 char component[MAXNAMELEN]; /* buffer for component (incl null) */ 204 int error; 205 int nlink; 206 int lookup_flags; 207 struct pathname presrvd; /* case preserved name */ 208 struct pathname *pp = NULL; 209 vnode_t *startvp; 210 vnode_t *zonevp = curproc->p_zone->zone_rootvp; /* zone root */ 211 int must_be_directory = 0; 212 boolean_t retry_with_kcred = B_FALSE; 213 214 CPU_STATS_ADDQ(CPU, sys, namei, 1); 215 nlink = 0; 216 cvp = NULL; 217 if (rpnp) 218 rpnp->pn_pathlen = 0; 219 220 lookup_flags = dirvpp ? LOOKUP_DIR : 0; 221 if (flags & FIGNORECASE) { 222 lookup_flags |= FIGNORECASE; 223 pn_alloc(&presrvd); 224 pp = &presrvd; 225 } 226 227 if (audit_active) 228 audit_anchorpath(pnp, vp == rootvp); 229 230 /* 231 * Eliminate any trailing slashes in the pathname. 232 * If there are any, we must follow all symlinks. 233 * Also, we must guarantee that the last component is a directory. 234 */ 235 if (pn_fixslash(pnp)) { 236 flags |= FOLLOW; 237 must_be_directory = 1; 238 } 239 240 startvp = vp; 241 next: 242 /* 243 * Make sure we have a directory. 244 */ 245 if (vp->v_type != VDIR) { 246 error = ENOTDIR; 247 goto bad; 248 } 249 250 if (rpnp && VN_CMP(vp, rootvp)) 251 (void) pn_set(rpnp, "/"); 252 253 /* 254 * Process the next component of the pathname. 255 */ 256 if (error = pn_getcomponent(pnp, component)) { 257 if (audit_active) 258 audit_addcomponent(pnp); 259 goto bad; 260 } 261 262 /* 263 * Handle "..": two special cases. 264 * 1. If we're at the root directory (e.g. after chroot or 265 * zone_enter) then change ".." to "." so we can't get 266 * out of this subtree. 267 * 2. If this vnode is the root of a mounted file system, 268 * then replace it with the vnode that was mounted on 269 * so that we take the ".." in the other file system. 270 */ 271 if (component[0] == '.' && component[1] == '.' && component[2] == 0) { 272 checkforroot: 273 if (VN_CMP(vp, rootvp) || VN_CMP(vp, zonevp)) { 274 component[1] = '\0'; 275 } else if (vp->v_flag & VROOT) { 276 vfs_t *vfsp; 277 cvp = vp; 278 279 /* 280 * While we deal with the vfs pointer from the vnode 281 * the filesystem could have been forcefully unmounted 282 * and the vnode's v_vfsp could have been invalidated 283 * by VFS_UNMOUNT. Hence, we cache v_vfsp and use it 284 * with vfs_rlock_wait/vfs_unlock. 285 * It is safe to use the v_vfsp even it is freed by 286 * VFS_UNMOUNT because vfs_rlock_wait/vfs_unlock 287 * do not dereference v_vfsp. It is just used as a 288 * magic cookie. 289 * One more corner case here is the memory getting 290 * reused for another vfs structure. In this case 291 * lookuppnvp's vfs_rlock_wait will succeed, domount's 292 * vfs_lock will fail and domount will bail out with an 293 * error (EBUSY). 294 */ 295 vfsp = cvp->v_vfsp; 296 297 /* 298 * This lock is used to synchronize 299 * mounts/unmounts and lookups. 300 * Threads doing mounts/unmounts hold the 301 * writers version vfs_lock_wait(). 302 */ 303 304 vfs_rlock_wait(vfsp); 305 306 /* 307 * If this vnode is on a file system that 308 * has been forcibly unmounted, 309 * we can't proceed. Cancel this operation 310 * and return EIO. 311 * 312 * vfs_vnodecovered is NULL if unmounted. 313 * Currently, nfs uses VFS_UNMOUNTED to 314 * check if it's a forced-umount. Keep the 315 * same checking here as well even though it 316 * may not be needed. 317 */ 318 if (((vp = cvp->v_vfsp->vfs_vnodecovered) == NULL) || 319 (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) { 320 vfs_unlock(vfsp); 321 VN_RELE(cvp); 322 if (pp) 323 pn_free(pp); 324 return (EIO); 325 } 326 VN_HOLD(vp); 327 vfs_unlock(vfsp); 328 VN_RELE(cvp); 329 cvp = NULL; 330 /* 331 * Crossing mount points. For eg: We are doing 332 * a lookup of ".." for file systems root vnode 333 * mounted here, and VOP_LOOKUP() (with covered vnode) 334 * will be on underlying file systems mount point 335 * vnode. Set retry_with_kcred flag as we might end 336 * up doing VOP_LOOKUP() with kcred if required. 337 */ 338 retry_with_kcred = B_TRUE; 339 goto checkforroot; 340 } 341 } 342 343 /* 344 * LOOKUP_CHECKREAD is a private flag used by vnodetopath() to indicate 345 * that we need to have read permission on every directory in the entire 346 * path. This is used to ensure that a forward-lookup of a cached value 347 * has the same effect as a reverse-lookup when the cached value cannot 348 * be found. 349 */ 350 if ((flags & LOOKUP_CHECKREAD) && 351 (error = VOP_ACCESS(vp, VREAD, 0, cr, NULL)) != 0) 352 goto bad; 353 354 /* 355 * Perform a lookup in the current directory. 356 */ 357 error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags, 358 rootvp, cr, NULL, NULL, pp); 359 360 /* 361 * Retry with kcred - If crossing mount points & error is EACCES. 362 * 363 * If we are crossing mount points here and doing ".." lookup, 364 * VOP_LOOKUP() might fail if the underlying file systems 365 * mount point has no execute permission. In cases like these, 366 * we retry VOP_LOOKUP() by giving as much privilage as possible 367 * by passing kcred credentials. 368 * 369 * In case of hierarchical file systems, passing kcred still may 370 * or may not work. 371 * For eg: UFS FS --> Mount NFS FS --> Again mount UFS on some 372 * directory inside NFS FS. 373 */ 374 if ((error == EACCES) && retry_with_kcred) 375 error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags, 376 rootvp, zone_kcred(), NULL, NULL, pp); 377 378 cvp = tvp; 379 if (error) { 380 cvp = NULL; 381 /* 382 * On error, return hard error if 383 * (a) we're not at the end of the pathname yet, or 384 * (b) the caller didn't want the parent directory, or 385 * (c) we failed for some reason other than a missing entry. 386 */ 387 if (pn_pathleft(pnp) || dirvpp == NULL || error != ENOENT) 388 goto bad; 389 if (audit_active) { /* directory access */ 390 if (error = audit_savepath(pnp, vp, error, cr)) 391 goto bad_noaudit; 392 } 393 pn_setlast(pnp); 394 /* 395 * We inform the caller that the desired entry must be 396 * a directory by adding a '/' to the component name. 397 */ 398 if (must_be_directory && (error = pn_addslash(pnp)) != 0) 399 goto bad; 400 *dirvpp = vp; 401 if (compvpp != NULL) 402 *compvpp = NULL; 403 if (rootvp != rootdir) 404 VN_RELE(rootvp); 405 if (pp) 406 pn_free(pp); 407 return (0); 408 } 409 410 /* 411 * Traverse mount points. 412 * XXX why don't we need to hold a read lock here (call vn_vfsrlock)? 413 * What prevents a concurrent update to v_vfsmountedhere? 414 * Possible answer: if mounting, we might not see the mount 415 * if it is concurrently coming into existence, but that's 416 * really not much different from the thread running a bit slower. 417 * If unmounting, we may get into traverse() when we shouldn't, 418 * but traverse() will catch this case for us. 419 * (For this to work, fetching v_vfsmountedhere had better 420 * be atomic!) 421 */ 422 if (vn_mountedvfs(cvp) != NULL) { 423 tvp = cvp; 424 if ((error = traverse(&tvp)) != 0) { 425 /* 426 * It is required to assign cvp here, because 427 * traverse() will return a held vnode which 428 * may different than the vnode that was passed 429 * in (even in the error case). If traverse() 430 * changes the vnode it releases the original, 431 * and holds the new one. 432 */ 433 cvp = tvp; 434 goto bad; 435 } 436 cvp = tvp; 437 } 438 439 /* 440 * If we hit a symbolic link and there is more path to be 441 * translated or this operation does not wish to apply 442 * to a link, then place the contents of the link at the 443 * front of the remaining pathname. 444 */ 445 if (cvp->v_type == VLNK && ((flags & FOLLOW) || pn_pathleft(pnp))) { 446 struct pathname linkpath; 447 if (audit_active) { 448 if (error = audit_pathcomp(pnp, cvp, cr)) 449 goto bad; 450 } 451 452 if (++nlink > MAXSYMLINKS) { 453 error = ELOOP; 454 goto bad; 455 } 456 pn_alloc(&linkpath); 457 if (error = pn_getsymlink(cvp, &linkpath, cr)) { 458 pn_free(&linkpath); 459 goto bad; 460 } 461 462 if (audit_active) 463 audit_symlink(pnp, &linkpath); 464 465 if (pn_pathleft(&linkpath) == 0) 466 (void) pn_set(&linkpath, "."); 467 error = pn_insert(pnp, &linkpath, strlen(component)); 468 pn_free(&linkpath); 469 if (error) 470 goto bad; 471 VN_RELE(cvp); 472 cvp = NULL; 473 if (pnp->pn_pathlen == 0) { 474 error = ENOENT; 475 goto bad; 476 } 477 if (pnp->pn_path[0] == '/') { 478 do { 479 pnp->pn_path++; 480 pnp->pn_pathlen--; 481 } while (pnp->pn_path[0] == '/'); 482 VN_RELE(vp); 483 vp = rootvp; 484 VN_HOLD(vp); 485 } 486 if (audit_active) 487 audit_anchorpath(pnp, vp == rootvp); 488 if (pn_fixslash(pnp)) { 489 flags |= FOLLOW; 490 must_be_directory = 1; 491 } 492 goto next; 493 } 494 495 /* 496 * If rpnp is non-NULL, remember the resolved path name therein. 497 * Do not include "." components. Collapse occurrences of 498 * "previous/..", so long as "previous" is not itself "..". 499 * Exhausting rpnp results in error ENAMETOOLONG. 500 */ 501 if (rpnp && strcmp(component, ".") != 0) { 502 size_t len; 503 504 if (strcmp(component, "..") == 0 && 505 rpnp->pn_pathlen != 0 && 506 !((rpnp->pn_pathlen > 2 && 507 strncmp(rpnp->pn_path+rpnp->pn_pathlen-3, "/..", 3) == 0) || 508 (rpnp->pn_pathlen == 2 && 509 strncmp(rpnp->pn_path, "..", 2) == 0))) { 510 while (rpnp->pn_pathlen && 511 rpnp->pn_path[rpnp->pn_pathlen-1] != '/') 512 rpnp->pn_pathlen--; 513 if (rpnp->pn_pathlen > 1) 514 rpnp->pn_pathlen--; 515 rpnp->pn_path[rpnp->pn_pathlen] = '\0'; 516 } else { 517 if (rpnp->pn_pathlen != 0 && 518 rpnp->pn_path[rpnp->pn_pathlen-1] != '/') 519 rpnp->pn_path[rpnp->pn_pathlen++] = '/'; 520 if (flags & FIGNORECASE) { 521 /* 522 * Return the case-preserved name 523 * within the resolved path. 524 */ 525 error = copystr(pp->pn_buf, 526 rpnp->pn_path + rpnp->pn_pathlen, 527 rpnp->pn_bufsize - rpnp->pn_pathlen, &len); 528 } else { 529 error = copystr(component, 530 rpnp->pn_path + rpnp->pn_pathlen, 531 rpnp->pn_bufsize - rpnp->pn_pathlen, &len); 532 } 533 if (error) /* copystr() returns ENAMETOOLONG */ 534 goto bad; 535 rpnp->pn_pathlen += (len - 1); 536 ASSERT(rpnp->pn_bufsize > rpnp->pn_pathlen); 537 } 538 } 539 540 /* 541 * If no more components, return last directory (if wanted) and 542 * last component (if wanted). 543 */ 544 if (pn_pathleft(pnp) == 0) { 545 /* 546 * If there was a trailing slash in the pathname, 547 * make sure the last component is a directory. 548 */ 549 if (must_be_directory && cvp->v_type != VDIR) { 550 error = ENOTDIR; 551 goto bad; 552 } 553 if (dirvpp != NULL) { 554 /* 555 * Check that we have the real parent and not 556 * an alias of the last component. 557 */ 558 if (vn_compare(vp, cvp)) { 559 if (audit_active) 560 (void) audit_savepath(pnp, cvp, 561 EINVAL, cr); 562 pn_setlast(pnp); 563 VN_RELE(vp); 564 VN_RELE(cvp); 565 if (rootvp != rootdir) 566 VN_RELE(rootvp); 567 if (pp) 568 pn_free(pp); 569 return (EINVAL); 570 } 571 if (audit_active) { 572 if (error = audit_pathcomp(pnp, vp, cr)) 573 goto bad; 574 } 575 *dirvpp = vp; 576 } else 577 VN_RELE(vp); 578 if (audit_active) 579 (void) audit_savepath(pnp, cvp, 0, cr); 580 if (pnp->pn_path == pnp->pn_buf) 581 (void) pn_set(pnp, "."); 582 else 583 pn_setlast(pnp); 584 if (rpnp) { 585 if (VN_CMP(cvp, rootvp)) 586 (void) pn_set(rpnp, "/"); 587 else if (rpnp->pn_pathlen == 0) 588 (void) pn_set(rpnp, "."); 589 } 590 591 if (compvpp != NULL) 592 *compvpp = cvp; 593 else 594 VN_RELE(cvp); 595 if (rootvp != rootdir) 596 VN_RELE(rootvp); 597 if (pp) 598 pn_free(pp); 599 return (0); 600 } 601 602 if (audit_active) { 603 if (error = audit_pathcomp(pnp, cvp, cr)) 604 goto bad; 605 } 606 607 /* 608 * Skip over slashes from end of last component. 609 */ 610 while (pnp->pn_path[0] == '/') { 611 pnp->pn_path++; 612 pnp->pn_pathlen--; 613 } 614 615 /* 616 * Searched through another level of directory: 617 * release previous directory handle and save new (result 618 * of lookup) as current directory. 619 */ 620 VN_RELE(vp); 621 vp = cvp; 622 cvp = NULL; 623 goto next; 624 625 bad: 626 if (audit_active) /* reached end of path */ 627 (void) audit_savepath(pnp, cvp, error, cr); 628 bad_noaudit: 629 /* 630 * Error. Release vnodes and return. 631 */ 632 if (cvp) 633 VN_RELE(cvp); 634 /* 635 * If the error was ESTALE and the current directory to look in 636 * was the root for this lookup, the root for a mounted file 637 * system, or the starting directory for lookups, then 638 * return ENOENT instead of ESTALE. In this case, no recovery 639 * is possible by the higher level. If ESTALE was returned for 640 * some intermediate directory along the path, then recovery 641 * is potentially possible and retrying from the higher level 642 * will either correct the situation by purging stale cache 643 * entries or eventually get back to the point where no recovery 644 * is possible. 645 */ 646 if (error == ESTALE && 647 (VN_CMP(vp, rootvp) || (vp->v_flag & VROOT) || vp == startvp)) 648 error = ENOENT; 649 VN_RELE(vp); 650 if (rootvp != rootdir) 651 VN_RELE(rootvp); 652 if (pp) 653 pn_free(pp); 654 return (error); 655 } 656 657 /* 658 * Traverse a mount point. Routine accepts a vnode pointer as a reference 659 * parameter and performs the indirection, releasing the original vnode. 660 */ 661 int 662 traverse(vnode_t **cvpp) 663 { 664 int error = 0; 665 vnode_t *cvp; 666 vnode_t *tvp; 667 vfs_t *vfsp; 668 669 cvp = *cvpp; 670 671 /* 672 * If this vnode is mounted on, then we transparently indirect 673 * to the vnode which is the root of the mounted file system. 674 * Before we do this we must check that an unmount is not in 675 * progress on this vnode. 676 */ 677 678 for (;;) { 679 /* 680 * Try to read lock the vnode. If this fails because 681 * the vnode is already write locked, then check to 682 * see whether it is the current thread which locked 683 * the vnode. If it is not, then read lock the vnode 684 * by waiting to acquire the lock. 685 * 686 * The code path in domount() is an example of support 687 * which needs to look up two pathnames and locks one 688 * of them in between the two lookups. 689 */ 690 error = vn_vfsrlock(cvp); 691 if (error) { 692 if (!vn_vfswlock_held(cvp)) 693 error = vn_vfsrlock_wait(cvp); 694 if (error != 0) { 695 /* 696 * lookuppn() expects a held vnode to be 697 * returned because it promptly calls 698 * VN_RELE after the error return 699 */ 700 *cvpp = cvp; 701 return (error); 702 } 703 } 704 705 /* 706 * Reached the end of the mount chain? 707 */ 708 vfsp = vn_mountedvfs(cvp); 709 if (vfsp == NULL) { 710 vn_vfsunlock(cvp); 711 break; 712 } 713 714 /* 715 * The read lock must be held across the call to VFS_ROOT() to 716 * prevent a concurrent unmount from destroying the vfs. 717 */ 718 error = VFS_ROOT(vfsp, &tvp); 719 vn_vfsunlock(cvp); 720 721 if (error) 722 break; 723 724 VN_RELE(cvp); 725 726 cvp = tvp; 727 } 728 729 *cvpp = cvp; 730 return (error); 731 } 732 733 /* 734 * Return the lowermost vnode if this is a mountpoint. 735 */ 736 static vnode_t * 737 vn_under(vnode_t *vp) 738 { 739 vnode_t *uvp; 740 vfs_t *vfsp; 741 742 while (vp->v_flag & VROOT) { 743 744 vfsp = vp->v_vfsp; 745 vfs_rlock_wait(vfsp); 746 if ((uvp = vfsp->vfs_vnodecovered) == NULL || 747 (vfsp->vfs_flag & VFS_UNMOUNTED)) { 748 vfs_unlock(vfsp); 749 break; 750 } 751 VN_HOLD(uvp); 752 vfs_unlock(vfsp); 753 VN_RELE(vp); 754 vp = uvp; 755 } 756 757 return (vp); 758 } 759 760 static int 761 vnode_match(vnode_t *v1, vnode_t *v2, cred_t *cr) 762 { 763 vattr_t v1attr, v2attr; 764 765 /* 766 * If we have a device file, check to see if is a cloned open of the 767 * same device. For self-cloning devices, the major numbers will match. 768 * For devices cloned through the 'clone' driver, the minor number of 769 * the source device will be the same as the major number of the cloned 770 * device. 771 */ 772 if ((v1->v_type == VCHR || v1->v_type == VBLK) && 773 v1->v_type == v2->v_type) { 774 if ((spec_is_selfclone(v1) || spec_is_selfclone(v2)) && 775 getmajor(v1->v_rdev) == getmajor(v2->v_rdev)) 776 return (1); 777 778 if (spec_is_clone(v1) && 779 getmajor(v1->v_rdev) == getminor(v2->v_rdev)) 780 return (1); 781 782 if (spec_is_clone(v2) && 783 getmajor(v2->v_rdev) == getminor(v1->v_rdev)) 784 return (1); 785 } 786 787 v1attr.va_mask = v2attr.va_mask = AT_TYPE; 788 789 /* 790 * This check for symbolic links handles the pseudo-symlinks in procfs. 791 * These particular links have v_type of VDIR, but the attributes have a 792 * type of VLNK. We need to avoid these links because otherwise if we 793 * are currently in '/proc/self/fd', then '/proc/self/cwd' will compare 794 * as the same vnode. 795 */ 796 if (VOP_GETATTR(v1, &v1attr, 0, cr, NULL) != 0 || 797 VOP_GETATTR(v2, &v2attr, 0, cr, NULL) != 0 || 798 v1attr.va_type == VLNK || v2attr.va_type == VLNK) 799 return (0); 800 801 v1attr.va_mask = v2attr.va_mask = AT_TYPE | AT_FSID | AT_NODEID; 802 803 if (VOP_GETATTR(v1, &v1attr, ATTR_REAL, cr, NULL) != 0 || 804 VOP_GETATTR(v2, &v2attr, ATTR_REAL, cr, NULL) != 0) 805 return (0); 806 807 return (v1attr.va_fsid == v2attr.va_fsid && 808 v1attr.va_nodeid == v2attr.va_nodeid); 809 } 810 811 812 /* 813 * Find the entry in the directory corresponding to the target vnode. 814 */ 815 int 816 dirfindvp(vnode_t *vrootp, vnode_t *dvp, vnode_t *tvp, cred_t *cr, char *dbuf, 817 size_t dlen, dirent64_t **rdp) 818 { 819 size_t dbuflen; 820 struct iovec iov; 821 struct uio uio; 822 int error; 823 int eof; 824 vnode_t *cmpvp; 825 struct dirent64 *dp; 826 pathname_t pnp; 827 828 ASSERT(dvp->v_type == VDIR); 829 830 /* 831 * This is necessary because of the strange semantics of VOP_LOOKUP(). 832 */ 833 bzero(&pnp, sizeof (pnp)); 834 835 eof = 0; 836 837 uio.uio_iov = &iov; 838 uio.uio_iovcnt = 1; 839 uio.uio_segflg = UIO_SYSSPACE; 840 uio.uio_fmode = 0; 841 uio.uio_extflg = UIO_COPY_CACHED; 842 uio.uio_loffset = 0; 843 844 if ((error = VOP_ACCESS(dvp, VREAD, 0, cr, NULL)) != 0) 845 return (error); 846 847 while (!eof) { 848 uio.uio_resid = dlen; 849 iov.iov_base = dbuf; 850 iov.iov_len = dlen; 851 852 (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL); 853 error = VOP_READDIR(dvp, &uio, cr, &eof, NULL, 0); 854 VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL); 855 856 dbuflen = dlen - uio.uio_resid; 857 858 if (error || dbuflen == 0) 859 break; 860 861 dp = (dirent64_t *)dbuf; 862 while ((intptr_t)dp < (intptr_t)dbuf + dbuflen) { 863 /* 864 * Ignore '.' and '..' entries 865 */ 866 if (strcmp(dp->d_name, ".") == 0 || 867 strcmp(dp->d_name, "..") == 0) { 868 dp = (dirent64_t *)((intptr_t)dp + 869 dp->d_reclen); 870 continue; 871 } 872 873 error = VOP_LOOKUP(dvp, dp->d_name, &cmpvp, &pnp, 0, 874 vrootp, cr, NULL, NULL, NULL); 875 876 /* 877 * We only want to bail out if there was an error other 878 * than ENOENT. Otherwise, it could be that someone 879 * just removed an entry since the readdir() call, and 880 * the entry we want is further on in the directory. 881 */ 882 if (error == 0) { 883 if (vnode_match(tvp, cmpvp, cr)) { 884 VN_RELE(cmpvp); 885 *rdp = dp; 886 return (0); 887 } 888 889 VN_RELE(cmpvp); 890 } else if (error != ENOENT) { 891 return (error); 892 } 893 894 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen); 895 } 896 } 897 898 /* 899 * Something strange has happened, this directory does not contain the 900 * specified vnode. This should never happen in the normal case, since 901 * we ensured that dvp is the parent of vp. This is possible in some 902 * rare conditions (races and the special .zfs directory). 903 */ 904 if (error == 0) { 905 error = VOP_LOOKUP(dvp, ".zfs", &cmpvp, &pnp, 0, vrootp, cr, 906 NULL, NULL, NULL); 907 if (error == 0) { 908 if (vnode_match(tvp, cmpvp, cr)) { 909 (void) strcpy(dp->d_name, ".zfs"); 910 dp->d_reclen = strlen(".zfs"); 911 dp->d_off = 2; 912 dp->d_ino = 1; 913 *rdp = dp; 914 } else { 915 error = ENOENT; 916 } 917 VN_RELE(cmpvp); 918 } 919 } 920 921 return (error); 922 } 923 924 /* 925 * Given a global path (from rootdir), and a vnode that is the current root, 926 * return the portion of the path that is beneath the current root or NULL on 927 * failure. The path MUST be a resolved path (no '..' entries or symlinks), 928 * otherwise this function will fail. 929 */ 930 static char * 931 localpath(char *path, struct vnode *vrootp, cred_t *cr) 932 { 933 vnode_t *vp; 934 vnode_t *cvp; 935 char component[MAXNAMELEN]; 936 char *ret = NULL; 937 pathname_t pn; 938 939 /* 940 * We use vn_compare() instead of VN_CMP() in order to detect lofs 941 * mounts and stacked vnodes. 942 */ 943 if (vn_compare(vrootp, rootdir)) 944 return (path); 945 946 if (pn_get(path, UIO_SYSSPACE, &pn) != 0) 947 return (NULL); 948 949 vp = rootdir; 950 VN_HOLD(vp); 951 952 if (vn_ismntpt(vp) && traverse(&vp) != 0) { 953 VN_RELE(vp); 954 pn_free(&pn); 955 return (NULL); 956 } 957 958 while (pn_pathleft(&pn)) { 959 pn_skipslash(&pn); 960 961 if (pn_getcomponent(&pn, component) != 0) 962 break; 963 964 if (VOP_LOOKUP(vp, component, &cvp, &pn, 0, rootdir, cr, 965 NULL, NULL, NULL) != 0) 966 break; 967 VN_RELE(vp); 968 vp = cvp; 969 970 if (vn_ismntpt(vp) && traverse(&vp) != 0) 971 break; 972 973 if (vn_compare(vp, vrootp)) { 974 ret = path + (pn.pn_path - pn.pn_buf); 975 break; 976 } 977 } 978 979 VN_RELE(vp); 980 pn_free(&pn); 981 982 return (ret); 983 } 984 985 /* 986 * Given a directory, return the full, resolved path. This looks up "..", 987 * searches for the given vnode in the parent, appends the component, etc. It 988 * is used to implement vnodetopath() and getcwd() when the cached path fails 989 * (or vfs_vnode_path is not set). 990 */ 991 static int 992 dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, cred_t *cr) 993 { 994 pathname_t pn, rpn, emptypn; 995 vnode_t *cmpvp, *pvp = NULL; 996 vnode_t *startvp = vp; 997 int err = 0; 998 size_t complen; 999 char *dbuf; 1000 dirent64_t *dp; 1001 char *bufloc; 1002 size_t dlen = DIRENT64_RECLEN(MAXPATHLEN); 1003 refstr_t *mntpt; 1004 1005 /* Operation only allowed on directories */ 1006 ASSERT(vp->v_type == VDIR); 1007 1008 /* We must have at least enough space for "/" */ 1009 if (buflen < 2) 1010 return (ENAMETOOLONG); 1011 1012 /* Start at end of string with terminating null */ 1013 bufloc = &buf[buflen - 1]; 1014 *bufloc = '\0'; 1015 1016 pn_alloc(&pn); 1017 pn_alloc(&rpn); 1018 dbuf = kmem_alloc(dlen, KM_SLEEP); 1019 bzero(&emptypn, sizeof (emptypn)); 1020 1021 /* 1022 * Begin with an additional reference on vp. This will be decremented 1023 * during the loop. 1024 */ 1025 VN_HOLD(vp); 1026 1027 for (;;) { 1028 /* 1029 * Return if we've reached the root. If the buffer is empty, 1030 * return '/'. We explicitly don't use vn_compare(), since it 1031 * compares the real vnodes. A lofs mount of '/' would produce 1032 * incorrect results otherwise. 1033 */ 1034 if (VN_CMP(vrootp, vp)) { 1035 if (*bufloc == '\0') 1036 *--bufloc = '/'; 1037 break; 1038 } 1039 1040 /* 1041 * If we've reached the VFS root, something has gone wrong. We 1042 * should have reached the root in the above check. The only 1043 * explantation is that 'vp' is not contained withing the given 1044 * root, in which case we return EPERM. 1045 */ 1046 if (VN_CMP(rootdir, vp)) { 1047 err = EPERM; 1048 goto out; 1049 } 1050 1051 /* 1052 * Shortcut: see if this vnode is a mountpoint. If so, 1053 * grab the path information from the vfs_t. 1054 */ 1055 if (vp->v_flag & VROOT) { 1056 1057 mntpt = vfs_getmntpoint(vp->v_vfsp); 1058 if ((err = pn_set(&pn, (char *)refstr_value(mntpt))) 1059 == 0) { 1060 refstr_rele(mntpt); 1061 rpn.pn_path = rpn.pn_buf; 1062 1063 /* 1064 * Ensure the mointpoint still exists. 1065 */ 1066 VN_HOLD(vrootp); 1067 if (vrootp != rootdir) 1068 VN_HOLD(vrootp); 1069 if (lookuppnvp(&pn, &rpn, 0, NULL, 1070 &cmpvp, vrootp, vrootp, cr) == 0) { 1071 1072 if (VN_CMP(vp, cmpvp)) { 1073 VN_RELE(cmpvp); 1074 1075 complen = strlen(rpn.pn_path); 1076 bufloc -= complen; 1077 if (bufloc < buf) { 1078 err = ERANGE; 1079 goto out; 1080 } 1081 bcopy(rpn.pn_path, bufloc, 1082 complen); 1083 break; 1084 } else { 1085 VN_RELE(cmpvp); 1086 } 1087 } 1088 } else { 1089 refstr_rele(mntpt); 1090 } 1091 } 1092 1093 /* 1094 * Shortcuts failed, search for this vnode in its parent. If 1095 * this is a mountpoint, then get the vnode underneath. 1096 */ 1097 if (vp->v_flag & VROOT) 1098 vp = vn_under(vp); 1099 if ((err = VOP_LOOKUP(vp, "..", &pvp, &emptypn, 0, vrootp, cr, 1100 NULL, NULL, NULL)) != 0) 1101 goto out; 1102 1103 /* 1104 * With extended attributes, it's possible for a directory to 1105 * have a parent that is a regular file. Check for that here. 1106 */ 1107 if (pvp->v_type != VDIR) { 1108 err = ENOTDIR; 1109 goto out; 1110 } 1111 1112 /* 1113 * If this is true, something strange has happened. This is 1114 * only true if we are the root of a filesystem, which should 1115 * have been caught by the check above. 1116 */ 1117 if (VN_CMP(pvp, vp)) { 1118 err = ENOENT; 1119 goto out; 1120 } 1121 1122 /* 1123 * Search the parent directory for the entry corresponding to 1124 * this vnode. 1125 */ 1126 if ((err = dirfindvp(vrootp, pvp, vp, cr, dbuf, dlen, &dp)) 1127 != 0) 1128 goto out; 1129 complen = strlen(dp->d_name); 1130 bufloc -= complen; 1131 if (bufloc <= buf) { 1132 err = ENAMETOOLONG; 1133 goto out; 1134 } 1135 bcopy(dp->d_name, bufloc, complen); 1136 1137 /* Prepend a slash to the current path. */ 1138 *--bufloc = '/'; 1139 1140 /* And continue with the next component */ 1141 VN_RELE(vp); 1142 vp = pvp; 1143 pvp = NULL; 1144 } 1145 1146 /* 1147 * Place the path at the beginning of the buffer. 1148 */ 1149 if (bufloc != buf) 1150 ovbcopy(bufloc, buf, buflen - (bufloc - buf)); 1151 1152 out: 1153 /* 1154 * If the error was ESTALE and the current directory to look in 1155 * was the root for this lookup, the root for a mounted file 1156 * system, or the starting directory for lookups, then 1157 * return ENOENT instead of ESTALE. In this case, no recovery 1158 * is possible by the higher level. If ESTALE was returned for 1159 * some intermediate directory along the path, then recovery 1160 * is potentially possible and retrying from the high