1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * University Copyright- Copyright (c) 1982, 1986, 1988 31 * The Regents of the University of California 32 * All Rights Reserved 33 * 34 * University Acknowledgment- Portions of this document are derived from 35 * software developed by the University of California, Berkeley, and its 36 * contributors. 37 */ 38 39 40 #include <sys/types.h> 41 #include <sys/t_lock.h> 42 #include <sys/param.h> 43 #include <sys/errno.h> 44 #include <sys/user.h> 45 #include <sys/fstyp.h> 46 #include <sys/kmem.h> 47 #include <sys/systm.h> 48 #include <sys/proc.h> 49 #include <sys/mount.h> 50 #include <sys/vfs.h> 51 #include <sys/vfs_opreg.h> 52 #include <sys/fem.h> 53 #include <sys/mntent.h> 54 #include <sys/stat.h> 55 #include <sys/statvfs.h> 56 #include <sys/statfs.h> 57 #include <sys/cred.h> 58 #include <sys/vnode.h> 59 #include <sys/rwstlock.h> 60 #include <sys/dnlc.h> 61 #include <sys/file.h> 62 #include <sys/time.h> 63 #include <sys/atomic.h> 64 #include <sys/cmn_err.h> 65 #include <sys/buf.h> 66 #include <sys/swap.h> 67 #include <sys/debug.h> 68 #include <sys/vnode.h> 69 #include <sys/modctl.h> 70 #include <sys/ddi.h> 71 #include <sys/pathname.h> 72 #include <sys/bootconf.h> 73 #include <sys/dumphdr.h> 74 #include <sys/dc_ki.h> 75 #include <sys/poll.h> 76 #include <sys/sunddi.h> 77 #include <sys/sysmacros.h> 78 #include <sys/zone.h> 79 #include <sys/policy.h> 80 #include <sys/ctfs.h> 81 #include <sys/objfs.h> 82 #include <sys/console.h> 83 #include <sys/reboot.h> 84 #include <sys/attr.h> 85 #include <sys/spa.h> 86 #include <sys/lofi.h> 87 88 #include <vm/page.h> 89 90 #include <fs/fs_subr.h> 91 92 /* Private interfaces to create vopstats-related data structures */ 93 extern void initialize_vopstats(vopstats_t *); 94 extern vopstats_t *get_fstype_vopstats(struct vfs *, struct vfssw *); 95 extern vsk_anchor_t *get_vskstat_anchor(struct vfs *); 96 97 static void vfs_clearmntopt_nolock(mntopts_t *, const char *, int); 98 static void vfs_setmntopt_nolock(mntopts_t *, const char *, 99 const char *, int, int); 100 static int vfs_optionisset_nolock(const mntopts_t *, const char *, char **); 101 static void vfs_freemnttab(struct vfs *); 102 static void vfs_freeopt(mntopt_t *); 103 static void vfs_swapopttbl_nolock(mntopts_t *, mntopts_t *); 104 static void vfs_swapopttbl(mntopts_t *, mntopts_t *); 105 static void vfs_copyopttbl_extend(const mntopts_t *, mntopts_t *, int); 106 static void vfs_createopttbl_extend(mntopts_t *, const char *, 107 const mntopts_t *); 108 static char **vfs_copycancelopt_extend(char **const, int); 109 static void vfs_freecancelopt(char **); 110 static void getrootfs(char **, char **); 111 static int getmacpath(dev_info_t *, void *); 112 static void vfs_mnttabvp_setup(void); 113 114 struct ipmnt { 115 struct ipmnt *mip_next; 116 dev_t mip_dev; 117 struct vfs *mip_vfsp; 118 }; 119 120 static kmutex_t vfs_miplist_mutex; 121 static struct ipmnt *vfs_miplist = NULL; 122 static struct ipmnt *vfs_miplist_end = NULL; 123 124 static kmem_cache_t *vfs_cache; /* Pointer to VFS kmem cache */ 125 126 /* 127 * VFS global data. 128 */ 129 vnode_t *rootdir; /* pointer to root inode vnode. */ 130 vnode_t *devicesdir; /* pointer to inode of devices root */ 131 vnode_t *devdir; /* pointer to inode of dev root */ 132 133 char *server_rootpath; /* root path for diskless clients */ 134 char *server_hostname; /* hostname of diskless server */ 135 136 static struct vfs root; 137 static struct vfs devices; 138 static struct vfs dev; 139 struct vfs *rootvfs = &root; /* pointer to root vfs; head of VFS list. */ 140 rvfs_t *rvfs_list; /* array of vfs ptrs for vfs hash list */ 141 int vfshsz = 512; /* # of heads/locks in vfs hash arrays */ 142 /* must be power of 2! */ 143 timespec_t vfs_mnttab_ctime; /* mnttab created time */ 144 timespec_t vfs_mnttab_mtime; /* mnttab last modified time */ 145 char *vfs_dummyfstype = "\0"; 146 struct pollhead vfs_pollhd; /* for mnttab pollers */ 147 struct vnode *vfs_mntdummyvp; /* to fake mnttab read/write for file events */ 148 int mntfstype; /* will be set once mnt fs is mounted */ 149 150 /* 151 * Table for generic options recognized in the VFS layer and acted 152 * on at this level before parsing file system specific options. 153 * The nosuid option is stronger than any of the devices and setuid 154 * options, so those are canceled when nosuid is seen. 155 * 156 * All options which are added here need to be added to the 157 * list of standard options in usr/src/cmd/fs.d/fslib.c as well. 158 */ 159 /* 160 * VFS Mount options table 161 */ 162 static char *ro_cancel[] = { MNTOPT_RW, NULL }; 163 static char *rw_cancel[] = { MNTOPT_RO, NULL }; 164 static char *suid_cancel[] = { MNTOPT_NOSUID, NULL }; 165 static char *nosuid_cancel[] = { MNTOPT_SUID, MNTOPT_DEVICES, MNTOPT_NODEVICES, 166 MNTOPT_NOSETUID, MNTOPT_SETUID, NULL }; 167 static char *devices_cancel[] = { MNTOPT_NODEVICES, NULL }; 168 static char *nodevices_cancel[] = { MNTOPT_DEVICES, NULL }; 169 static char *setuid_cancel[] = { MNTOPT_NOSETUID, NULL }; 170 static char *nosetuid_cancel[] = { MNTOPT_SETUID, NULL }; 171 static char *nbmand_cancel[] = { MNTOPT_NONBMAND, NULL }; 172 static char *nonbmand_cancel[] = { MNTOPT_NBMAND, NULL }; 173 static char *exec_cancel[] = { MNTOPT_NOEXEC, NULL }; 174 static char *noexec_cancel[] = { MNTOPT_EXEC, NULL }; 175 176 static const mntopt_t mntopts[] = { 177 /* 178 * option name cancel options default arg flags 179 */ 180 { MNTOPT_REMOUNT, NULL, NULL, 181 MO_NODISPLAY, (void *)0 }, 182 { MNTOPT_RO, ro_cancel, NULL, 0, 183 (void *)0 }, 184 { MNTOPT_RW, rw_cancel, NULL, 0, 185 (void *)0 }, 186 { MNTOPT_SUID, suid_cancel, NULL, 0, 187 (void *)0 }, 188 { MNTOPT_NOSUID, nosuid_cancel, NULL, 0, 189 (void *)0 }, 190 { MNTOPT_DEVICES, devices_cancel, NULL, 0, 191 (void *)0 }, 192 { MNTOPT_NODEVICES, nodevices_cancel, NULL, 0, 193 (void *)0 }, 194 { MNTOPT_SETUID, setuid_cancel, NULL, 0, 195 (void *)0 }, 196 { MNTOPT_NOSETUID, nosetuid_cancel, NULL, 0, 197 (void *)0 }, 198 { MNTOPT_NBMAND, nbmand_cancel, NULL, 0, 199 (void *)0 }, 200 { MNTOPT_NONBMAND, nonbmand_cancel, NULL, 0, 201 (void *)0 }, 202 { MNTOPT_EXEC, exec_cancel, NULL, 0, 203 (void *)0 }, 204 { MNTOPT_NOEXEC, noexec_cancel, NULL, 0, 205 (void *)0 }, 206 }; 207 208 const mntopts_t vfs_mntopts = { 209 sizeof (mntopts) / sizeof (mntopt_t), 210 (mntopt_t *)&mntopts[0] 211 }; 212 213 /* 214 * File system operation dispatch functions. 215 */ 216 217 int 218 fsop_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 219 { 220 return (*(vfsp)->vfs_op->vfs_mount)(vfsp, mvp, uap, cr); 221 } 222 223 int 224 fsop_unmount(vfs_t *vfsp, int flag, cred_t *cr) 225 { 226 return (*(vfsp)->vfs_op->vfs_unmount)(vfsp, flag, cr); 227 } 228 229 int 230 fsop_root(vfs_t *vfsp, vnode_t **vpp) 231 { 232 refstr_t *mntpt; 233 int ret = (*(vfsp)->vfs_op->vfs_root)(vfsp, vpp); 234 /* 235 * Make sure this root has a path. With lofs, it is possible to have 236 * a NULL mountpoint. 237 */ 238 if (ret == 0 && vfsp->vfs_mntpt != NULL && (*vpp)->v_path == NULL) { 239 mntpt = vfs_getmntpoint(vfsp); 240 vn_setpath_str(*vpp, refstr_value(mntpt), 241 strlen(refstr_value(mntpt))); 242 refstr_rele(mntpt); 243 } 244 245 return (ret); 246 } 247 248 int 249 fsop_statfs(vfs_t *vfsp, statvfs64_t *sp) 250 { 251 return (*(vfsp)->vfs_op->vfs_statvfs)(vfsp, sp); 252 } 253 254 int 255 fsop_sync(vfs_t *vfsp, short flag, cred_t *cr) 256 { 257 return (*(vfsp)->vfs_op->vfs_sync)(vfsp, flag, cr); 258 } 259 260 int 261 fsop_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 262 { 263 /* 264 * In order to handle system attribute fids in a manner 265 * transparent to the underlying fs, we embed the fid for 266 * the sysattr parent object in the sysattr fid and tack on 267 * some extra bytes that only the sysattr layer knows about. 268 * 269 * This guarantees that sysattr fids are larger than other fids 270 * for this vfs. If the vfs supports the sysattr view interface 271 * (as indicated by VFSFT_SYSATTR_VIEWS), we cannot have a size 272 * collision with XATTR_FIDSZ. 273 */ 274 if (vfs_has_feature(vfsp, VFSFT_SYSATTR_VIEWS) && 275 fidp->fid_len == XATTR_FIDSZ) 276 return (xattr_dir_vget(vfsp, vpp, fidp)); 277 278 return (*(vfsp)->vfs_op->vfs_vget)(vfsp, vpp, fidp); 279 } 280 281 int 282 fsop_mountroot(vfs_t *vfsp, enum whymountroot reason) 283 { 284 return (*(vfsp)->vfs_op->vfs_mountroot)(vfsp, reason); 285 } 286 287 void 288 fsop_freefs(vfs_t *vfsp) 289 { 290 (*(vfsp)->vfs_op->vfs_freevfs)(vfsp); 291 } 292 293 int 294 fsop_vnstate(vfs_t *vfsp, vnode_t *vp, vntrans_t nstate) 295 { 296 return ((*(vfsp)->vfs_op->vfs_vnstate)(vfsp, vp, nstate)); 297 } 298 299 int 300 fsop_sync_by_kind(int fstype, short flag, cred_t *cr) 301 { 302 ASSERT((fstype >= 0) && (fstype < nfstype)); 303 304 if (ALLOCATED_VFSSW(&vfssw[fstype]) && VFS_INSTALLED(&vfssw[fstype])) 305 return (*vfssw[fstype].vsw_vfsops.vfs_sync) (NULL, flag, cr); 306 else 307 return (ENOTSUP); 308 } 309 310 /* 311 * File system initialization. vfs_setfsops() must be called from a file 312 * system's init routine. 313 */ 314 315 static int 316 fs_copyfsops(const fs_operation_def_t *template, vfsops_t *actual, 317 int *unused_ops) 318 { 319 static const fs_operation_trans_def_t vfs_ops_table[] = { 320 VFSNAME_MOUNT, offsetof(vfsops_t, vfs_mount), 321 fs_nosys, fs_nosys, 322 323 VFSNAME_UNMOUNT, offsetof(vfsops_t, vfs_unmount), 324 fs_nosys, fs_nosys, 325 326 VFSNAME_ROOT, offsetof(vfsops_t, vfs_root), 327 fs_nosys, fs_nosys, 328 329 VFSNAME_STATVFS, offsetof(vfsops_t, vfs_statvfs), 330 fs_nosys, fs_nosys, 331 332 VFSNAME_SYNC, offsetof(vfsops_t, vfs_sync), 333 (fs_generic_func_p) fs_sync, 334 (fs_generic_func_p) fs_sync, /* No errors allowed */ 335 336 VFSNAME_VGET, offsetof(vfsops_t, vfs_vget), 337 fs_nosys, fs_nosys, 338 339 VFSNAME_MOUNTROOT, offsetof(vfsops_t, vfs_mountroot), 340 fs_nosys, fs_nosys, 341 342 VFSNAME_FREEVFS, offsetof(vfsops_t, vfs_freevfs), 343 (fs_generic_func_p)fs_freevfs, 344 (fs_generic_func_p)fs_freevfs, /* Shouldn't fail */ 345 346 VFSNAME_VNSTATE, offsetof(vfsops_t, vfs_vnstate), 347 (fs_generic_func_p)fs_nosys, 348 (fs_generic_func_p)fs_nosys, 349 350 NULL, 0, NULL, NULL 351 }; 352 353 return (fs_build_vector(actual, unused_ops, vfs_ops_table, template)); 354 } 355 356 void 357 zfs_boot_init() { 358 359 if (strcmp(rootfs.bo_fstype, MNTTYPE_ZFS) == 0) 360 spa_boot_init(); 361 } 362 363 int 364 vfs_setfsops(int fstype, const fs_operation_def_t *template, vfsops_t **actual) 365 { 366 int error; 367 int unused_ops; 368 369 /* 370 * Verify that fstype refers to a valid fs. Note that 371 * 0 is valid since it's used to set "stray" ops. 372 */ 373 if ((fstype < 0) || (fstype >= nfstype)) 374 return (EINVAL); 375 376 if (!ALLOCATED_VFSSW(&vfssw[fstype])) 377 return (EINVAL); 378 379 /* Set up the operations vector. */ 380 381 error = fs_copyfsops(template, &vfssw[fstype].vsw_vfsops, &unused_ops); 382 383 if (error != 0) 384 return (error); 385 386 vfssw[fstype].vsw_flag |= VSW_INSTALLED; 387 388 if (actual != NULL) 389 *actual = &vfssw[fstype].vsw_vfsops; 390 391 #if DEBUG 392 if (unused_ops != 0) 393 cmn_err(CE_WARN, "vfs_setfsops: %s: %d operations supplied " 394 "but not used", vfssw[fstype].vsw_name, unused_ops); 395 #endif 396 397 return (0); 398 } 399 400 int 401 vfs_makefsops(const fs_operation_def_t *template, vfsops_t **actual) 402 { 403 int error; 404 int unused_ops; 405 406 *actual = (vfsops_t *)kmem_alloc(sizeof (vfsops_t), KM_SLEEP); 407 408 error = fs_copyfsops(template, *actual, &unused_ops); 409 if (error != 0) { 410 kmem_free(*actual, sizeof (vfsops_t)); 411 *actual = NULL; 412 return (error); 413 } 414 415 return (0); 416 } 417 418 /* 419 * Free a vfsops structure created as a result of vfs_makefsops(). 420 * NOTE: For a vfsops structure initialized by vfs_setfsops(), use 421 * vfs_freevfsops_by_type(). 422 */ 423 void 424 vfs_freevfsops(vfsops_t *vfsops) 425 { 426 kmem_free(vfsops, sizeof (vfsops_t)); 427 } 428 429 /* 430 * Since the vfsops structure is part of the vfssw table and wasn't 431 * really allocated, we're not really freeing anything. We keep 432 * the name for consistency with vfs_freevfsops(). We do, however, 433 * need to take care of a little bookkeeping. 434 * NOTE: For a vfsops structure created by vfs_setfsops(), use 435 * vfs_freevfsops_by_type(). 436 */ 437 int 438 vfs_freevfsops_by_type(int fstype) 439 { 440 441 /* Verify that fstype refers to a loaded fs (and not fsid 0). */ 442 if ((fstype <= 0) || (fstype >= nfstype)) 443 return (EINVAL); 444 445 WLOCK_VFSSW(); 446 if ((vfssw[fstype].vsw_flag & VSW_INSTALLED) == 0) { 447 WUNLOCK_VFSSW(); 448 return (EINVAL); 449 } 450 451 vfssw[fstype].vsw_flag &= ~VSW_INSTALLED; 452 WUNLOCK_VFSSW(); 453 454 return (0); 455 } 456 457 /* Support routines used to reference vfs_op */ 458 459 /* Set the operations vector for a vfs */ 460 void 461 vfs_setops(vfs_t *vfsp, vfsops_t *vfsops) 462 { 463 vfsops_t *op; 464 465 ASSERT(vfsp != NULL); 466 ASSERT(vfsops != NULL); 467 468 op = vfsp->vfs_op; 469 membar_consumer(); 470 if (vfsp->vfs_femhead == NULL && 471 casptr(&vfsp->vfs_op, op, vfsops) == op) { 472 return; 473 } 474 fsem_setvfsops(vfsp, vfsops); 475 } 476 477 /* Retrieve the operations vector for a vfs */ 478 vfsops_t * 479 vfs_getops(vfs_t *vfsp) 480 { 481 vfsops_t *op; 482 483 ASSERT(vfsp != NULL); 484 485 op = vfsp->vfs_op; 486 membar_consumer(); 487 if (vfsp->vfs_femhead == NULL && op == vfsp->vfs_op) { 488 return (op); 489 } else { 490 return (fsem_getvfsops(vfsp)); 491 } 492 } 493 494 /* 495 * Returns non-zero (1) if the vfsops matches that of the vfs. 496 * Returns zero (0) if not. 497 */ 498 int 499 vfs_matchops(vfs_t *vfsp, vfsops_t *vfsops) 500 { 501 return (vfs_getops(vfsp) == vfsops); 502 } 503 504 /* 505 * Returns non-zero (1) if the file system has installed a non-default, 506 * non-error vfs_sync routine. Returns zero (0) otherwise. 507 */ 508 int 509 vfs_can_sync(vfs_t *vfsp) 510 { 511 /* vfs_sync() routine is not the default/error function */ 512 return (vfs_getops(vfsp)->vfs_sync != fs_sync); 513 } 514 515 /* 516 * Initialize a vfs structure. 517 */ 518 void 519 vfs_init(vfs_t *vfsp, vfsops_t *op, void *data) 520 { 521 /* Other initialization has been moved to vfs_alloc() */ 522 vfsp->vfs_count = 0; 523 vfsp->vfs_next = vfsp; 524 vfsp->vfs_prev = vfsp; 525 vfsp->vfs_zone_next = vfsp; 526 vfsp->vfs_zone_prev = vfsp; 527 vfsp->vfs_lofi_minor = 0; 528 sema_init(&vfsp->vfs_reflock, 1, NULL, SEMA_DEFAULT, NULL); 529 vfsimpl_setup(vfsp); 530 vfsp->vfs_data = (data); 531 vfs_setops((vfsp), (op)); 532 } 533 534 /* 535 * Allocate and initialize the vfs implementation private data 536 * structure, vfs_impl_t. 537 */ 538 void 539 vfsimpl_setup(vfs_t *vfsp) 540 { 541 int i; 542 543 if (vfsp->vfs_implp != NULL) { 544 return; 545 } 546 547 vfsp->vfs_implp = kmem_alloc(sizeof (vfs_impl_t), KM_SLEEP); 548 /* Note that these are #define'd in vfs.h */ 549 vfsp->vfs_vskap = NULL; 550 vfsp->vfs_fstypevsp = NULL; 551 552 /* Set size of counted array, then zero the array */ 553 vfsp->vfs_featureset[0] = VFS_FEATURE_MAXSZ - 1; 554 for (i = 1; i < VFS_FEATURE_MAXSZ; i++) { 555 vfsp->vfs_featureset[i] = 0; 556 } 557 } 558 559 /* 560 * Release the vfs_impl_t structure, if it exists. Some unbundled 561 * filesystems may not use the newer version of vfs and thus 562 * would not contain this implementation private data structure. 563 */ 564 void 565 vfsimpl_teardown(vfs_t *vfsp) 566 { 567 vfs_impl_t *vip = vfsp->vfs_implp; 568 569 if (vip == NULL) 570 return; 571 572 kmem_free(vfsp->vfs_implp, sizeof (vfs_impl_t)); 573 vfsp->vfs_implp = NULL; 574 } 575 576 /* 577 * VFS system calls: mount, umount, syssync, statfs, fstatfs, statvfs, 578 * fstatvfs, and sysfs moved to common/syscall. 579 */ 580 581 /* 582 * Update every mounted file system. We call the vfs_sync operation of 583 * each file system type, passing it a NULL vfsp to indicate that all 584 * mounted file systems of that type should be updated. 585 */ 586 void 587 vfs_sync(int flag) 588 { 589 struct vfssw *vswp; 590 RLOCK_VFSSW(); 591 for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) { 592 if (ALLOCATED_VFSSW(vswp) && VFS_INSTALLED(vswp)) { 593 vfs_refvfssw(vswp); 594 RUNLOCK_VFSSW(); 595 (void) (*vswp->vsw_vfsops.vfs_sync)(NULL, flag, 596 CRED()); 597 vfs_unrefvfssw(vswp); 598 RLOCK_VFSSW(); 599 } 600 } 601 RUNLOCK_VFSSW(); 602 } 603 604 void 605 sync(void) 606 { 607 vfs_sync(0); 608 } 609 610 /* 611 * External routines. 612 */ 613 614 krwlock_t vfssw_lock; /* lock accesses to vfssw */ 615 616 /* 617 * Lock for accessing the vfs linked list. Initialized in vfs_mountroot(), 618 * but otherwise should be accessed only via vfs_list_lock() and 619 * vfs_list_unlock(). Also used to protect the timestamp for mods to the list. 620 */ 621 static krwlock_t vfslist; 622 623 /* 624 * Mount devfs on /devices. This is done right after root is mounted 625 * to provide device access support for the system 626 */ 627 static void 628 vfs_mountdevices(void) 629 { 630 struct vfssw *vsw; 631 struct vnode *mvp; 632 struct mounta mounta = { /* fake mounta for devfs_mount() */ 633 NULL, 634 NULL, 635 MS_SYSSPACE, 636 NULL, 637 NULL, 638 0, 639 NULL, 640 0 641 }; 642 643 /* 644 * _init devfs module to fill in the vfssw 645 */ 646 if (modload("fs", "devfs") == -1) 647 panic("Cannot _init devfs module"); 648 649 /* 650 * Hold vfs 651 */ 652 RLOCK_VFSSW(); 653 vsw = vfs_getvfsswbyname("devfs"); 654 VFS_INIT(&devices, &vsw->vsw_vfsops, NULL); 655 VFS_HOLD(&devices); 656 657 /* 658 * Locate mount point 659 */ 660 if (lookupname("/devices", UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp)) 661 panic("Cannot find /devices"); 662 663 /* 664 * Perform the mount of /devices 665 */ 666 if (VFS_MOUNT(&devices, mvp, &mounta, CRED())) 667 panic("Cannot mount /devices"); 668 669 RUNLOCK_VFSSW(); 670 671 /* 672 * Set appropriate members and add to vfs list for mnttab display 673 */ 674 vfs_setresource(&devices, "/devices"); 675 vfs_setmntpoint(&devices, "/devices"); 676 677 /* 678 * Hold the root of /devices so it won't go away 679 */ 680 if (VFS_ROOT(&devices, &devicesdir)) 681 panic("vfs_mountdevices: not devices root"); 682 683 if (vfs_lock(&devices) != 0) { 684 VN_RELE(devicesdir); 685 cmn_err(CE_NOTE, "Cannot acquire vfs_lock of /devices"); 686 return; 687 } 688 689 if (vn_vfswlock(mvp) != 0) { 690 vfs_unlock(&devices); 691 VN_RELE(devicesdir); 692 cmn_err(CE_NOTE, "Cannot acquire vfswlock of /devices"); 693 return; 694 } 695 696 vfs_add(mvp, &devices, 0); 697 vn_vfsunlock(mvp); 698 vfs_unlock(&devices); 699 VN_RELE(devicesdir); 700 } 701 702 /* 703 * mount the first instance of /dev to root and remain mounted 704 */ 705 static void 706 vfs_mountdev1(void) 707 { 708 struct vfssw *vsw; 709 struct vnode *mvp; 710 struct mounta mounta = { /* fake mounta for sdev_mount() */ 711 NULL, 712 NULL, 713 MS_SYSSPACE | MS_OVERLAY, 714 NULL, 715 NULL, 716 0, 717 NULL, 718 0 719 }; 720 721 /* 722 * _init dev module to fill in the vfssw 723 */ 724 if (modload("fs", "dev") == -1) 725 cmn_err(CE_PANIC, "Cannot _init dev module\n"); 726 727 /* 728 * Hold vfs 729 */ 730 RLOCK_VFSSW(); 731 vsw = vfs_getvfsswbyname("dev"); 732 VFS_INIT(&dev, &vsw->vsw_vfsops, NULL); 733 VFS_HOLD(&dev); 734 735 /* 736 * Locate mount point 737 */ 738 if (lookupname("/dev", UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp)) 739 cmn_err(CE_PANIC, "Cannot find /dev\n"); 740 741 /* 742 * Perform the mount of /dev 743 */ 744 if (VFS_MOUNT(&dev, mvp, &mounta, CRED())) 745 cmn_err(CE_PANIC, "Cannot mount /dev 1\n"); 746 747 RUNLOCK_VFSSW(); 748 749 /* 750 * Set appropriate members and add to vfs list for mnttab display 751 */ 752 vfs_setresource(&dev, "/dev"); 753 vfs_setmntpoint(&dev, "/dev"); 754 755 /* 756 * Hold the root of /dev so it won't go away 757 */ 758 if (VFS_ROOT(&dev, &devdir)) 759 cmn_err(CE_PANIC, "vfs_mountdev1: not dev root"); 760 761 if (vfs_lock(&dev) != 0) { 762 VN_RELE(devdir); 763 cmn_err(CE_NOTE, "Cannot acquire vfs_lock of /dev"); 764 return; 765 } 766 767 if (vn_vfswlock(mvp) != 0) { 768 vfs_unlock(&dev); 769 VN_RELE(devdir); 770 cmn_err(CE_NOTE, "Cannot acquire vfswlock of /dev"); 771 return; 772 } 773 774 vfs_add(mvp, &dev, 0); 775 vn_vfsunlock(mvp); 776 vfs_unlock(&dev); 777 VN_RELE(devdir); 778 } 779 780 /* 781 * Mount required filesystem. This is done right after root is mounted. 782 */ 783 static void 784 vfs_mountfs(char *module, char *spec, char *path) 785 { 786 struct vnode *mvp; 787 struct mounta mounta; 788 vfs_t *vfsp; 789 790 mounta.flags = MS_SYSSPACE | MS_DATA; 791 mounta.fstype = module; 792 mounta.spec = spec; 793 mounta.dir = path; 794 if (lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp)) { 795 cmn_err(CE_WARN, "Cannot find %s", path); 796 return; 797 } 798 if (domount(NULL, &mounta, mvp, CRED(), &vfsp)) 799 cmn_err(CE_WARN, "Cannot mount %s", path); 800 else 801 VFS_RELE(vfsp); 802 VN_RELE(mvp); 803 } 804 805 /* 806 * vfs_mountroot is called by main() to mount the root filesystem. 807 */ 808 void 809 vfs_mountroot(void) 810 { 811 struct vnode *rvp = NULL; 812 char *path; 813 size_t plen; 814 struct vfssw *vswp; 815 816 rw_init(&vfssw_lock, NULL, RW_DEFAULT, NULL); 817 rw_init(&vfslist, NULL, RW_DEFAULT, NULL); 818 819 /* 820 * Alloc the vfs hash bucket array and locks 821 */ 822 rvfs_list = kmem_zalloc(vfshsz * sizeof (rvfs_t), KM_SLEEP); 823 824 /* 825 * Call machine-dependent routine "rootconf" to choose a root 826 * file system type. 827 */ 828 if (rootconf()) 829 panic("vfs_mountroot: cannot mount root"); 830 /* 831 * Get vnode for '/'. Set up rootdir, u.u_rdir and u.u_cdir 832 * to point to it. These are used by lookuppn() so that it 833 * knows where to start from ('/' or '.'). 834 */ 835 vfs_setmntpoint(rootvfs, "/"); 836 if (VFS_ROOT(rootvfs, &rootdir)) 837 panic("vfs_mountroot: no root vnode"); 838 PTOU(curproc)->u_cdir = rootdir; 839 VN_HOLD(PTOU(curproc)->u_cdir); 840 PTOU(curproc)->u_rdir = NULL; 841 842 /* 843 * Setup the global zone's rootvp, now that it exists. 844 */ 845 global_zone->zone_rootvp = rootdir; 846 VN_HOLD(global_zone->zone_rootvp); 847 848 /* 849 * Notify the module code that it can begin using the 850 * root filesystem instead of the boot program's services. 851 */ 852 modrootloaded = 1; 853 854 /* 855 * Special handling for a ZFS root file system. 856 */ 857 zfs_boot_init(); 858 859 /* 860 * Set up mnttab information for root 861 */ 862 vfs_setresource(rootvfs, rootfs.bo_name); 863 864 /* 865 * Notify cluster software that the root filesystem is available. 866 */ 867 clboot_mountroot(); 868 869 /* Now that we're all done with the root FS, set up its vopstats */ 870 if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) != NULL) { 871 /* Set flag for statistics collection */ 872 if (vswp->vsw_flag & VSW_STATS) { 873 initialize_vopstats(&rootvfs->vfs_vopstats); 874 rootvfs->vfs_flag |= VFS_STATS; 875 rootvfs->vfs_fstypevsp = 876 get_fstype_vopstats(rootvfs, vswp); 877 rootvfs->vfs_vskap = get_vskstat_anchor(rootvfs); 878 } 879 vfs_unrefvfssw(vswp); 880 } 881 882 /* 883 * Mount /devices, /dev instance 1, /system/contract, /etc/mnttab, 884 * /etc/svc/volatile, /etc/dfs/sharetab, /system/object, and /proc. 885 */ 886 vfs_mountdevices(); 887 vfs_mountdev1(); 888 889 vfs_mountfs("ctfs", "ctfs", CTFS_ROOT); 890 vfs_mountfs("proc", "/proc", "/proc"); 891 vfs_mountfs("mntfs", "/etc/mnttab", "/etc/mnttab"); 892 vfs_mountfs("tmpfs", "/etc/svc/volatile", "/etc/svc/volatile"); 893 vfs_mountfs("objfs", "objfs", OBJFS_ROOT); 894 895 if (getzoneid() == GLOBAL_ZONEID) { 896 vfs_mountfs("sharefs", "sharefs", "/etc/dfs/sharetab"); 897 } 898 899 #ifdef __sparc 900 /* 901 * This bit of magic can go away when we convert sparc to 902 * the new boot architecture based on ramdisk. 903 * 904 * Booting off a mirrored root volume: 905 * At this point, we have booted and mounted root on a 906 * single component of the mirror. Complete the boot 907 * by configuring SVM and converting the root to the 908 * dev_t of the mirrored root device. This dev_t conversion 909 * only works because the underlying device doesn't change. 910 */ 911 if (root_is_svm) { 912 if (svm_rootconf()) { 913 panic("vfs_mountroot: cannot remount root"); 914 } 915 916 /* 917 * mnttab should reflect the new root device 918 */ 919 vfs_lock_wait(rootvfs); 920 vfs_setresource(rootvfs, rootfs.bo_name); 921 vfs_unlock(rootvfs); 922 } 923 #endif /* __sparc */ 924 925 /* 926 * Look up the root device via devfs so that a dv_node is 927 * created for it. The vnode is never VN_RELE()ed. 928 * We allocate more than MAXPATHLEN so that the 929 * buffer passed to i_ddi_prompath_to_devfspath() is 930 * exactly MAXPATHLEN (the function expects a buffer 931 * of that length). 932 */ 933 plen = strlen("/devices"); 934 path = kmem_alloc(plen + MAXPATHLEN, KM_SLEEP); 935 (void) strcpy(path, "/devices"); 936 937 if (i_ddi_prompath_to_devfspath(rootfs.bo_name, path + plen) 938 != DDI_SUCCESS || 939 lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &rvp)) { 940 941 /* NUL terminate in case "path" has garbage */ 942 path[plen + MAXPATHLEN - 1] = '\0'; 943 #ifdef DEBUG 944 cmn_err(CE_WARN, "!Cannot lookup root device: %s", path); 945 #endif 946 } 947 kmem_free(path, plen + MAXPATHLEN); 948 vfs_mnttabvp_setup(); 949 } 950 951 /* 952 * If remount failed and we're in a zone we need to check for the zone 953 * root path and strip it before the call to vfs_setpath(). 954 * 955 * If strpath doesn't begin with the zone_rootpath the original 956 * strpath is returned unchanged. 957 */ 958 static const char * 959 stripzonepath(const char *strpath) 960 { 961 char *str1, *str2; 962 int i; 963 zone_t *zonep = curproc->p_zone; 964 965 if (zonep->zone_rootpath == NULL || strpath == NULL) { 966 return (NULL); 967 } 968 969 /* 970 * we check for the end of the string at one past the 971 * current position because the zone_rootpath always 972 * ends with "/" but we don't want to strip that off. 973 */ 974 str1 = zonep->zone_rootpath; 975 str2 = (char *)strpath; 976 ASSERT(str1[0] != '\0'); 977 for (i = 0; str1[i + 1] != '\0'; i++) { 978 if (str1[i] != str2[i]) 979 return ((char *)strpath); 980 } 981 return (&str2[i]); 982 } 983 984 /* 985 * Check to see if our "block device" is actually a file. If so, 986 * automatically add a lofi device, and keep track of this fact. 987 */ 988 static int 989 lofi_add(const char *fsname, struct vfs *vfsp, 990 mntopts_t *mntopts, struct mounta *uap) 991 { 992 int fromspace = (uap->flags & MS_SYSSPACE) ? 993 UIO_SYSSPACE : UIO_USERSPACE; 994 struct lofi_ioctl *li = NULL; 995 struct vnode *vp = NULL; 996 struct pathname pn = { NULL }; 997 ldi_ident_t ldi_id; 998 ldi_handle_t ldi_hdl; 999 vfssw_t *vfssw; 1000 int minor; 1001 int err = 0; 1002 1003 if (fsname == NULL || 1004 (vfssw = vfs_getvfssw(fsname)) == NULL) 1005 return (0); 1006 1007 if (!(vfssw->vsw_flag & VSW_CANLOFI)) { 1008 vfs_unrefvfssw(vfssw); 1009 return (0); 1010 } 1011 1012 vfs_unrefvfssw(vfssw); 1013 vfssw = NULL; 1014 1015 if (pn_get(uap->spec, fromspace, &pn) != 0) 1016 return (0); 1017 1018 if (lookupname(uap->spec, fromspace, FOLLOW, NULL, &vp) != 0) 1019 goto out; 1020 1021 if (vp->v_type != VREG) 1022 goto out; 1023 1024 /* OK, this is a lofi mount. */ 1025 1026 if ((uap->flags & (MS_REMOUNT|MS_GLOBAL)) || 1027 vfs_optionisset_nolock(mntopts, MNTOPT_SUID, NULL) || 1028 vfs_optionisset_nolock(mntopts, MNTOPT_SETUID, NULL) || 1029 vfs_optionisset_nolock(mntopts, MNTOPT_DEVICES, NULL)) { 1030 err = EINVAL; 1031 goto out; 1032 } 1033 1034 ldi_id = ldi_ident_from_anon(); 1035 li = kmem_zalloc(sizeof (*li), KM_SLEEP); 1036 (void) strlcpy(li->li_filename, pn.pn_path, MAXPATHLEN + 1); 1037 1038 /* 1039 * The lofi control node is currently exclusive-open. We'd like 1040 * to improve this, but in the meantime, we'll loop waiting for 1041 * access. 1042 */ 1043 for (;;) { 1044 err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE | FEXCL, 1045 kcred, &ldi_hdl, ldi_id); 1046 1047 if (err != EBUSY) 1048 break; 1049 1050 if ((err = delay_sig(hz / 8)) == EINTR) 1051 break; 1052 } 1053 1054 if (err) 1055 goto out2; 1056 1057 err = ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li, 1058 FREAD | FWRITE | FEXCL | FKIOCTL, kcred, &minor); 1059 1060 (void) ldi_close(ldi_hdl, FREAD | FWRITE | FEXCL, kcred); 1061 1062 if (!err) 1063 vfsp->vfs_lofi_minor = minor; 1064 1065 out2: 1066 ldi_ident_release(ldi_id); 1067 out: 1068 if (li != NULL) 1069 kmem_free(li, sizeof (*li)); 1070 if (vp != NULL) 1071 VN_RELE(vp); 1072 pn_free(&pn); 1073 return (err); 1074 } 1075 1076 static void 1077 lofi_remove(struct vfs *vfsp) 1078 { 1079 struct lofi_ioctl *li = NULL; 1080 ldi_ident_t ldi_id; 1081 ldi_handle_t ldi_hdl; 1082 int err; 1083 1084 if (vfsp->vfs_lofi_minor == 0) 1085 return; 1086 1087 ldi_id = ldi_ident_from_anon(); 1088 1089 li = kmem_zalloc(sizeof (*li), KM_SLEEP); 1090 li->li_minor = vfsp->vfs_lofi_minor; 1091 li->li_cleanup = B_TRUE; 1092 1093 do { 1094 err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE | FEXCL, 1095 kcred, &ldi_hdl, ldi_id); 1096 } while (err == EBUSY); 1097 1098 if (err) 1099 goto out; 1100 1101 err = ldi_ioctl(ldi_hdl, LOFI_UNMAP_FILE_MINOR, (intptr_t)li, 1102 FREAD | FWRITE | FEXCL | FKIOCTL, kcred,