1 0 stevel /* 2 0 stevel * CDDL HEADER START 3 0 stevel * 4 0 stevel * The contents of this file are subject to the terms of the 5 1488 rsb * Common Development and Distribution License (the "License"). 6 1488 rsb * You may not use this file except in compliance with the License. 7 0 stevel * 8 0 stevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 0 stevel * or http://www.opensolaris.org/os/licensing. 10 0 stevel * See the License for the specific language governing permissions 11 0 stevel * and limitations under the License. 12 0 stevel * 13 0 stevel * When distributing Covered Code, include this CDDL HEADER in each 14 0 stevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 0 stevel * If applicable, add the following below this CDDL HEADER, with the 16 0 stevel * fields enclosed by brackets "[]" replaced with your own identifying 17 0 stevel * information: Portions Copyright [yyyy] [name of copyright owner] 18 0 stevel * 19 0 stevel * CDDL HEADER END 20 0 stevel */ 21 0 stevel /* 22 10793 dai * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 0 stevel * Use is subject to license terms. 24 0 stevel */ 25 0 stevel 26 0 stevel /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 0 stevel /* All Rights Reserved */ 28 0 stevel 29 0 stevel /* 30 0 stevel * University Copyright- Copyright (c) 1982, 1986, 1988 31 0 stevel * The Regents of the University of California 32 0 stevel * All Rights Reserved 33 0 stevel * 34 0 stevel * University Acknowledgment- Portions of this document are derived from 35 0 stevel * software developed by the University of California, Berkeley, and its 36 0 stevel * contributors. 37 0 stevel */ 38 0 stevel 39 0 stevel #include <sys/types.h> 40 0 stevel #include <sys/t_lock.h> 41 0 stevel #include <sys/param.h> 42 0 stevel #include <sys/errno.h> 43 0 stevel #include <sys/user.h> 44 0 stevel #include <sys/fstyp.h> 45 0 stevel #include <sys/kmem.h> 46 0 stevel #include <sys/systm.h> 47 0 stevel #include <sys/proc.h> 48 0 stevel #include <sys/mount.h> 49 0 stevel #include <sys/vfs.h> 50 3898 rsb #include <sys/vfs_opreg.h> 51 0 stevel #include <sys/fem.h> 52 0 stevel #include <sys/mntent.h> 53 0 stevel #include <sys/stat.h> 54 0 stevel #include <sys/statvfs.h> 55 0 stevel #include <sys/statfs.h> 56 0 stevel #include <sys/cred.h> 57 0 stevel #include <sys/vnode.h> 58 0 stevel #include <sys/rwstlock.h> 59 0 stevel #include <sys/dnlc.h> 60 0 stevel #include <sys/file.h> 61 0 stevel #include <sys/time.h> 62 0 stevel #include <sys/atomic.h> 63 0 stevel #include <sys/cmn_err.h> 64 0 stevel #include <sys/buf.h> 65 0 stevel #include <sys/swap.h> 66 0 stevel #include <sys/debug.h> 67 0 stevel #include <sys/vnode.h> 68 0 stevel #include <sys/modctl.h> 69 0 stevel #include <sys/ddi.h> 70 0 stevel #include <sys/pathname.h> 71 0 stevel #include <sys/bootconf.h> 72 0 stevel #include <sys/dumphdr.h> 73 0 stevel #include <sys/dc_ki.h> 74 0 stevel #include <sys/poll.h> 75 0 stevel #include <sys/sunddi.h> 76 0 stevel #include <sys/sysmacros.h> 77 0 stevel #include <sys/zone.h> 78 0 stevel #include <sys/policy.h> 79 0 stevel #include <sys/ctfs.h> 80 0 stevel #include <sys/objfs.h> 81 0 stevel #include <sys/console.h> 82 0 stevel #include <sys/reboot.h> 83 5331 amw #include <sys/attr.h> 84 10922 Jeff #include <sys/zio.h> 85 6423 gw25295 #include <sys/spa.h> 86 6734 johnlev #include <sys/lofi.h> 87 8194 Jack #include <sys/bootprops.h> 88 0 stevel 89 0 stevel #include <vm/page.h> 90 0 stevel 91 0 stevel #include <fs/fs_subr.h> 92 1520 rsb /* Private interfaces to create vopstats-related data structures */ 93 1520 rsb extern void initialize_vopstats(vopstats_t *); 94 1520 rsb extern vopstats_t *get_fstype_vopstats(struct vfs *, struct vfssw *); 95 1520 rsb extern vsk_anchor_t *get_vskstat_anchor(struct vfs *); 96 1520 rsb 97 0 stevel static void vfs_clearmntopt_nolock(mntopts_t *, const char *, int); 98 0 stevel static void vfs_setmntopt_nolock(mntopts_t *, const char *, 99 0 stevel const char *, int, int); 100 0 stevel static int vfs_optionisset_nolock(const mntopts_t *, const char *, char **); 101 0 stevel static void vfs_freemnttab(struct vfs *); 102 0 stevel static void vfs_freeopt(mntopt_t *); 103 0 stevel static void vfs_swapopttbl_nolock(mntopts_t *, mntopts_t *); 104 0 stevel static void vfs_swapopttbl(mntopts_t *, mntopts_t *); 105 0 stevel static void vfs_copyopttbl_extend(const mntopts_t *, mntopts_t *, int); 106 0 stevel static void vfs_createopttbl_extend(mntopts_t *, const char *, 107 0 stevel const mntopts_t *); 108 0 stevel static char **vfs_copycancelopt_extend(char **const, int); 109 0 stevel static void vfs_freecancelopt(char **); 110 5084 johnlev static void getrootfs(char **, char **); 111 0 stevel static int getmacpath(dev_info_t *, void *); 112 4863 praks static void vfs_mnttabvp_setup(void); 113 0 stevel 114 0 stevel struct ipmnt { 115 0 stevel struct ipmnt *mip_next; 116 0 stevel dev_t mip_dev; 117 0 stevel struct vfs *mip_vfsp; 118 0 stevel }; 119 0 stevel 120 0 stevel static kmutex_t vfs_miplist_mutex; 121 0 stevel static struct ipmnt *vfs_miplist = NULL; 122 0 stevel static struct ipmnt *vfs_miplist_end = NULL; 123 0 stevel 124 5331 amw static kmem_cache_t *vfs_cache; /* Pointer to VFS kmem cache */ 125 5331 amw 126 0 stevel /* 127 0 stevel * VFS global data. 128 0 stevel */ 129 0 stevel vnode_t *rootdir; /* pointer to root inode vnode. */ 130 0 stevel vnode_t *devicesdir; /* pointer to inode of devices root */ 131 2621 llai1 vnode_t *devdir; /* pointer to inode of dev root */ 132 0 stevel 133 0 stevel char *server_rootpath; /* root path for diskless clients */ 134 0 stevel char *server_hostname; /* hostname of diskless server */ 135 0 stevel 136 0 stevel static struct vfs root; 137 0 stevel static struct vfs devices; 138 2621 llai1 static struct vfs dev; 139 0 stevel struct vfs *rootvfs = &root; /* pointer to root vfs; head of VFS list. */ 140 0 stevel rvfs_t *rvfs_list; /* array of vfs ptrs for vfs hash list */ 141 0 stevel int vfshsz = 512; /* # of heads/locks in vfs hash arrays */ 142 0 stevel /* must be power of 2! */ 143 0 stevel timespec_t vfs_mnttab_ctime; /* mnttab created time */ 144 0 stevel timespec_t vfs_mnttab_mtime; /* mnttab last modified time */ 145 4813 dm120769 char *vfs_dummyfstype = "\0"; 146 0 stevel struct pollhead vfs_pollhd; /* for mnttab pollers */ 147 4863 praks struct vnode *vfs_mntdummyvp; /* to fake mnttab read/write for file events */ 148 4863 praks int mntfstype; /* will be set once mnt fs is mounted */ 149 0 stevel 150 0 stevel /* 151 0 stevel * Table for generic options recognized in the VFS layer and acted 152 0 stevel * on at this level before parsing file system specific options. 153 0 stevel * The nosuid option is stronger than any of the devices and setuid 154 0 stevel * options, so those are canceled when nosuid is seen. 155 0 stevel * 156 0 stevel * All options which are added here need to be added to the 157 0 stevel * list of standard options in usr/src/cmd/fs.d/fslib.c as well. 158 0 stevel */ 159 0 stevel /* 160 0 stevel * VFS Mount options table 161 0 stevel */ 162 0 stevel static char *ro_cancel[] = { MNTOPT_RW, NULL }; 163 0 stevel static char *rw_cancel[] = { MNTOPT_RO, NULL }; 164 0 stevel static char *suid_cancel[] = { MNTOPT_NOSUID, NULL }; 165 0 stevel static char *nosuid_cancel[] = { MNTOPT_SUID, MNTOPT_DEVICES, MNTOPT_NODEVICES, 166 0 stevel MNTOPT_NOSETUID, MNTOPT_SETUID, NULL }; 167 0 stevel static char *devices_cancel[] = { MNTOPT_NODEVICES, NULL }; 168 0 stevel static char *nodevices_cancel[] = { MNTOPT_DEVICES, NULL }; 169 0 stevel static char *setuid_cancel[] = { MNTOPT_NOSETUID, NULL }; 170 0 stevel static char *nosetuid_cancel[] = { MNTOPT_SETUID, NULL }; 171 0 stevel static char *nbmand_cancel[] = { MNTOPT_NONBMAND, NULL }; 172 0 stevel static char *nonbmand_cancel[] = { MNTOPT_NBMAND, NULL }; 173 0 stevel static char *exec_cancel[] = { MNTOPT_NOEXEC, NULL }; 174 0 stevel static char *noexec_cancel[] = { MNTOPT_EXEC, NULL }; 175 0 stevel 176 0 stevel static const mntopt_t mntopts[] = { 177 0 stevel /* 178 0 stevel * option name cancel options default arg flags 179 0 stevel */ 180 0 stevel { MNTOPT_REMOUNT, NULL, NULL, 181 0 stevel MO_NODISPLAY, (void *)0 }, 182 0 stevel { MNTOPT_RO, ro_cancel, NULL, 0, 183 0 stevel (void *)0 }, 184 0 stevel { MNTOPT_RW, rw_cancel, NULL, 0, 185 0 stevel (void *)0 }, 186 0 stevel { MNTOPT_SUID, suid_cancel, NULL, 0, 187 0 stevel (void *)0 }, 188 0 stevel { MNTOPT_NOSUID, nosuid_cancel, NULL, 0, 189 0 stevel (void *)0 }, 190 0 stevel { MNTOPT_DEVICES, devices_cancel, NULL, 0, 191 0 stevel (void *)0 }, 192 0 stevel { MNTOPT_NODEVICES, nodevices_cancel, NULL, 0, 193 0 stevel (void *)0 }, 194 0 stevel { MNTOPT_SETUID, setuid_cancel, NULL, 0, 195 0 stevel (void *)0 }, 196 0 stevel { MNTOPT_NOSETUID, nosetuid_cancel, NULL, 0, 197 0 stevel (void *)0 }, 198 0 stevel { MNTOPT_NBMAND, nbmand_cancel, NULL, 0, 199 0 stevel (void *)0 }, 200 0 stevel { MNTOPT_NONBMAND, nonbmand_cancel, NULL, 0, 201 0 stevel (void *)0 }, 202 0 stevel { MNTOPT_EXEC, exec_cancel, NULL, 0, 203 0 stevel (void *)0 }, 204 0 stevel { MNTOPT_NOEXEC, noexec_cancel, NULL, 0, 205 0 stevel (void *)0 }, 206 0 stevel }; 207 0 stevel 208 0 stevel const mntopts_t vfs_mntopts = { 209 0 stevel sizeof (mntopts) / sizeof (mntopt_t), 210 0 stevel (mntopt_t *)&mntopts[0] 211 0 stevel }; 212 0 stevel 213 0 stevel /* 214 0 stevel * File system operation dispatch functions. 215 0 stevel */ 216 0 stevel 217 0 stevel int 218 0 stevel fsop_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 219 0 stevel { 220 0 stevel return (*(vfsp)->vfs_op->vfs_mount)(vfsp, mvp, uap, cr); 221 0 stevel } 222 0 stevel 223 0 stevel int 224 0 stevel fsop_unmount(vfs_t *vfsp, int flag, cred_t *cr) 225 0 stevel { 226 0 stevel return (*(vfsp)->vfs_op->vfs_unmount)(vfsp, flag, cr); 227 0 stevel } 228 0 stevel 229 0 stevel int 230 0 stevel fsop_root(vfs_t *vfsp, vnode_t **vpp) 231 0 stevel { 232 0 stevel refstr_t *mntpt; 233 0 stevel int ret = (*(vfsp)->vfs_op->vfs_root)(vfsp, vpp); 234 0 stevel /* 235 0 stevel * Make sure this root has a path. With lofs, it is possible to have 236 0 stevel * a NULL mountpoint. 237 0 stevel */ 238 254 eschrock if (ret == 0 && vfsp->vfs_mntpt != NULL && (*vpp)->v_path == NULL) { 239 0 stevel mntpt = vfs_getmntpoint(vfsp); 240 0 stevel vn_setpath_str(*vpp, refstr_value(mntpt), 241 0 stevel strlen(refstr_value(mntpt))); 242 0 stevel refstr_rele(mntpt); 243 0 stevel } 244 0 stevel 245 0 stevel return (ret); 246 0 stevel } 247 0 stevel 248 0 stevel int 249 0 stevel fsop_statfs(vfs_t *vfsp, statvfs64_t *sp) 250 0 stevel { 251 0 stevel return (*(vfsp)->vfs_op->vfs_statvfs)(vfsp, sp); 252 0 stevel } 253 0 stevel 254 0 stevel int 255 0 stevel fsop_sync(vfs_t *vfsp, short flag, cred_t *cr) 256 0 stevel { 257 0 stevel return (*(vfsp)->vfs_op->vfs_sync)(vfsp, flag, cr); 258 0 stevel } 259 0 stevel 260 0 stevel int 261 0 stevel fsop_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 262 0 stevel { 263 5331 amw /* 264 5331 amw * In order to handle system attribute fids in a manner 265 5331 amw * transparent to the underlying fs, we embed the fid for 266 5331 amw * the sysattr parent object in the sysattr fid and tack on 267 5331 amw * some extra bytes that only the sysattr layer knows about. 268 5331 amw * 269 5331 amw * This guarantees that sysattr fids are larger than other fids 270 7757 Janice * for this vfs. If the vfs supports the sysattr view interface 271 7757 Janice * (as indicated by VFSFT_SYSATTR_VIEWS), we cannot have a size 272 7757 Janice * collision with XATTR_FIDSZ. 273 7757 Janice */ 274 7757 Janice if (vfs_has_feature(vfsp, VFSFT_SYSATTR_VIEWS) && 275 5331 amw fidp->fid_len == XATTR_FIDSZ) 276 5331 amw return (xattr_dir_vget(vfsp, vpp, fidp)); 277 5331 amw 278 0 stevel return (*(vfsp)->vfs_op->vfs_vget)(vfsp, vpp, fidp); 279 0 stevel } 280 0 stevel 281 0 stevel int 282 0 stevel fsop_mountroot(vfs_t *vfsp, enum whymountroot reason) 283 0 stevel { 284 0 stevel return (*(vfsp)->vfs_op->vfs_mountroot)(vfsp, reason); 285 0 stevel } 286 0 stevel 287 0 stevel void 288 0 stevel fsop_freefs(vfs_t *vfsp) 289 0 stevel { 290 0 stevel (*(vfsp)->vfs_op->vfs_freevfs)(vfsp); 291 0 stevel } 292 0 stevel 293 0 stevel int 294 0 stevel fsop_vnstate(vfs_t *vfsp, vnode_t *vp, vntrans_t nstate) 295 0 stevel { 296 0 stevel return ((*(vfsp)->vfs_op->vfs_vnstate)(vfsp, vp, nstate)); 297 0 stevel } 298 0 stevel 299 0 stevel int 300 0 stevel fsop_sync_by_kind(int fstype, short flag, cred_t *cr) 301 0 stevel { 302 0 stevel ASSERT((fstype >= 0) && (fstype < nfstype)); 303 0 stevel 304 0 stevel if (ALLOCATED_VFSSW(&vfssw[fstype]) && VFS_INSTALLED(&vfssw[fstype])) 305 0 stevel return (*vfssw[fstype].vsw_vfsops.vfs_sync) (NULL, flag, cr); 306 0 stevel else 307 0 stevel return (ENOTSUP); 308 0 stevel } 309 0 stevel 310 0 stevel /* 311 0 stevel * File system initialization. vfs_setfsops() must be called from a file 312 0 stevel * system's init routine. 313 0 stevel */ 314 0 stevel 315 0 stevel static int 316 0 stevel fs_copyfsops(const fs_operation_def_t *template, vfsops_t *actual, 317 0 stevel int *unused_ops) 318 0 stevel { 319 0 stevel static const fs_operation_trans_def_t vfs_ops_table[] = { 320 0 stevel VFSNAME_MOUNT, offsetof(vfsops_t, vfs_mount), 321 0 stevel fs_nosys, fs_nosys, 322 0 stevel 323 0 stevel VFSNAME_UNMOUNT, offsetof(vfsops_t, vfs_unmount), 324 0 stevel fs_nosys, fs_nosys, 325 0 stevel 326 0 stevel VFSNAME_ROOT, offsetof(vfsops_t, vfs_root), 327 0 stevel fs_nosys, fs_nosys, 328 0 stevel 329 0 stevel VFSNAME_STATVFS, offsetof(vfsops_t, vfs_statvfs), 330 0 stevel fs_nosys, fs_nosys, 331 0 stevel 332 0 stevel VFSNAME_SYNC, offsetof(vfsops_t, vfs_sync), 333 0 stevel (fs_generic_func_p) fs_sync, 334 0 stevel (fs_generic_func_p) fs_sync, /* No errors allowed */ 335 0 stevel 336 0 stevel VFSNAME_VGET, offsetof(vfsops_t, vfs_vget), 337 0 stevel fs_nosys, fs_nosys, 338 0 stevel 339 0 stevel VFSNAME_MOUNTROOT, offsetof(vfsops_t, vfs_mountroot), 340 0 stevel fs_nosys, fs_nosys, 341 0 stevel 342 0 stevel VFSNAME_FREEVFS, offsetof(vfsops_t, vfs_freevfs), 343 0 stevel (fs_generic_func_p)fs_freevfs, 344 0 stevel (fs_generic_func_p)fs_freevfs, /* Shouldn't fail */ 345 0 stevel 346 0 stevel VFSNAME_VNSTATE, offsetof(vfsops_t, vfs_vnstate), 347 0 stevel (fs_generic_func_p)fs_nosys, 348 0 stevel (fs_generic_func_p)fs_nosys, 349 0 stevel 350 0 stevel NULL, 0, NULL, NULL 351 0 stevel }; 352 0 stevel 353 0 stevel return (fs_build_vector(actual, unused_ops, vfs_ops_table, template)); 354 0 stevel } 355 0 stevel 356 6423 gw25295 void 357 6423 gw25295 zfs_boot_init() { 358 6423 gw25295 359 6423 gw25295 if (strcmp(rootfs.bo_fstype, MNTTYPE_ZFS) == 0) 360 6423 gw25295 spa_boot_init(); 361 6423 gw25295 } 362 6423 gw25295 363 0 stevel int 364 0 stevel vfs_setfsops(int fstype, const fs_operation_def_t *template, vfsops_t **actual) 365 0 stevel { 366 0 stevel int error; 367 0 stevel int unused_ops; 368 0 stevel 369 3904 rsb /* 370 3904 rsb * Verify that fstype refers to a valid fs. Note that 371 3904 rsb * 0 is valid since it's used to set "stray" ops. 372 3904 rsb */ 373 3904 rsb if ((fstype < 0) || (fstype >= nfstype)) 374 0 stevel return (EINVAL); 375 0 stevel 376 0 stevel if (!ALLOCATED_VFSSW(&vfssw[fstype])) 377 0 stevel return (EINVAL); 378 0 stevel 379 0 stevel /* Set up the operations vector. */ 380 0 stevel 381 0 stevel error = fs_copyfsops(template, &vfssw[fstype].vsw_vfsops, &unused_ops); 382 0 stevel 383 0 stevel if (error != 0) 384 0 stevel return (error); 385 0 stevel 386 0 stevel vfssw[fstype].vsw_flag |= VSW_INSTALLED; 387 0 stevel 388 0 stevel if (actual != NULL) 389 0 stevel *actual = &vfssw[fstype].vsw_vfsops; 390 0 stevel 391 0 stevel #if DEBUG 392 0 stevel if (unused_ops != 0) 393 0 stevel cmn_err(CE_WARN, "vfs_setfsops: %s: %d operations supplied " 394 0 stevel "but not used", vfssw[fstype].vsw_name, unused_ops); 395 0 stevel #endif 396 0 stevel 397 0 stevel return (0); 398 0 stevel } 399 0 stevel 400 0 stevel int 401 0 stevel vfs_makefsops(const fs_operation_def_t *template, vfsops_t **actual) 402 0 stevel { 403 0 stevel int error; 404 0 stevel int unused_ops; 405 0 stevel 406 0 stevel *actual = (vfsops_t *)kmem_alloc(sizeof (vfsops_t), KM_SLEEP); 407 0 stevel 408 0 stevel error = fs_copyfsops(template, *actual, &unused_ops); 409 0 stevel if (error != 0) { 410 0 stevel kmem_free(*actual, sizeof (vfsops_t)); 411 0 stevel *actual = NULL; 412 0 stevel return (error); 413 0 stevel } 414 0 stevel 415 0 stevel return (0); 416 0 stevel } 417 0 stevel 418 0 stevel /* 419 0 stevel * Free a vfsops structure created as a result of vfs_makefsops(). 420 0 stevel * NOTE: For a vfsops structure initialized by vfs_setfsops(), use 421 0 stevel * vfs_freevfsops_by_type(). 422 0 stevel */ 423 0 stevel void 424 0 stevel vfs_freevfsops(vfsops_t *vfsops) 425 0 stevel { 426 0 stevel kmem_free(vfsops, sizeof (vfsops_t)); 427 0 stevel } 428 0 stevel 429 0 stevel /* 430 0 stevel * Since the vfsops structure is part of the vfssw table and wasn't 431 0 stevel * really allocated, we're not really freeing anything. We keep 432 0 stevel * the name for consistency with vfs_freevfsops(). We do, however, 433 0 stevel * need to take care of a little bookkeeping. 434 0 stevel * NOTE: For a vfsops structure created by vfs_setfsops(), use 435 0 stevel * vfs_freevfsops_by_type(). 436 0 stevel */ 437 0 stevel int 438 0 stevel vfs_freevfsops_by_type(int fstype) 439 0 stevel { 440 0 stevel 441 0 stevel /* Verify that fstype refers to a loaded fs (and not fsid 0). */ 442 0 stevel if ((fstype <= 0) || (fstype >= nfstype)) 443 0 stevel return (EINVAL); 444 0 stevel 445 0 stevel WLOCK_VFSSW(); 446 0 stevel if ((vfssw[fstype].vsw_flag & VSW_INSTALLED) == 0) { 447 0 stevel WUNLOCK_VFSSW(); 448 0 stevel return (EINVAL); 449 0 stevel } 450 0 stevel 451 0 stevel vfssw[fstype].vsw_flag &= ~VSW_INSTALLED; 452 0 stevel WUNLOCK_VFSSW(); 453 0 stevel 454 0 stevel return (0); 455 0 stevel } 456 0 stevel 457 0 stevel /* Support routines used to reference vfs_op */ 458 0 stevel 459 0 stevel /* Set the operations vector for a vfs */ 460 0 stevel void 461 0 stevel vfs_setops(vfs_t *vfsp, vfsops_t *vfsops) 462 0 stevel { 463 0 stevel vfsops_t *op; 464 0 stevel 465 0 stevel ASSERT(vfsp != NULL); 466 0 stevel ASSERT(vfsops != NULL); 467 0 stevel 468 0 stevel op = vfsp->vfs_op; 469 0 stevel membar_consumer(); 470 5331 amw if (vfsp->vfs_femhead == NULL && 471 0 stevel casptr(&vfsp->vfs_op, op, vfsops) == op) { 472 0 stevel return; 473 0 stevel } 474 0 stevel fsem_setvfsops(vfsp, vfsops); 475 0 stevel } 476 0 stevel 477 0 stevel /* Retrieve the operations vector for a vfs */ 478 0 stevel vfsops_t * 479 0 stevel vfs_getops(vfs_t *vfsp) 480 0 stevel { 481 0 stevel vfsops_t *op; 482 0 stevel 483 0 stevel ASSERT(vfsp != NULL); 484 0 stevel 485 0 stevel op = vfsp->vfs_op; 486 0 stevel membar_consumer(); 487 5331 amw if (vfsp->vfs_femhead == NULL && op == vfsp->vfs_op) { 488 0 stevel return (op); 489 0 stevel } else { 490 0 stevel return (fsem_getvfsops(vfsp)); 491 0 stevel } 492 0 stevel } 493 0 stevel 494 0 stevel /* 495 0 stevel * Returns non-zero (1) if the vfsops matches that of the vfs. 496 0 stevel * Returns zero (0) if not. 497 0 stevel */ 498 0 stevel int 499 0 stevel vfs_matchops(vfs_t *vfsp, vfsops_t *vfsops) 500 0 stevel { 501 0 stevel return (vfs_getops(vfsp) == vfsops); 502 0 stevel } 503 0 stevel 504 0 stevel /* 505 0 stevel * Returns non-zero (1) if the file system has installed a non-default, 506 0 stevel * non-error vfs_sync routine. Returns zero (0) otherwise. 507 0 stevel */ 508 0 stevel int 509 0 stevel vfs_can_sync(vfs_t *vfsp) 510 0 stevel { 511 0 stevel /* vfs_sync() routine is not the default/error function */ 512 0 stevel return (vfs_getops(vfsp)->vfs_sync != fs_sync); 513 0 stevel } 514 0 stevel 515 0 stevel /* 516 0 stevel * Initialize a vfs structure. 517 0 stevel */ 518 0 stevel void 519 0 stevel vfs_init(vfs_t *vfsp, vfsops_t *op, void *data) 520 0 stevel { 521 5331 amw /* Other initialization has been moved to vfs_alloc() */ 522 0 stevel vfsp->vfs_count = 0; 523 0 stevel vfsp->vfs_next = vfsp; 524 0 stevel vfsp->vfs_prev = vfsp; 525 0 stevel vfsp->vfs_zone_next = vfsp; 526 0 stevel vfsp->vfs_zone_prev = vfsp; 527 6734 johnlev vfsp->vfs_lofi_minor = 0; 528 5331 amw sema_init(&vfsp->vfs_reflock, 1, NULL, SEMA_DEFAULT, NULL); 529 5331 amw vfsimpl_setup(vfsp); 530 0 stevel vfsp->vfs_data = (data); 531 0 stevel vfs_setops((vfsp), (op)); 532 0 stevel } 533 0 stevel 534 1925 rsb /* 535 1925 rsb * Allocate and initialize the vfs implementation private data 536 1925 rsb * structure, vfs_impl_t. 537 1925 rsb */ 538 1925 rsb void 539 1925 rsb vfsimpl_setup(vfs_t *vfsp) 540 1925 rsb { 541 5331 amw int i; 542 5331 amw 543 5331 amw if (vfsp->vfs_implp != NULL) { 544 5331 amw return; 545 5331 amw } 546 5331 amw 547 1925 rsb vfsp->vfs_implp = kmem_alloc(sizeof (vfs_impl_t), KM_SLEEP); 548 5331 amw /* Note that these are #define'd in vfs.h */ 549 1925 rsb vfsp->vfs_vskap = NULL; 550 1925 rsb vfsp->vfs_fstypevsp = NULL; 551 5331 amw 552 5331 amw /* Set size of counted array, then zero the array */ 553 5331 amw vfsp->vfs_featureset[0] = VFS_FEATURE_MAXSZ - 1; 554 5331 amw for (i = 1; i < VFS_FEATURE_MAXSZ; i++) { 555 5331 amw vfsp->vfs_featureset[i] = 0; 556 5331 amw } 557 1925 rsb } 558 1925 rsb 559 1925 rsb /* 560 1925 rsb * Release the vfs_impl_t structure, if it exists. Some unbundled 561 1925 rsb * filesystems may not use the newer version of vfs and thus 562 1925 rsb * would not contain this implementation private data structure. 563 1925 rsb */ 564 1925 rsb void 565 1925 rsb vfsimpl_teardown(vfs_t *vfsp) 566 1925 rsb { 567 1925 rsb vfs_impl_t *vip = vfsp->vfs_implp; 568 1925 rsb 569 1925 rsb if (vip == NULL) 570 1925 rsb return; 571 1925 rsb 572 1925 rsb kmem_free(vfsp->vfs_implp, sizeof (vfs_impl_t)); 573 1925 rsb vfsp->vfs_implp = NULL; 574 1925 rsb } 575 0 stevel 576 0 stevel /* 577 0 stevel * VFS system calls: mount, umount, syssync, statfs, fstatfs, statvfs, 578 0 stevel * fstatvfs, and sysfs moved to common/syscall. 579 0 stevel */ 580 0 stevel 581 0 stevel /* 582 0 stevel * Update every mounted file system. We call the vfs_sync operation of 583 0 stevel * each file system type, passing it a NULL vfsp to indicate that all 584 0 stevel * mounted file systems of that type should be updated. 585 0 stevel */ 586 0 stevel void 587 0 stevel vfs_sync(int flag) 588 0 stevel { 589 0 stevel struct vfssw *vswp; 590 0 stevel RLOCK_VFSSW(); 591 0 stevel for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) { 592 0 stevel if (ALLOCATED_VFSSW(vswp) && VFS_INSTALLED(vswp)) { 593 0 stevel vfs_refvfssw(vswp); 594 0 stevel RUNLOCK_VFSSW(); 595 0 stevel (void) (*vswp->vsw_vfsops.vfs_sync)(NULL, flag, 596 0 stevel CRED()); 597 0 stevel vfs_unrefvfssw(vswp); 598 0 stevel RLOCK_VFSSW(); 599 0 stevel } 600 0 stevel } 601 0 stevel RUNLOCK_VFSSW(); 602 0 stevel } 603 0 stevel 604 0 stevel void 605 0 stevel sync(void) 606 0 stevel { 607 0 stevel vfs_sync(0); 608 0 stevel } 609 0 stevel 610 0 stevel /* 611 0 stevel * External routines. 612 0 stevel */ 613 0 stevel 614 0 stevel krwlock_t vfssw_lock; /* lock accesses to vfssw */ 615 0 stevel 616 0 stevel /* 617 0 stevel * Lock for accessing the vfs linked list. Initialized in vfs_mountroot(), 618 0 stevel * but otherwise should be accessed only via vfs_list_lock() and 619 0 stevel * vfs_list_unlock(). Also used to protect the timestamp for mods to the list. 620 0 stevel */ 621 0 stevel static krwlock_t vfslist; 622 0 stevel 623 0 stevel /* 624 0 stevel * Mount devfs on /devices. This is done right after root is mounted 625 0 stevel * to provide device access support for the system 626 0 stevel */ 627 0 stevel static void 628 0 stevel vfs_mountdevices(void) 629 0 stevel { 630 0 stevel struct vfssw *vsw; 631 0 stevel struct vnode *mvp; 632 0 stevel struct mounta mounta = { /* fake mounta for devfs_mount() */ 633 0 stevel NULL, 634 0 stevel NULL, 635 0 stevel MS_SYSSPACE, 636 0 stevel NULL, 637 0 stevel NULL, 638 0 stevel 0, 639 0 stevel NULL, 640 0 stevel 0 641 0 stevel }; 642 0 stevel 643 0 stevel /* 644 0 stevel * _init devfs module to fill in the vfssw 645 0 stevel */ 646 0 stevel if (modload("fs", "devfs") == -1) 647 3446 mrj panic("Cannot _init devfs module"); 648 0 stevel 649 0 stevel /* 650 0 stevel * Hold vfs 651 0 stevel */ 652 0 stevel RLOCK_VFSSW(); 653 0 stevel vsw = vfs_getvfsswbyname("devfs"); 654 0 stevel VFS_INIT(&devices, &vsw->vsw_vfsops, NULL); 655 0 stevel VFS_HOLD(&devices); 656 0 stevel 657 0 stevel /* 658 0 stevel * Locate mount point 659 0 stevel */ 660 0 stevel if (lookupname("/devices", UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp)) 661 3446 mrj panic("Cannot find /devices"); 662 0 stevel 663 0 stevel /* 664 0 stevel * Perform the mount of /devices 665 0 stevel */ 666 0 stevel if (VFS_MOUNT(&devices, mvp, &mounta, CRED())) 667 3446 mrj panic("Cannot mount /devices"); 668 0 stevel 669 0 stevel RUNLOCK_VFSSW(); 670 0 stevel 671 0 stevel /* 672 0 stevel * Set appropriate members and add to vfs list for mnttab display 673 0 stevel */ 674 0 stevel vfs_setresource(&devices, "/devices"); 675 0 stevel vfs_setmntpoint(&devices, "/devices"); 676 0 stevel 677 0 stevel /* 678 0 stevel * Hold the root of /devices so it won't go away 679 0 stevel */ 680 0 stevel if (VFS_ROOT(&devices, &devicesdir)) 681 3446 mrj panic("vfs_mountdevices: not devices root"); 682 0 stevel 683 0 stevel if (vfs_lock(&devices) != 0) { 684 2621 llai1 VN_RELE(devicesdir); 685 0 stevel cmn_err(CE_NOTE, "Cannot acquire vfs_lock of /devices"); 686 0 stevel return; 687 0 stevel } 688 0 stevel 689 0 stevel if (vn_vfswlock(mvp) != 0) { 690 0 stevel vfs_unlock(&devices); 691 2621 llai1 VN_RELE(devicesdir); 692 0 stevel cmn_err(CE_NOTE, "Cannot acquire vfswlock of /devices"); 693 0 stevel return; 694 0 stevel } 695 0 stevel 696 0 stevel vfs_add(mvp, &devices, 0); 697 0 stevel vn_vfsunlock(mvp); 698 0 stevel vfs_unlock(&devices); 699 2621 llai1 VN_RELE(devicesdir); 700 2621 llai1 } 701 2621 llai1 702 2621 llai1 /* 703 2621 llai1 * mount the first instance of /dev to root and remain mounted 704 2621 llai1 */ 705 2621 llai1 static void 706 2621 llai1 vfs_mountdev1(void) 707 2621 llai1 { 708 2621 llai1 struct vfssw *vsw; 709 2621 llai1 struct vnode *mvp; 710 2621 llai1 struct mounta mounta = { /* fake mounta for sdev_mount() */ 711 2621 llai1 NULL, 712 2621 llai1 NULL, 713 2621 llai1 MS_SYSSPACE | MS_OVERLAY, 714 2621 llai1 NULL, 715 2621 llai1 NULL, 716 2621 llai1 0, 717 2621 llai1 NULL, 718 2621 llai1 0 719 2621 llai1 }; 720 2621 llai1 721 2621 llai1 /* 722 2621 llai1 * _init dev module to fill in the vfssw 723 2621 llai1 */ 724 2621 llai1 if (modload("fs", "dev") == -1) 725 2621 llai1 cmn_err(CE_PANIC, "Cannot _init dev module\n"); 726 2621 llai1 727 2621 llai1 /* 728 2621 llai1 * Hold vfs 729 2621 llai1 */ 730 2621 llai1 RLOCK_VFSSW(); 731 2621 llai1 vsw = vfs_getvfsswbyname("dev"); 732 2621 llai1 VFS_INIT(&dev, &vsw->vsw_vfsops, NULL); 733 2621 llai1 VFS_HOLD(&dev); 734 2621 llai1 735 2621 llai1 /* 736 2621 llai1 * Locate mount point 737 2621 llai1 */ 738 2621 llai1 if (lookupname("/dev", UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp)) 739 2621 llai1 cmn_err(CE_PANIC, "Cannot find /dev\n"); 740 2621 llai1 741 2621 llai1 /* 742 2621 llai1 * Perform the mount of /dev 743 2621 llai1 */ 744 2621 llai1 if (VFS_MOUNT(&dev, mvp, &mounta, CRED())) 745 2621 llai1 cmn_err(CE_PANIC, "Cannot mount /dev 1\n"); 746 2621 llai1 747 2621 llai1 RUNLOCK_VFSSW(); 748 2621 llai1 749 2621 llai1 /* 750 2621 llai1 * Set appropriate members and add to vfs list for mnttab display 751 2621 llai1 */ 752 2621 llai1 vfs_setresource(&dev, "/dev"); 753 2621 llai1 vfs_setmntpoint(&dev, "/dev"); 754 2621 llai1 755 2621 llai1 /* 756 2621 llai1 * Hold the root of /dev so it won't go away 757 2621 llai1 */ 758 2621 llai1 if (VFS_ROOT(&dev, &devdir)) 759 2621 llai1 cmn_err(CE_PANIC, "vfs_mountdev1: not dev root"); 760 2621 llai1 761 2621 llai1 if (vfs_lock(&dev) != 0) { 762 2621 llai1 VN_RELE(devdir); 763 2621 llai1 cmn_err(CE_NOTE, "Cannot acquire vfs_lock of /dev"); 764 2621 llai1 return; 765 2621 llai1 } 766 2621 llai1 767 2621 llai1 if (vn_vfswlock(mvp) != 0) { 768 2621 llai1 vfs_unlock(&dev); 769 2621 llai1 VN_RELE(devdir); 770 2621 llai1 cmn_err(CE_NOTE, "Cannot acquire vfswlock of /dev"); 771 2621 llai1 return; 772 2621 llai1 } 773 2621 llai1 774 2621 llai1 vfs_add(mvp, &dev, 0); 775 2621 llai1 vn_vfsunlock(mvp); 776 2621 llai1 vfs_unlock(&dev); 777 2621 llai1 VN_RELE(devdir); 778 0 stevel } 779 0 stevel 780 0 stevel /* 781 0 stevel * Mount required filesystem. This is done right after root is mounted. 782 0 stevel */ 783 0 stevel static void 784 0 stevel vfs_mountfs(char *module, char *spec, char *path) 785 0 stevel { 786 0 stevel struct vnode *mvp; 787 0 stevel struct mounta mounta; 788 0 stevel vfs_t *vfsp; 789 0 stevel 790 0 stevel mounta.flags = MS_SYSSPACE | MS_DATA; 791 0 stevel mounta.fstype = module; 792 0 stevel mounta.spec = spec; 793 0 stevel mounta.dir = path; 794 0 stevel if (lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp)) { 795 3446 mrj cmn_err(CE_WARN, "Cannot find %s", path); 796 0 stevel return; 797 0 stevel } 798 0 stevel if (domount(NULL, &mounta, mvp, CRED(), &vfsp)) 799 3446 mrj cmn_err(CE_WARN, "Cannot mount %s", path); 800 0 stevel else 801 0 stevel VFS_RELE(vfsp); 802 0 stevel VN_RELE(mvp); 803 0 stevel } 804 0 stevel 805 0 stevel /* 806 0 stevel * vfs_mountroot is called by main() to mount the root filesystem. 807 0 stevel */ 808 0 stevel void 809 0 stevel vfs_mountroot(void) 810 0 stevel { 811 0 stevel struct vnode *rvp = NULL; 812 0 stevel char *path; 813 0 stevel size_t plen; 814 1488 rsb struct vfssw *vswp; 815 11173 Jonathan proc_t *p; 816 0 stevel 817 0 stevel rw_init(&vfssw_lock, NULL, RW_DEFAULT, NULL); 818 0 stevel rw_init(&vfslist, NULL, RW_DEFAULT, NULL); 819 0 stevel 820 0 stevel /* 821 0 stevel * Alloc the vfs hash bucket array and locks 822 0 stevel */ 823 0 stevel rvfs_list = kmem_zalloc(vfshsz * sizeof (rvfs_t), KM_SLEEP); 824 0 stevel 825 0 stevel /* 826 0 stevel * Call machine-dependent routine "rootconf" to choose a root 827 0 stevel * file system type. 828 0 stevel */ 829 0 stevel if (rootconf()) 830 3446 mrj panic("vfs_mountroot: cannot mount root"); 831 0 stevel /* 832 0 stevel * Get vnode for '/'. Set up rootdir, u.u_rdir and u.u_cdir 833 0 stevel * to point to it. These are used by lookuppn() so that it 834 0 stevel * knows where to start from ('/' or '.'). 835 0 stevel */ 836 0 stevel vfs_setmntpoint(rootvfs, "/"); 837 0 stevel if (VFS_ROOT(rootvfs, &rootdir)) 838 3446 mrj panic("vfs_mountroot: no root vnode"); 839 11173 Jonathan 840 11173 Jonathan /* 841 11173 Jonathan * At this point, the process tree consists of p0 and possibly some 842 11173 Jonathan * direct children of p0. (i.e. there are no grandchildren) 843 11173 Jonathan * 844 11173 Jonathan * Walk through them all, setting their current directory. 845 11173 Jonathan */ 846 11173 Jonathan mutex_enter(&pidlock); 847 11173 Jonathan for (p = practive; p != NULL; p = p->p_next) { 848 11173 Jonathan ASSERT(p == &p0 || p->p_parent == &p0); 849 11173 Jonathan 850 11173 Jonathan PTOU(p)->u_cdir = rootdir; 851 11173 Jonathan VN_HOLD(PTOU(p)->u_cdir); 852 11173 Jonathan PTOU(p)->u_rdir = NULL; 853 11173 Jonathan } 854 11173 Jonathan mutex_exit(&pidlock); 855 0 stevel 856 0 stevel /* 857 0 stevel * Setup the global zone's rootvp, now that it exists. 858 0 stevel */ 859 0 stevel global_zone->zone_rootvp = rootdir; 860 0 stevel VN_HOLD(global_zone->zone_rootvp); 861 0 stevel 862 0 stevel /* 863 0 stevel * Notify the module code that it can begin using the 864 0 stevel * root filesystem instead of the boot program's services. 865 0 stevel */ 866 0 stevel modrootloaded = 1; 867 6423 gw25295 868 6423 gw25295 /* 869 6423 gw25295 * Special handling for a ZFS root file system. 870 6423 gw25295 */ 871 6423 gw25295 zfs_boot_init(); 872 6423 gw25295 873 0 stevel /* 874 0 stevel * Set up mnttab information for root 875 0 stevel */ 876 0 stevel vfs_setresource(rootvfs, rootfs.bo_name); 877 0 stevel 878 0 stevel /* 879 0 stevel * Notify cluster software that the root filesystem is available. 880 0 stevel */ 881 0 stevel clboot_mountroot(); 882 1488 rsb 883 1488 rsb /* Now that we're all done with the root FS, set up its vopstats */ 884 1488 rsb if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) != NULL) { 885 1488 rsb /* Set flag for statistics collection */ 886 1488 rsb if (vswp->vsw_flag & VSW_STATS) { 887 1520 rsb initialize_vopstats(&rootvfs->vfs_vopstats); 888 1488 rsb rootvfs->vfs_flag |= VFS_STATS; 889 1520 rsb rootvfs->vfs_fstypevsp = 890 1520 rsb get_fstype_vopstats(rootvfs, vswp); 891 1520 rsb rootvfs->vfs_vskap = get_vskstat_anchor(rootvfs); 892 1488 rsb } 893 1488 rsb vfs_unrefvfssw(vswp); 894 1488 rsb } 895 0 stevel 896 0 stevel /* 897 2621 llai1 * Mount /devices, /dev instance 1, /system/contract, /etc/mnttab, 898 3957 th199096 * /etc/svc/volatile, /etc/dfs/sharetab, /system/object, and /proc. 899 0 stevel */ 900 0 stevel vfs_mountdevices(); 901 2621 llai1 vfs_mountdev1(); 902 0 stevel 903 0 stevel vfs_mountfs("ctfs", "ctfs", CTFS_ROOT); 904 0 stevel vfs_mountfs("proc", "/proc", "/proc"); 905 0 stevel vfs_mountfs("mntfs", "/etc/mnttab", "/etc/mnttab"); 906 0 stevel vfs_mountfs("tmpfs", "/etc/svc/volatile", "/etc/svc/volatile"); 907 0 stevel vfs_mountfs("objfs", "objfs", OBJFS_ROOT); 908 3957 th199096 909 3957 th199096 if (getzoneid() == GLOBAL_ZONEID) { 910 3957 th199096 vfs_mountfs("sharefs", "sharefs", "/etc/dfs/sharetab"); 911 3957 th199096 } 912 0 stevel 913 0 stevel #ifdef __sparc 914 0 stevel /* 915 0 stevel * This bit of magic can go away when we convert sparc to 916 0 stevel * the new boot architecture based on ramdisk. 917 0 stevel * 918 0 stevel * Booting off a mirrored root volume: 919 0 stevel * At this point, we have booted and mounted root on a 920 0 stevel * single component of the mirror. Complete the boot 921 0 stevel * by configuring SVM and converting the root to the 922 0 stevel * dev_t of the mirrored root device. This dev_t conversion 923 0 stevel * only works because the underlying device doesn't change. 924 0 stevel */ 925 0 stevel if (root_is_svm) { 926 0 stevel if (svm_rootconf()) { 927 3446 mrj panic("vfs_mountroot: cannot remount root"); 928 0 stevel } 929 0 stevel 930 0 stevel /* 931 0 stevel * mnttab should reflect the new root device 932 0 stevel */ 933 0 stevel vfs_lock_wait(rootvfs); 934 0 stevel vfs_setresource(rootvfs, rootfs.bo_name); 935 0 stevel vfs_unlock(rootvfs); 936 0 stevel } 937 0 stevel #endif /* __sparc */ 938 0 stevel 939 0 stevel /* 940 0 stevel * Look up the root device via devfs so that a dv_node is 941 0 stevel * created for it. The vnode is never VN_RELE()ed. 942 0 stevel * We allocate more than MAXPATHLEN so that the 943 0 stevel * buffer passed to i_ddi_prompath_to_devfspath() is 944 0 stevel * exactly MAXPATHLEN (the function expects a buffer 945 0 stevel * of that length). 946 0 stevel */ 947 0 stevel plen = strlen("/devices"); 948 0 stevel path = kmem_alloc(plen + MAXPATHLEN, KM_SLEEP); 949 0 stevel (void) strcpy(path, "/devices"); 950 0 stevel 951 0 stevel if (i_ddi_prompath_to_devfspath(rootfs.bo_name, path + plen) 952 0 stevel != DDI_SUCCESS || 953 0 stevel lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &rvp)) { 954 0 stevel 955 0 stevel /* NUL terminate in case "path" has garbage */ 956 0 stevel path[plen + MAXPATHLEN - 1] = '\0'; 957 0 stevel #ifdef DEBUG 958 0 stevel cmn_err(CE_WARN, "!Cannot lookup root device: %s", path); 959 0 stevel #endif 960 0 stevel } 961 0 stevel kmem_free(path, plen + MAXPATHLEN); 962 4863 praks vfs_mnttabvp_setup(); 963 994 evanl } 964 994 evanl 965 994 evanl /* 966 994 evanl * If remount failed and we're in a zone we need to check for the zone 967 994 evanl * root path and strip it before the call to vfs_setpath(). 968 994 evanl * 969 994 evanl * If strpath doesn't begin with the zone_rootpath the original 970 994 evanl * strpath is returned unchanged. 971 994 evanl */ 972 994 evanl static const char * 973 994 evanl stripzonepath(const char *strpath) 974 994 evanl { 975 994 evanl char *str1, *str2; 976 994 evanl int i; 977 994 evanl zone_t *zonep = curproc->p_zone; 978 994 evanl 979 994 evanl if (zonep->zone_rootpath == NULL || strpath == NULL) { 980 994 evanl return (NULL); 981 994 evanl } 982 994 evanl 983 994 evanl /* 984 994 evanl * we check for the end of the string at one past the 985 994 evanl * current position because the zone_rootpath always 986 994 evanl * ends with "/" but we don't want to strip that off. 987 994 evanl */ 988 994 evanl str1 = zonep->zone_rootpath; 989 994 evanl str2 = (char *)strpath; 990 994 evanl ASSERT(str1[0] != '\0'); 991 994 evanl for (i = 0; str1[i + 1] != '\0'; i++) { 992 994 evanl if (str1[i] != str2[i]) 993 994 evanl return ((char *)strpath); 994 994 evanl } 995 994 evanl return (&str2[i]); 996 6734 johnlev } 997 6734 johnlev 998 6734 johnlev /* 999 6734 johnlev * Check to see if our "block device" is actually a file. If so, 1000 6734 johnlev * automatically add a lofi device, and keep track of this fact. 1001 6734 johnlev */ 1002 6734 johnlev static int 1003 6734 johnlev lofi_add(const char *fsname, struct vfs *vfsp, 1004 6734 johnlev mntopts_t *mntopts, struct mounta *uap) 1005 6734 johnlev { 1006 6734 johnlev int fromspace = (uap->flags & MS_SYSSPACE) ? 1007 6734 johnlev UIO_SYSSPACE : UIO_USERSPACE; 1008 6734 johnlev struct lofi_ioctl *li = NULL; 1009 6734 johnlev struct vnode *vp = NULL; 1010 6734 johnlev struct pathname pn = { NULL }; 1011 6734 johnlev ldi_ident_t ldi_id; 1012 6734 johnlev ldi_handle_t ldi_hdl; 1013 6855 johnlev vfssw_t *vfssw; 1014 6734 johnlev int minor; 1015 6734 johnlev int err = 0; 1016 6734 johnlev 1017 6855 johnlev if (fsname == NULL || 1018 6855 johnlev (vfssw = vfs_getvfssw(fsname)) == NULL) 1019 6855 johnlev return (0); 1020 6855 johnlev 1021 6855 johnlev if (!(vfssw->vsw_flag & VSW_CANLOFI)) { 1022 6855 johnlev vfs_unrefvfssw(vfssw); 1023 6855 johnlev return (0); 1024 6855 johnlev } 1025 6855 johnlev 1026 6855 johnlev vfs_unrefvfssw(vfssw); 1027 6855 johnlev vfssw = NULL; 1028 6734 johnlev 1029 6734 johnlev if (pn_get(uap->spec, fromspace, &pn) != 0) 1030 6734 johnlev return (0); 1031 6734 johnlev 1032 6734 johnlev if (lookupname(uap->spec, fromspace, FOLLOW, NULL, &vp) != 0) 1033 6734 johnlev goto out; 1034 6734 johnlev 1035 6734 johnlev if (vp->v_type != VREG) 1036 6734 johnlev goto out; 1037 6734 johnlev 1038 6734 johnlev /* OK, this is a lofi mount. */ 1039 6734 johnlev 1040 6734 johnlev if ((uap->flags & (MS_REMOUNT|MS_GLOBAL)) || 1041 6734 johnlev vfs_optionisset_nolock(mntopts, MNTOPT_SUID, NULL) || 1042 6734 johnlev vfs_optionisset_nolock(mntopts, MNTOPT_SETUID, NULL) || 1043 6734 johnlev vfs_optionisset_nolock(mntopts, MNTOPT_DEVICES, NULL)) { 1044 6734 johnlev err = EINVAL; 1045 6734 johnlev goto out; 1046 6734 johnlev } 1047 6734 johnlev 1048 6734 johnlev ldi_id = ldi_ident_from_anon(); 1049 6734 johnlev li = kmem_zalloc(sizeof (*li), KM_SLEEP); 1050 8081 Dina (void) strlcpy(li->li_filename, pn.pn_path, MAXPATHLEN); 1051 6734 johnlev 1052 6734 johnlev /* 1053 6734 johnlev * The lofi control node is currently exclusive-open. We'd like 1054 6734 johnlev * to improve this, but in the meantime, we'll loop waiting for 1055 6734 johnlev * access. 1056 6734 johnlev */ 1057 6734 johnlev for (;;) { 1058 6734 johnlev err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE | FEXCL, 1059 6734 johnlev kcred, &ldi_hdl, ldi_id); 1060 6734 johnlev 1061 6734 johnlev if (err != EBUSY) 1062 6734 johnlev break; 1063 6734 johnlev 1064 6734 johnlev if ((err = delay_sig(hz / 8)) == EINTR) 1065 6734 johnlev break; 1066 6734 johnlev } 1067 6734 johnlev 1068 6734 johnlev if (err) 1069 6734 johnlev goto out2; 1070 6734 johnlev 1071 6734 johnlev err = ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li, 1072 6734 johnlev FREAD | FWRITE | FEXCL | FKIOCTL, kcred, &minor); 1073 6734 johnlev 1074 6734 johnlev (void) ldi_close(ldi_hdl, FREAD | FWRITE | FEXCL, kcred); 1075 6734 johnlev 1076 6734 johnlev if (!err) 1077 6734 johnlev vfsp->vfs_lofi_minor = minor; 1078 6734 johnlev 1079 6734 johnlev out2: 1080 6734 johnlev ldi_ident_release(ldi_id); 1081 6734 johnlev out: 1082 6734 johnlev if (li != NULL) 1083 6734 johnlev kmem_free(li, sizeof (*li)); 1084 6734 johnlev if (vp != NULL) 1085 6734 johnlev VN_RELE(vp); 1086 6734 johnlev pn_free(&pn); 1087 6734 johnlev return (err); 1088 6734 johnlev } 1089 6734 johnlev 1090 6734 johnlev static void 1091 6734 johnlev lofi_remove(struct vfs *vfsp) 1092 6734 johnlev { 1093 6734 johnlev struct lofi_ioctl *li = NULL; 1094 6734 johnlev ldi_ident_t ldi_id; 1095 6734 johnlev ldi_handle_t ldi_hdl; 1096 6734 johnlev int err; 1097 6734 johnlev 1098 6734 johnlev if (vfsp->vfs_lofi_minor == 0) 1099 6734 johnlev return; 1100 6734 johnlev 1101 6734 johnlev ldi_id = ldi_ident_from_anon(); 1102 6734 johnlev 1103 6734 johnlev li = kmem_zalloc(sizeof (*li), KM_SLEEP); 1104 6734 johnlev li->li_minor = vfsp->vfs_lofi_minor; 1105 6734 johnlev li->li_cleanup = B_TRUE; 1106 6734 johnlev 1107 6734 johnlev do { 1108 6734 johnlev err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE | FEXCL, 1109 6734 johnlev kcred, &ldi_hdl, ldi_id); 1110 6734 johnlev } while (err == EBUSY); 1111 6734 johnlev 1112 6734 johnlev if (err) 1113 6734 johnlev goto out; 1114 6734 johnlev 1115 6734 johnlev err = ldi_ioctl(ldi_hdl, LOFI_UNMAP_FILE_MINOR, (intptr_t)li, 1116 6734 johnlev FREAD | FWRITE | FEXCL | FKIOCTL, kcred, NULL); 1117 6734 johnlev 1118 6734 johnlev (void) ldi_close(ldi_hdl, FREAD | FWRITE | FEXCL, kcred); 1119 6734 johnlev 1120 6734 johnlev if (!err) 1121 6734 johnlev vfsp->vfs_lofi_minor = 0; 1122 6734 johnlev 1123 6734 johnlev out: 1124 6734 johnlev ldi_ident_release(ldi_id); 1125 6734 johnlev if (li != NULL) 1126 6734 johnlev kmem_free(li, sizeof (*li)); 1127 0 stevel } 1128 0 stevel 1129 0 stevel /* 1130 0 stevel * Common mount code. Called from the system call entry point, from autofs, 1131 5302 th199096 * nfsv4 trigger mounts, and from pxfs. 1132 0 stevel * 1133 0 stevel * Takes the effective file system type, mount arguments, the mount point 1134 0 stevel * vnode, flags specifying whether the mount is a remount and whether it 1135 0 stevel * should be entered into the vfs list, and credentials. Fills in its vfspp 1136 0 stevel * parameter with the mounted file system instance's vfs. 1137 0 stevel * 1138 0 stevel * Note that the effective file system type is specified as a string. It may 1139 0 stevel * be null, in which case it's determined from the mount arguments, and may 1140 0 stevel * differ from the type specified in the mount arguments; this is a hook to 1141 0 stevel * allow interposition when instantiating file system instances. 1142 0 stevel * 1143 0 stevel * The caller is responsible for releasing its own hold on the mount point 1144 0 stevel * vp (this routine does its own hold when necessary). 1145 0 stevel * Also note that for remounts, the mount point vp should be the vnode for 1146 0 stevel * the root of the file system rather than the vnode that the file system 1147 0 stevel * is mounted on top of. 1148 0 stevel */ 1149 0 stevel int 1150 0 stevel domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp, 1151 0 stevel struct vfs **vfspp) 1152 0 stevel { 1153 0 stevel struct vfssw *vswp; 1154 0 stevel vfsops_t *vfsops; 1155 0 stevel struct vfs *vfsp; 1156 0 stevel struct vnode *bvp; 1157 0 stevel dev_t bdev = 0; 1158 0 stevel mntopts_t mnt_mntopts; 1159 0 stevel int error = 0; 1160 0 stevel int copyout_error = 0; 1161 0 stevel int ovflags; 1162 0 stevel char *opts = uap->optptr; 1163 0 stevel char *inargs = opts; 1164 0 stevel int optlen = uap->optlen; 1165 0 stevel int remount; 1166 0 stevel int rdonly; 1167 0 stevel int nbmand = 0; 1168 0 stevel int delmip = 0; 1169 0 stevel int addmip = 0; 1170 0 stevel int splice = ((uap->flags & MS_NOSPLICE) == 0); 1171 0 stevel int fromspace = (uap->flags & MS_SYSSPACE) ? 1172 3912 lling UIO_SYSSPACE : UIO_USERSPACE; 1173 0 stevel char *resource = NULL, *mountpt = NULL; 1174 0 stevel refstr_t *oldresource, *oldmntpt; 1175 0 stevel struct pathname pn, rpn; 1176 1520 rsb vsk_anchor_t *vskap; 1177 6734 johnlev char fstname[FSTYPSZ]; 1178 0 stevel 1179 0 stevel /* 1180 0 stevel * The v_flag value for the mount point vp is permanently set 1181 0 stevel * to VVFSLOCK so that no one bypasses the vn_vfs*locks routine 1182 0 stevel * for mount point locking. 1183 0 stevel */ 1184 0 stevel mutex_enter(&vp->v_lock); 1185 0 stevel vp->v_flag |= VVFSLOCK; 1186 0 stevel mutex_exit(&vp->v_lock); 1187 0 stevel 1188 0 stevel mnt_mntopts.mo_count = 0; 1189 0 stevel /* 1190 0 stevel * Find the ops vector to use to invoke the file system-specific mount 1191 0 stevel * method. If the fsname argument is non-NULL, use it directly. 1192 0 stevel * Otherwise, dig the file system type information out of the mount 1193 0 stevel * arguments. 1194 0 stevel * 1195 0 stevel * A side effect is to hold the vfssw entry. 1196 0 stevel * 1197 0 stevel * Mount arguments can be specified in several ways, which are 1198 0 stevel * distinguished by flag bit settings. The preferred way is to set 1199 0 stevel * MS_OPTIONSTR, indicating an 8 argument mount with the file system 1200 0 stevel * type supplied as a character string and the last two arguments 1201 0 stevel * being a pointer to a character buffer and the size of the buffer. 1202 0 stevel * On entry, the buffer holds a null terminated list of options; on 1203 0 stevel * return, the string is the list of options the file system 1204 0 stevel * recognized. If MS_DATA is set arguments five and six point to a 1205 0 stevel * block of binary data which the file system interprets. 1206 0 stevel * A further wrinkle is that some callers don't set MS_FSS and MS_DATA 1207 0 stevel * consistently with these conventions. To handle them, we check to 1208 0 stevel * see whether the pointer to the file system name has a numeric value 1209 0 stevel * less than 256. If so, we treat it as an index. 1210 0 stevel */ 1211 0 stevel if (fsname != NULL) { 1212 0 stevel if ((vswp = vfs_getvfssw(fsname)) == NULL) { 1213 0 stevel return (EINVAL); 1214 0 stevel } 1215 0 stevel } else if (uap->flags & (MS_OPTIONSTR | MS_DATA | MS_FSS)) { 1216 0 stevel size_t n; 1217 0 stevel uint_t fstype; 1218 6734 johnlev 1219 6734 johnlev fsname = fstname; 1220 0 stevel 1221 0 stevel if ((fstype = (uintptr_t)uap->fstype) < 256) { 1222 0 stevel RLOCK_VFSSW(); 1223 0 stevel if (fstype == 0 || fstype >= nfstype || 1224 0 stevel !ALLOCATED_VFSSW(&vfssw[fstype])) { 1225 0 stevel RUNLOCK_VFSSW(); 1226 0 stevel return (EINVAL); 1227 0 stevel } 1228 6734 johnlev (void) strcpy(fsname, vfssw[fstype].vsw_name); 1229 6734 johnlev RUNLOCK_VFSSW(); 1230 6734 johnlev if ((vswp = vfs_getvfssw(fsname)) == NULL) 1231 0 stevel return (EINVAL); 1232 0 stevel } else { 1233 0 stevel /* 1234 0 stevel * Handle either kernel or user address space. 1235 0 stevel */ 1236 0 stevel if (uap->flags & MS_SYSSPACE) { 1237 6734 johnlev error = copystr(uap->fstype, fsname, 1238 0 stevel FSTYPSZ, &n); 1239 0 stevel } else { 1240 6734 johnlev error = copyinstr(uap->fstype, fsname, 1241 0 stevel FSTYPSZ, &n); 1242 0 stevel } 1243 0 stevel if (error) { 1244 0 stevel if (error == ENAMETOOLONG) 1245 0 stevel return (EINVAL); 1246 0 stevel return (error); 1247 0 stevel } 1248 6734 johnlev if ((vswp = vfs_getvfssw(fsname)) == NULL) 1249 0 stevel return (EINVAL); 1250 0 stevel } 1251 0 stevel } else { 1252 0 stevel if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) == NULL) 1253 0 stevel return (EINVAL); 1254 0 stevel } 1255 0 stevel if (!VFS_INSTALLED(vswp)) 1256 0 stevel return (EINVAL); 1257 0 stevel vfsops = &vswp->vsw_vfsops; 1258 0 stevel 1259 0 stevel vfs_copyopttbl(&vswp->vsw_optproto, &mnt_mntopts); 1260 0 stevel /* 1261 0 stevel * Fetch mount options and parse them for generic vfs options 1262 0 stevel */ 1263 0 stevel if (uap->flags & MS_OPTIONSTR) { 1264 0 stevel /* 1265 0 stevel * Limit the buffer size 1266 0 stevel */ 1267 0 stevel if (optlen < 0 || optlen > MAX_MNTOPT_STR) { 1268 0 stevel error = EINVAL; 1269 0 stevel goto errout; 1270 0 stevel } 1271 0 stevel if ((uap->flags & MS_SYSSPACE) == 0) { 1272 0 stevel inargs = kmem_alloc(MAX_MNTOPT_STR, KM_SLEEP); 1273 0 stevel inargs[0] = '\0'; 1274 0 stevel if (optlen) { 1275 0 stevel error = copyinstr(opts, inargs, (size_t)optlen, 1276 3912 lling NULL); 1277 0 stevel if (error) { 1278 0 stevel goto errout; 1279 0 stevel } 1280 0 stevel } 1281 0 stevel } 1282 0 stevel vfs_parsemntopts(&mnt_mntopts, inargs, 0); 1283 0 stevel } 1284 0 stevel /* 1285 0 stevel * Flag bits override the options string. 1286 0 stevel */ 1287 0 stevel if (uap->flags & MS_REMOUNT) 1288 0 stevel vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_REMOUNT, NULL, 0, 0); 1289 0 stevel if (uap->flags & MS_RDONLY) 1290 0 stevel vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_RO, NULL, 0, 0); 1291 0 stevel if (uap->flags & MS_NOSUID) 1292 0 stevel vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL, 0, 0); 1293 0 stevel 1294 0 stevel /* 1295 0 stevel * Check if this is a remount; must be set in the option string and 1296 0 stevel * the file system must support a remount option. 1297 0 stevel */ 1298 0 stevel if (remount = vfs_optionisset_nolock(&mnt_mntopts, 1299 0 stevel MNTOPT_REMOUNT, NULL)) { 1300 0 stevel if (!(vswp->vsw_flag & VSW_CANREMOUNT)) { 1301 0 stevel error = ENOTSUP; 1302 0 stevel goto errout; 1303 0 stevel } 1304 0 stevel uap->flags |= MS_REMOUNT; 1305 0 stevel } 1306 0 stevel 1307 0 stevel /* 1308 0 stevel * uap->flags and vfs_optionisset() should agree. 1309 0 stevel */ 1310 0 stevel if (rdonly = vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_RO, NULL)) { 1311 0 stevel uap->flags |= MS_RDONLY; 1312 0 stevel } 1313 0 stevel if (vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL)) { 1314 0 stevel uap->flags |= MS_NOSUID; 1315 0 stevel } 1316 0 stevel nbmand = vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_NBMAND, NULL); 1317 0 stevel ASSERT(splice || !remount); 1318 0 stevel /* 1319 0 stevel * If we are splicing the fs into the namespace, 1320 0 stevel * perform mount point checks. 1321 0 stevel * 1322 0 stevel * We want to resolve the path for the mount point to eliminate 1323 0 stevel * '.' and ".." and symlinks in mount points; we can't do the 1324 0 stevel * same for the resource string, since it would turn 1325 0 stevel * "/dev/dsk/c0t0d0s0" into "/devices/pci@...". We need to do 1326 0 stevel * this before grabbing vn_vfswlock(), because otherwise we 1327 0 stevel * would deadlock with lookuppn(). 1328 0 stevel */ 1329 0 stevel if (splice) { 1330 0 stevel ASSERT(vp->v_count > 0); 1331 0 stevel 1332 0 stevel /* 1333 0 stevel * Pick up mount point and device from appropriate space. 1334 0 stevel */ 1335 0 stevel if (pn_get(uap->spec, fromspace, &pn) == 0) { 1336 0 stevel resource = kmem_alloc(pn.pn_pathlen + 1, 1337 0 stevel KM_SLEEP); 1338 0 stevel (void) strcpy(resource, pn.pn_path); 1339 0 stevel pn_free(&pn); 1340 0 stevel } 1341 0 stevel /* 1342 0 stevel * Do a lookupname prior to taking the 1343 0 stevel * writelock. Mark this as completed if 1344 0 stevel * successful for later cleanup and addition to 1345 0 stevel * the mount in progress table. 1346 0 stevel */ 1347 0 stevel if ((uap->flags & MS_GLOBAL) == 0 && 1348 0 stevel lookupname(uap->spec, fromspace, 1349 3912 lling FOLLOW, NULL, &bvp) == 0) { 1350 0 stevel addmip = 1; 1351 0 stevel } 1352 0 stevel 1353 0 stevel if ((error = pn_get(uap->dir, fromspace, &pn)) == 0) { 1354 0 stevel pathname_t *pnp; 1355 0 stevel 1356 0 stevel if (*pn.pn_path != '/') { 1357 0 stevel error = EINVAL; 1358 0 stevel pn_free(&pn); 1359 0 stevel goto errout; 1360 0 stevel } 1361 0 stevel pn_alloc(&rpn); 1362 0 stevel /* 1363 0 stevel * Kludge to prevent autofs from deadlocking with 1364 0 stevel * itself when it calls domount(). 1365 0 stevel * 1366 0 stevel * If autofs is calling, it is because it is doing 1367 0 stevel * (autofs) mounts in the process of an NFS mount. A 1368 0 stevel * lookuppn() here would cause us to block waiting for 1369 0 stevel * said NFS mount to complete, which can't since this 1370 0 stevel * is the thread that was supposed to doing it. 1371 0 stevel */ 1372 0 stevel if (fromspace == UIO_USERSPACE) { 1373 0 stevel if ((error = lookuppn(&pn, &rpn, FOLLOW, NULL, 1374 0 stevel NULL)) == 0) { 1375 0 stevel pnp = &rpn; 1376 0 stevel } else { 1377 0 stevel /* 1378 0 stevel * The file disappeared or otherwise 1379 0 stevel * became inaccessible since we opened 1380 0 stevel * it; might as well fail the mount 1381 0 stevel * since the mount point is no longer 1382 0 stevel * accessible. 1383 0 stevel */ 1384 0 stevel pn_free(&rpn); 1385 0 stevel pn_free(&pn); 1386 0 stevel goto errout; 1387 0 stevel } 1388 0 stevel } else { 1389 0 stevel pnp = &pn; 1390 0 stevel } 1391 0 stevel mountpt = kmem_alloc(pnp->pn_pathlen + 1, KM_SLEEP); 1392 0 stevel (void) strcpy(mountpt, pnp->pn_path); 1393 0 stevel 1394 0 stevel /* 1395 0 stevel * If the addition of the zone's rootpath 1396 0 stevel * would push us over a total path length 1397 0 stevel * of MAXPATHLEN, we fail the mount with 1398 0 stevel * ENAMETOOLONG, which is what we would have 1399 0 stevel * gotten if we were trying to perform the same 1400 0 stevel * mount in the global zone. 1401 0 stevel * 1402 0 stevel * strlen() doesn't count the trailing 1403 0 stevel * '\0', but zone_rootpathlen counts both a 1404 0 stevel * trailing '/' and the terminating '\0'. 1405 0 stevel */ 1406 0 stevel if ((curproc->p_zone->zone_rootpathlen - 1 + 1407 0 stevel strlen(mountpt)) > MAXPATHLEN || 1408 0 stevel (resource != NULL && 1409 0 stevel (curproc->p_zone->zone_rootpathlen - 1 + 1410 0 stevel strlen(resource)) > MAXPATHLEN)) { 1411 0 stevel error = ENAMETOOLONG; 1412 0 stevel } 1413 0 stevel 1414 0 stevel pn_free(&rpn); 1415 0 stevel pn_free(&pn); 1416 0 stevel } 1417 0 stevel 1418 0 stevel if (error) 1419 0 stevel goto errout; 1420 0 stevel 1421 0 stevel /* 1422 0 stevel * Prevent path name resolution from proceeding past 1423 0 stevel * the mount point. 1424 0 stevel */ 1425 0 stevel if (vn_vfswlock(vp) != 0) { 1426 0 stevel error = EBUSY; 1427 0 stevel goto errout; 1428 0 stevel } 1429 0 stevel 1430 0 stevel /* 1431 0 stevel * Verify that it's legitimate to establish a mount on 1432 0 stevel * the prospective mount point. 1433 0 stevel */ 1434 0 stevel if (vn_mountedvfs(vp) != NULL) { 1435 0 stevel /* 1436 0 stevel * The mount point lock was obtained after some 1437 0 stevel * other thread raced through and established a mount. 1438 0 stevel */ 1439 0 stevel vn_vfsunlock(vp); 1440 0 stevel error = EBUSY; 1441 0 stevel goto errout; 1442 0 stevel } 1443 0 stevel if (vp->v_flag & VNOMOUNT) { 1444 0 stevel vn_vfsunlock(vp); 1445 0 stevel error = EINVAL; 1446 0 stevel goto errout; 1447 0 stevel } 1448 0 stevel } 1449 0 stevel if ((uap->flags & (MS_DATA | MS_OPTIONSTR)) == 0) { 1450 0 stevel uap->dataptr = NULL; 1451 0 stevel uap->datalen = 0; 1452 0 stevel } 1453 0 stevel 1454 0 stevel /* 1455 0 stevel * If this is a remount, we don't want to create a new VFS. 1456 0 stevel * Instead, we pass the existing one with a remount flag. 1457 0 stevel */ 1458 0 stevel if (remount) { 1459 0 stevel /* 1460 0 stevel * Confirm that the mount point is the root vnode of the 1461 0 stevel * file system that is being remounted. 1462 0 stevel * This can happen if the user specifies a different 1463 0 stevel * mount point directory pathname in the (re)mount command. 1464 0 stevel * 1465 0 stevel * Code below can only be reached if splice is true, so it's 1466 0 stevel * safe to do vn_vfsunlock() here. 1467 0 stevel */ 1468 0 stevel if ((vp->v_flag & VROOT) == 0) { 1469 0 stevel vn_vfsunlock(vp); 1470 0 stevel error = ENOENT; 1471 0 stevel goto errout; 1472 0 stevel } 1473 0 stevel /* 1474 0 stevel * Disallow making file systems read-only unless file system 1475 0 stevel * explicitly allows it in its vfssw. Ignore other flags. 1476 0 stevel */ 1477 0 stevel if (rdonly && vn_is_readonly(vp) == 0 && 1478 0 stevel (vswp->vsw_flag & VSW_CANRWRO) == 0) { 1479 0 stevel vn_vfsunlock(vp); 1480 0 stevel error = EINVAL; 1481 0 stevel goto errout; 1482 0 stevel } 1483 0 stevel /* 1484 5331 amw * Disallow changing the NBMAND disposition of the file 1485 5331 amw * system on remounts. 1486 0 stevel */ 1487 0 stevel if ((nbmand && ((vp->v_vfsp->vfs_flag & VFS_NBMAND) == 0)) || 1488 0 stevel (!nbmand && (vp->v_vfsp->vfs_flag & VFS_NBMAND))) { 1489 5331 amw vn_vfsunlock(vp); 1490 5331 amw error = EINVAL; 1491 5331 amw goto errout; 1492 0 stevel } 1493 0 stevel vfsp = vp->v_vfsp; 1494 0 stevel ovflags = vfsp->vfs_flag; 1495 0 stevel vfsp->vfs_flag |= VFS_REMOUNT; 1496 0 stevel vfsp->vfs_flag &= ~VFS_RDONLY; 1497 0 stevel } else { 1498 5331 amw vfsp = vfs_alloc(KM_SLEEP); 1499 0 stevel VFS_INIT(vfsp, vfsops, NULL); 1500 0 stevel } 1501 0 stevel 1502 0 stevel VFS_HOLD(vfsp); 1503 0 stevel 1504 6734 johnlev if ((error = lofi_add(fsname, vfsp, &mnt_mntopts, uap)) != 0) { 1505 6734 johnlev if (!remount) { 1506 6734 johnlev if (splice) 1507 6734 johnlev vn_vfsunlock(vp); 1508 6734 johnlev vfs_free(vfsp); 1509 6734 johnlev } else { 1510 6734 johnlev vn_vfsunlock(vp); 1511 6734 johnlev VFS_RELE(vfsp); 1512 6734 johnlev } 1513 6734 johnlev goto errout; 1514 6734 johnlev } 1515 6734 johnlev 1516 6734 johnlev /* 1517 6734 johnlev * PRIV_SYS_MOUNT doesn't mean you can become root. 1518 6734 johnlev */ 1519 6734 johnlev if (vfsp->vfs_lofi_minor != 0) { 1520 6734 johnlev uap->flags |= MS_NOSUID; 1521 6734 johnlev vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL, 0, 0); 1522 6734 johnlev } 1523 6734 johnlev 1524 0 stevel /* 1525 0 stevel * The vfs_reflock is not used anymore the code below explicitly 1526 0 stevel * holds it preventing others accesing it directly. 1527 0 stevel */ 1528 0 stevel if ((sema_tryp(&vfsp->vfs_reflock) == 0) && 1529 0 stevel !(vfsp->vfs_flag & VFS_REMOUNT)) 1530 0 stevel cmn_err(CE_WARN, 1531 3446 mrj "mount type %s couldn't get vfs_reflock", vswp->vsw_name); 1532 0 stevel 1533 0 stevel /* 1534 0 stevel * Lock the vfs. If this is a remount we want to avoid spurious umount 1535 0 stevel * failures that happen as a side-effect of fsflush() and other mount 1536 0 stevel * and unmount operations that might be going on simultaneously and 1537 0 stevel * may have locked the vfs currently. To not return EBUSY immediately 1538 0 stevel * here we use vfs_lock_wait() instead vfs_lock() for the remount case. 1539 0 stevel */ 1540 0 stevel if (!remount) { 1541 0 stevel if (error = vfs_lock(vfsp)) { 1542 0 stevel vfsp->vfs_flag = ovflags; 1543 6734 johnlev 1544 6734 johnlev lofi_remove(vfsp); 1545 6734 johnlev 1546 0 stevel if (splice) 1547 0 stevel vn_vfsunlock(vp); 1548 5331 amw vfs_free(vfsp); 1549 0 stevel goto errout; 1550 0 stevel } 1551 0 stevel } else { 1552 0 stevel vfs_lock_wait(vfsp); 1553 0 stevel } 1554 0 stevel 1555 0 stevel /* 1556 0 stevel * Add device to mount in progress table, global mounts require special 1557 0 stevel * handling. It is possible that we have already done the lookupname 1558 0 stevel * on a spliced, non-global fs. If so, we don't want to do it again 1559 0 stevel * since we cannot do a lookupname after taking the 1560 0 stevel * wlock above. This case is for a non-spliced, non-global filesystem. 1561 0 stevel */ 1562 0 stevel if (!addmip) { 1563 3912 lling if ((uap->flags & MS_GLOBAL) == 0 && 1564 3912 lling lookupname(uap->spec, fromspace, FOLLOW, NULL, &bvp) == 0) { 1565 0 stevel addmip = 1; 1566 0 stevel } 1567 0 stevel } 1568 0 stevel 1569 0 stevel if (addmip) { 1570 6734 johnlev vnode_t *lvp = NULL; 1571 6734 johnlev 1572 6734 johnlev error = vfs_get_lofi(vfsp, &lvp); 1573 6734 johnlev if (error > 0) { 1574 6734 johnlev lofi_remove(vfsp); 1575 6734 johnlev 1576 6734 johnlev if (splice) 1577 6734 johnlev vn_vfsunlock(vp); 1578 6734 johnlev vfs_unlock(vfsp); 1579 6734 johnlev 1580 6734 johnlev if (remount) { 1581 6734 johnlev VFS_RELE(vfsp); 1582 6734 johnlev } else { 1583 6734 johnlev vfs_free(vfsp); 1584 6734 johnlev } 1585 6734 johnlev 1586 6734 johnlev goto errout; 1587 6734 johnlev } else if (error == -1) { 1588 6734 johnlev bdev = bvp->v_rdev; 1589 6734 johnlev VN_RELE(bvp); 1590 6734 johnlev } else { 1591 6734 johnlev bdev = lvp->v_rdev; 1592 6734 johnlev VN_RELE(lvp); 1593 6734 johnlev VN_RELE(bvp); 1594 6734 johnlev } 1595 6734 johnlev 1596 0 stevel vfs_addmip(bdev, vfsp); 1597 0 stevel addmip = 0; 1598 0 stevel delmip = 1; 1599 0 stevel } 1600 0 stevel /* 1601 0 stevel * Invalidate cached entry for the mount point. 1602 0 stevel */ 1603 0 stevel if (splice) 1604 0 stevel dnlc_purge_vp(vp); 1605 0 stevel 1606 0 stevel /* 1607 0 stevel * If have an option string but the filesystem doesn't supply a 1608 0 stevel * prototype options table, create a table with the global 1609 0 stevel * options and sufficient room to accept all the options in the 1610 0 stevel * string. Then parse the passed in option string 1611 0 stevel * accepting all the options in the string. This gives us an 1612 0 stevel * option table with all the proper cancel properties for the 1613 0 stevel * global options. 1614 0 stevel * 1615 0 stevel * Filesystems that supply a prototype options table are handled 1616 0 stevel * earlier in this function. 1617 0 stevel */ 1618 0 stevel if (uap->flags & MS_OPTIONSTR) { 1619 0 stevel if (!(vswp->vsw_flag & VSW_HASPROTO)) { 1620 0 stevel mntopts_t tmp_mntopts; 1621 0 stevel 1622 0 stevel tmp_mntopts.mo_count = 0; 1623 0 stevel vfs_createopttbl_extend(&tmp_mntopts, inargs, 1624 0 stevel &mnt_mntopts); 1625 0 stevel vfs_parsemntopts(&tmp_mntopts, inargs, 1); 1626 0 stevel vfs_swapopttbl_nolock(&mnt_mntopts, &tmp_mntopts); 1627 0 stevel vfs_freeopttbl(&tmp_mntopts); 1628 0 stevel } 1629 0 stevel } 1630 0 stevel 1631 0 stevel /* 1632 0 stevel * Serialize with zone creations. 1633 0 stevel */ 1634 0 stevel mount_in_progress(); 1635 0 stevel /* 1636 0 stevel * Instantiate (or reinstantiate) the file system. If appropriate, 1637 0 stevel * splice it into the file system name space. 1638 0 stevel * 1639 0 stevel * We want VFS_MOUNT() to be able to override the vfs_resource 1640 0 stevel * string if necessary (ie, mntfs), and also for a remount to 1641 0 stevel * change the same (necessary when remounting '/' during boot). 1642 0 stevel * So we set up vfs_mntpt and vfs_resource to what we think they 1643 0 stevel * should be, then hand off control to VFS_MOUNT() which can 1644 0 stevel * override this. 1645 0 stevel * 1646 0 stevel * For safety's sake, when changing vfs_resource or vfs_mntpt of 1647 0 stevel * a vfs which is on the vfs list (i.e. during a remount), we must 1648 0 stevel * never set those fields to NULL. Several bits of code make 1649 0 stevel * assumptions that the fields are always valid. 1650 0 stevel */ 1651 0 stevel vfs_swapopttbl(&mnt_mntopts, &vfsp->vfs_mntopts); 1652 0 stevel if (remount) { 1653 0 stevel if ((oldresource = vfsp->vfs_resource) != NULL) 1654 0 stevel refstr_hold(oldresource); 1655 0 stevel if ((oldmntpt = vfsp->vfs_mntpt) != NULL) 1656 0 stevel refstr_hold(oldmntpt); 1657 0 stevel } 1658 0 stevel vfs_setresource(vfsp, resource); 1659 0 stevel vfs_setmntpoint(vfsp, mountpt); 1660 0 stevel 1661 4863 praks /* 1662 4863 praks * going to mount on this vnode, so notify. 1663 4863 praks */ 1664 5331 amw vnevent_mountedover(vp, NULL); 1665 0 stevel error = VFS_MOUNT(vfsp, vp, uap, credp); 1666 0 stevel 1667 0 stevel if (uap->flags & MS_RDONLY) 1668 0 stevel vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 1669 0 stevel if (uap->flags & MS_NOSUID) 1670 0 stevel vfs_setmntopt(vfsp, MNTOPT_NOSUID, NULL, 0); 1671 0 stevel if (uap->flags & MS_GLOBAL) 1672 0 stevel vfs_setmntopt(vfsp, MNTOPT_GLOBAL, NULL, 0); 1673 0 stevel 1674 0 stevel if (error) { 1675 6734 johnlev lofi_remove(vfsp); 1676 6734 johnlev 1677 0 stevel if (remount) { 1678 0 stevel /* put back pre-remount options */ 1679 0 stevel vfs_swapopttbl(&mnt_mntopts, &vfsp->vfs_mntopts); 1680 994 evanl vfs_setmntpoint(vfsp, (stripzonepath( 1681 3912 lling refstr_value(oldmntpt)))); 1682 0 stevel if (oldmntpt) 1683 0 stevel refstr_rele(oldmntpt); 1684 994 evanl vfs_setresource(vfsp, (stripzonepath( 1685 3912 lling refstr_value(oldresource)))); 1686 0 stevel if (oldresource) 1687 0 stevel refstr_rele(oldresource); 1688 0 stevel vfsp->vfs_flag = ovflags; 1689 0 stevel vfs_unlock(vfsp); 1690 0 stevel VFS_RELE(vfsp); 1691 0 stevel } else { 1692 0 stevel vfs_unlock(vfsp); 1693 0 stevel vfs_freemnttab(vfsp); 1694 5331 amw vfs_free(vfsp); 1695 0 stevel } 1696 0 stevel } else { 1697 0 stevel /* 1698 0 stevel * Set the mount time to now 1699 0 stevel */ 1700 0 stevel vfsp->vfs_mtime = ddi_get_time(); 1701 0 stevel if (remount) { 1702 0 stevel vfsp->vfs_flag &= ~VFS_REMOUNT; 1703 0 stevel if (oldresource) 1704 0 stevel refstr_rele(oldresource); 1705 0 stevel if (oldmntpt) 1706 0 stevel refstr_rele(oldmntpt); 1707 0 stevel } else if (splice) { 1708 0 stevel /* 1709 0 stevel * Link vfsp into the name space at the mount 1710 0 stevel * point. Vfs_add() is responsible for 1711 0 stevel * holding the mount point which will be 1712 0 stevel * released when vfs_remove() is called. 1713 0 stevel */ 1714 0 stevel vfs_add(vp, vfsp, uap->flags); 1715 0 stevel } else { 1716 0 stevel /* 1717 0 stevel * Hold the reference to file system which is 1718 0 stevel * not linked into the name space. 1719 0 stevel */ 1720 0 stevel vfsp->vfs_zone = NULL; 1721 0 stevel VFS_HOLD(vfsp); 1722 0 stevel vfsp->vfs_vnodecovered = NULL; 1723 0 stevel } 1724 0 stevel /* 1725 0 stevel * Set flags for global options encountered 1726 0 stevel */ 1727 0 stevel if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 1728 0 stevel vfsp->vfs_flag |= VFS_RDONLY; 1729 0 stevel else 1730 0 stevel vfsp->vfs_flag &= ~VFS_RDONLY; 1731 0 stevel if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 1732 0 stevel vfsp->vfs_flag |= (VFS_NOSETUID|VFS_NODEVICES); 1733 0 stevel } else { 1734 0 stevel if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 1735 0 stevel vfsp->vfs_flag |= VFS_NODEVICES; 1736 0 stevel else 1737 0 stevel vfsp->vfs_flag &= ~VFS_NODEVICES; 1738 0 stevel if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 1739 0 stevel vfsp->vfs_flag |= VFS_NOSETUID; 1740 0 stevel else 1741 0 stevel vfsp->vfs_flag &= ~VFS_NOSETUID; 1742 0 stevel } 1743 0 stevel if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) 1744 0 stevel vfsp->vfs_flag |= VFS_NBMAND; 1745 0 stevel else 1746 0 stevel vfsp->vfs_flag &= ~VFS_NBMAND; 1747 0 stevel 1748 0 stevel if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 1749 0 stevel vfsp->vfs_flag |= VFS_XATTR; 1750 0 stevel else 1751 0 stevel vfsp->vfs_flag &= ~VFS_XATTR; 1752 0 stevel 1753 0 stevel if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 1754 0 stevel vfsp->vfs_flag |= VFS_NOEXEC; 1755 0 stevel else 1756 0 stevel vfsp->vfs_flag &= ~VFS_NOEXEC; 1757 0 stevel 1758 0 stevel /* 1759 0 stevel * Now construct the output option string of options 1760 0 stevel * we recognized. 1761 0 stevel */ 1762 0 stevel if (uap->flags & MS_OPTIONSTR) { 1763 0 stevel vfs_list_read_lock(); 1764 0 stevel copyout_error = vfs_buildoptionstr( 1765 3912 lling &vfsp->vfs_mntopts, inargs, optlen); 1766 0 stevel vfs_list_unlock(); 1767 0 stevel if (copyout_error == 0 && 1768 0 stevel (uap->flags & MS_SYSSPACE) == 0) { 1769 0 stevel copyout_error = copyoutstr(inargs, opts, 1770 0 stevel optlen, NULL); 1771 0 stevel } 1772 0 stevel } 1773 1488 rsb 1774 1520 rsb /* 1775 1520 rsb * If this isn't a remount, set up the vopstats before 1776 1678 rsb * anyone can touch this. We only allow spliced file 1777 1678 rsb * systems (file systems which are in the namespace) to 1778 1678 rsb * have the VFS_STATS flag set. 1779 1678 rsb * NOTE: PxFS mounts the underlying file system with 1780 1678 rsb * MS_NOSPLICE set and copies those vfs_flags to its private 1781 1678 rsb * vfs structure. As a result, PxFS should never have 1782 1678 rsb * the VFS_STATS flag or else we might access the vfs 1783 1678 rsb * statistics-related fields prior to them being 1784 1678 rsb * properly initialized. 1785 1678 rsb */ 1786 1678 rsb if (!remount && (vswp->vsw_flag & VSW_STATS) && splice) { 1787 1520 rsb initialize_vopstats(&vfsp->vfs_vopstats); 1788 1520 rsb /* 1789 1520 rsb * We need to set vfs_vskap to NULL because there's 1790 1520 rsb * a chance it won't be set below. This is checked 1791 1520 rsb * in teardown_vopstats() so we can't have garbage. 1792 1520 rsb */ 1793 1520 rsb vfsp->vfs_vskap = NULL; 1794 1488 rsb vfsp->vfs_flag |= VFS_STATS; 1795 1520 rsb vfsp->vfs_fstypevsp = get_fstype_vopstats(vfsp, vswp); 1796 1488 rsb } 1797 1488 rsb 1798 4321 casper if (vswp->vsw_flag & VSW_XID) 1799 4321 casper vfsp->vfs_flag |= VFS_XID; 1800 4321 casper 1801 0 stevel vfs_unlock(vfsp); 1802 0 stevel } 1803 0 stevel mount_completed(); 1804 0 stevel if (splice) 1805 0 stevel vn_vfsunlock(vp); 1806 0 stevel 1807 0 stevel if ((error == 0) && (copyout_error == 0)) { 1808 1520 rsb if (!remount) { 1809 1520 rsb /* 1810 1520 rsb * Don't call get_vskstat_anchor() while holding 1811 1520 rsb * locks since it allocates memory and calls 1812 1520 rsb * VFS_STATVFS(). For NFS, the latter can generate 1813 1520 rsb * an over-the-wire call. 1814 1520 rsb */ 1815 1520 rsb vskap = get_vskstat_anchor(vfsp); 1816 1520 rsb /* Only take the lock if we have something to do */ 1817 1520 rsb if (vskap != NULL) { 1818 1520 rsb vfs_lock_wait(vfsp); 1819 1520 rsb if (vfsp->vfs_flag & VFS_STATS) { 1820 1520 rsb vfsp->vfs_vskap = vskap; 1821 1520 rsb } 1822 1520 rsb vfs_unlock(vfsp); 1823 1520 rsb } 1824 1520 rsb } 1825 1488 rsb /* Return vfsp to caller. */ 1826 0 stevel *vfspp = vfsp; 1827 0 stevel } 1828 0 stevel errout: 1829 0 stevel vfs_freeopttbl(&mnt_mntopts); 1830 0 stevel if (resource != NULL) 1831 0 stevel kmem_free(resource, strlen(resource) + 1); 1832 0 stevel if (mountpt != NULL) 1833 0 stevel kmem_free(mountpt, strlen(mountpt) + 1); 1834 0 stevel /* 1835 0 stevel * It is possible we errored prior to adding to mount in progress 1836 0 stevel * table. Must free vnode we acquired with successful lookupname. 1837 0 stevel */ 1838 0 stevel if (addmip) 1839 0 stevel VN_RELE(bvp); 1840 0 stevel if (delmip) 1841 0 stevel vfs_delmip(vfsp); 1842 0 stevel ASSERT(vswp != NULL); 1843 0 stevel vfs_unrefvfssw(vswp); 1844 0 stevel if (inargs != opts) 1845 0 stevel kmem_free(inargs, MAX_MNTOPT_STR); 1846 0 stevel if (copyout_error) { 1847 6734 johnlev lofi_remove(vfsp); 1848 0 stevel VFS_RELE(vfsp); 1849 0 stevel error = copyout_error; 1850 0 stevel } 1851 0 stevel return (error); 1852 0 stevel } 1853 0 stevel 1854 0 stevel static void 1855 0 stevel vfs_setpath(struct vfs *vfsp, refstr_t **refp, const char *newpath) 1856 0 stevel { 1857 0 stevel size_t len; 1858 0 stevel refstr_t *ref; 1859 0 stevel zone_t *zone = curproc->p_zone; 1860 0 stevel char *sp; 1861 0 stevel int have_list_lock = 0; 1862 0 stevel 1863 0 stevel ASSERT(!VFS_ON_LIST(vfsp) || vfs_lock_held(vfsp)); 1864 0 stevel 1865 0 stevel /* 1866 0 stevel * New path must be less than MAXPATHLEN because mntfs 1867 0 stevel * will only display up to MAXPATHLEN bytes. This is currently 1868 0 stevel * safe, because domount() uses pn_get(), and other callers 1869 0 stevel * similarly cap the size to fewer than MAXPATHLEN bytes. 1870 0 stevel */ 1871 0 stevel 1872 0 stevel ASSERT(strlen(newpath) < MAXPATHLEN); 1873 0 stevel 1874 0 stevel /* mntfs requires consistency while vfs list lock is held */ 1875 0 stevel 1876 0 stevel if (VFS_ON_LIST(vfsp)) { 1877 0 stevel have_list_lock = 1; 1878 0 stevel vfs_list_lock(); 1879 0 stevel } 1880 0 stevel 1881 0 stevel if (*refp != NULL) 1882 0 stevel refstr_rele(*refp); 1883 0 stevel 1884 0 stevel /* Do we need to modify the path? */ 1885 0 stevel 1886 0 stevel if (zone == global_zone || *newpath != '/') { 1887 0 stevel ref = refstr_alloc(newpath); 1888 0 stevel goto out; 1889 0 stevel } 1890 0 stevel 1891 0 stevel /* 1892 0 stevel * Truncate the trailing '/' in the zoneroot, and merge 1893 0 stevel * in the zone's rootpath with the "newpath" (resource 1894 0 stevel * or mountpoint) passed in. 1895 0 stevel * 1896 0 stevel * The size of the required buffer is thus the size of 1897 0 stevel * the buffer required for the passed-in newpath 1898 0 stevel * (strlen(newpath) + 1), plus the size of the buffer 1899 0 stevel * required to hold zone_rootpath (zone_rootpathlen) 1900 0 stevel * minus one for one of the now-superfluous NUL 1901 0 stevel * terminations, minus one for the trailing '/'. 1902 0 stevel * 1903 0 stevel * That gives us: 1904 0 stevel * 1905 0 stevel * (strlen(newpath) + 1) + zone_rootpathlen - 1 - 1 1906 0 stevel * 1907 0 stevel * Which is what we have below. 1908 0 stevel */ 1909 0 stevel 1910 0 stevel len = strlen(newpath) + zone->zone_rootpathlen - 1; 1911 0 stevel sp = kmem_alloc(len, KM_SLEEP); 1912 0 stevel 1913 0 stevel /* 1914 0 stevel * Copy everything including the trailing slash, which 1915 0 stevel * we then overwrite with the NUL character. 1916 0 stevel */ 1917 0 stevel 1918 0 stevel (void) strcpy(sp, zone->zone_rootpath); 1919 0 stevel sp[zone->zone_rootpathlen - 2] = '\0'; 1920 0 stevel (void) strcat(sp, newpath); 1921 0 stevel 1922 0 stevel ref = refstr_alloc(sp); 1923 0 stevel kmem_free(sp, len); 1924 0 stevel out: 1925 0 stevel *refp = ref; 1926 0 stevel 1927 0 stevel if (have_list_lock) { 1928 0 stevel vfs_mnttab_modtimeupd(); 1929 0 stevel vfs_list_unlock(); 1930 0 stevel } 1931 0 stevel } 1932 0 stevel 1933 0 stevel /* 1934 0 stevel * Record a mounted resource name in a vfs structure. 1935 0 stevel * If vfsp is already mounted, caller must hold the vfs lock. 1936 0 stevel */ 1937 0 stevel void 1938 0 stevel vfs_setresource(struct vfs *vfsp, const char *resource) 1939 0 stevel { 1940 0 stevel if (resource == NULL || resource[0] == '\0') 1941 0 stevel resource = VFS_NORESOURCE; 1942 0 stevel vfs_setpath(vfsp, &vfsp->vfs_resource, resource); 1943 0 stevel } 1944 0 stevel 1945 0 stevel /* 1946 0 stevel * Record a mount point name in a vfs structure. 1947 0 stevel * If vfsp is already mounted, caller must hold the vfs lock. 1948 0 stevel */ 1949 0 stevel void 1950 0 stevel vfs_setmntpoint(struct vfs *vfsp, const char *mntpt) 1951 0 stevel { 1952 0 stevel if (mntpt == NULL || mntpt[0] == '\0') 1953 0 stevel mntpt = VFS_NOMNTPT; 1954 0 stevel vfs_setpath(vfsp, &vfsp->vfs_mntpt, mntpt); 1955 0 stevel } 1956 0 stevel 1957 0 stevel /* Returns the vfs_resource. Caller must call refstr_rele() when finished. */ 1958 0 stevel 1959 0 stevel refstr_t * 1960 0 stevel vfs_getresource(const struct vfs *vfsp) 1961 0 stevel { 1962 0 stevel refstr_t *resource; 1963 0 stevel 1964 0 stevel vfs_list_read_lock(); 1965 0 stevel resource = vfsp->vfs_resource; 1966 0 stevel refstr_hold(resource); 1967 0 stevel vfs_list_unlock(); 1968 0 stevel 1969 0 stevel return (resource); 1970 0 stevel } 1971 0 stevel 1972 0 stevel /* Returns the vfs_mntpt. Caller must call refstr_rele() when finished. */ 1973 0 stevel 1974 0 stevel refstr_t * 1975 0 stevel vfs_getmntpoint(const struct vfs *vfsp) 1976 0 stevel { 1977 0 stevel refstr_t *mntpt; 1978 0 stevel 1979 0 stevel vfs_list_read_lock(); 1980 0 stevel mntpt = vfsp->vfs_mntpt; 1981 0 stevel refstr_hold(mntpt); 1982 0 stevel vfs_list_unlock(); 1983 0 stevel 1984 0 stevel return (mntpt); 1985 0 stevel } 1986 0 stevel 1987 0 stevel /* 1988 0 stevel * Create an empty options table with enough empty slots to hold all 1989 0 stevel * The options in the options string passed as an argument. 1990 0 stevel * Potentially prepend another options table. 1991 0 stevel * 1992 0 stevel * Note: caller is responsible for locking the vfs list, if needed, 1993 0 stevel * to protect mops. 1994 0 stevel */ 1995 0 stevel static void 1996 0 stevel vfs_createopttbl_extend(mntopts_t *mops, const char *opts, 1997 0 stevel const mntopts_t *mtmpl) 1998 0 stevel { 1999 0 stevel const char *s = opts; 2000 0 stevel uint_t count; 2001 0 stevel 2002 0 stevel if (opts == NULL || *opts == '\0') { 2003 0 stevel count = 0; 2004 0 stevel } else { 2005 0 stevel count = 1; 2006 0 stevel 2007 0 stevel /* 2008 0 stevel * Count number of options in the string 2009 0 stevel */ 2010 0 stevel for (s = strchr(s, ','); s != NULL; s = strchr(s, ',')) { 2011 0 stevel count++; 2012 0 stevel s++; 2013 0 stevel } 2014 0 stevel } 2015 0 stevel vfs_copyopttbl_extend(mtmpl, mops, count); 2016 0 stevel } 2017 0 stevel 2018 0 stevel /* 2019 0 stevel * Create an empty options table with enough empty slots to hold all 2020 0 stevel * The options in the options string passed as an argument. 2021 0 stevel * 2022 0 stevel * This function is *not* for general use by filesystems. 2023 0 stevel * 2024 0 stevel * Note: caller is responsible for locking the vfs list, if needed, 2025 0 stevel * to protect mops. 2026 0 stevel */ 2027 0 stevel void 2028 0 stevel vfs_createopttbl(mntopts_t *mops, const char *opts) 2029 0 stevel { 2030 0 stevel vfs_createopttbl_extend(mops, opts, NULL); 2031 0 stevel } 2032 0 stevel 2033 0 stevel 2034 0 stevel /* 2035 0 stevel * Swap two mount options tables 2036 0 stevel */ 2037 0 stevel static void 2038 0 stevel vfs_swapopttbl_nolock(mntopts_t *optbl1, mntopts_t *optbl2) 2039 0 stevel { 2040 0 stevel uint_t tmpcnt; 2041 0 stevel mntopt_t *tmplist; 2042 0 stevel 2043 0 stevel tmpcnt = optbl2->mo_count; 2044 0 stevel tmplist = optbl2->mo_list; 2045 0 stevel optbl2->mo_count = optbl1->mo_count; 2046 0 stevel optbl2->mo_list = optbl1->mo_list; 2047 0 stevel optbl1->mo_count = tmpcnt; 2048 0 stevel optbl1->mo_list = tmplist; 2049 0 stevel } 2050 0 stevel 2051 0 stevel static void 2052 0 stevel vfs_swapopttbl(mntopts_t *optbl1, mntopts_t *optbl2) 2053 0 stevel { 2054 0 stevel vfs_list_lock(); 2055 0 stevel vfs_swapopttbl_nolock(optbl1, optbl2); 2056 0 stevel vfs_mnttab_modtimeupd(); 2057 0 stevel vfs_list_unlock(); 2058 0 stevel } 2059 0 stevel 2060 0 stevel static char ** 2061 0 stevel vfs_copycancelopt_extend(char **const moc, int extend) 2062 0 stevel { 2063 0 stevel int i = 0; 2064 0 stevel int j; 2065 0 stevel char **result; 2066 0 stevel 2067 0 stevel if (moc != NULL) { 2068 0 stevel for (; moc[i] != NULL; i++) 2069 0 stevel /* count number of options to cancel */; 2070 0 stevel } 2071 0 stevel 2072 0 stevel if (i + extend == 0) 2073 0 stevel return (NULL); 2074 0 stevel 2075 0 stevel result = kmem_alloc((i + extend + 1) * sizeof (char *), KM_SLEEP); 2076 0 stevel 2077 0 stevel for (j = 0; j < i; j++) { 2078 0 stevel result[j] = kmem_alloc(strlen(moc[j]) + 1, KM_SLEEP); 2079 0 stevel (void) strcpy(result[j], moc[j]); 2080 0 stevel } 2081 0 stevel for (; j <= i + extend; j++) 2082 0 stevel result[j] = NULL; 2083 0 stevel 2084 0 stevel return (result); 2085 0 stevel } 2086 0 stevel 2087 0 stevel static void 2088 0 stevel vfs_copyopt(const mntopt_t *s, mntopt_t *d) 2089 0 stevel { 2090 0 stevel char *sp, *dp; 2091 0 stevel 2092 0 stevel d->mo_flags = s->mo_flags; 2093 0 stevel d->mo_data = s->mo_data; 2094 0 stevel sp = s->mo_name; 2095 0 stevel if (sp != NULL) { 2096 0 stevel dp = kmem_alloc(strlen(sp) + 1, KM_SLEEP); 2097 0 stevel (void) strcpy(dp, sp); 2098 0 stevel d->mo_name = dp; 2099 0 stevel } else { 2100 0 stevel d->mo_name = NULL; /* should never happen */ 2101 0 stevel } 2102 0 stevel 2103 0 stevel d->mo_cancel = vfs_copycancelopt_extend(s->mo_cancel, 0); 2104 0 stevel 2105 0 stevel sp = s->mo_arg; 2106 0 stevel if (sp != NULL) { 2107 0 stevel dp = kmem_alloc(strlen(sp) + 1, KM_SLEEP); 2108 0 stevel (void) strcpy(dp, sp); 2109 0 stevel d->mo_arg = dp; 2110 0 stevel } else { 2111 0 stevel d->mo_arg = NULL; 2112 0 stevel } 2113 0 stevel } 2114 0 stevel 2115 0 stevel /* 2116 0 stevel * Copy a mount options table, possibly allocating some spare 2117 0 stevel * slots at the end. It is permissible to copy_extend the NULL table. 2118 0 stevel */ 2119 0 stevel static void 2120 0 stevel vfs_copyopttbl_extend(const mntopts_t *smo, mntopts_t *dmo, int extra) 2121 0 stevel { 2122 0 stevel uint_t i, count; 2123 0 stevel mntopt_t *motbl; 2124 0 stevel 2125 0 stevel /* 2126 0 stevel * Clear out any existing stuff in the options table being initialized 2127 0 stevel */ 2128 0 stevel vfs_freeopttbl(dmo); 2129 0 stevel count = (smo == NULL) ? 0 : smo->mo_count; 2130 0 stevel if ((count + extra) == 0) /* nothing to do */ 2131 0 stevel return; 2132 0 stevel dmo->mo_count = count + extra; 2133 0 stevel motbl = kmem_zalloc((count + extra) * sizeof (mntopt_t), KM_SLEEP); 2134 0 stevel dmo->mo_list = motbl; 2135 0 stevel for (i = 0; i < count; i++) { 2136 0 stevel vfs_copyopt(&smo->mo_list[i], &motbl[i]); 2137 0 stevel } 2138 0 stevel for (i = count; i < count + extra; i++) { 2139 0 stevel motbl[i].mo_flags = MO_EMPTY; 2140 0 stevel } 2141 0 stevel } 2142 0 stevel 2143 0 stevel /* 2144 0 stevel * Copy a mount options table. 2145 0 stevel * 2146 0 stevel * This function is *not* for general use by filesystems. 2147 0 stevel * 2148 0 stevel * Note: caller is responsible for locking the vfs list, if needed, 2149 0 stevel * to protect smo and dmo. 2150 0 stevel */ 2151 0 stevel void 2152 0 stevel vfs_copyopttbl(const mntopts_t *smo, mntopts_t *dmo) 2153 0 stevel { 2154 0 stevel vfs_copyopttbl_extend(smo, dmo, 0); 2155 0 stevel } 2156 0 stevel 2157 0 stevel static char ** 2158 0 stevel vfs_mergecancelopts(const mntopt_t *mop1, const mntopt_t *mop2) 2159 0 stevel { 2160 0 stevel int c1 = 0; 2161 0 stevel int c2 = 0; 2162 0 stevel char **result; 2163 0 stevel char **sp1, **sp2, **dp; 2164 0 stevel 2165 0 stevel /* 2166 0 stevel * First we count both lists of cancel options. 2167 0 stevel * If either is NULL or has no elements, we return a copy of 2168 0 stevel * the other. 2169 0 stevel */ 2170 0 stevel if (mop1->mo_cancel != NULL) { 2171 0 stevel for (; mop1->mo_cancel[c1] != NULL; c1++) 2172 0 stevel /* count cancel options in mop1 */; 2173 0 stevel } 2174 0 stevel 2175 0 stevel if (c1 == 0) 2176 0 stevel return (vfs_copycancelopt_extend(mop2->mo_cancel, 0)); 2177 0 stevel 2178 0 stevel if (mop2->mo_cancel != NULL) { 2179 0 stevel for (; mop2->mo_cancel[c2] != NULL; c2++) 2180 0 stevel /* count cancel options in mop2 */; 2181 0 stevel } 2182 0 stevel 2183 0 stevel result = vfs_copycancelopt_extend(mop1->mo_cancel, c2); 2184 0 stevel 2185 0 stevel if (c2 == 0) 2186 0 stevel return (result); 2187 0 stevel 2188 0 stevel /* 2189 0 stevel * When we get here, we've got two sets of cancel options; 2190 0 stevel * we need to merge the two sets. We know that the result 2191 0 stevel * array has "c1+c2+1" entries and in the end we might shrink 2192 0 stevel * it. 2193 0 stevel * Result now has a copy of the c1 entries from mop1; we'll 2194 0 stevel * now lookup all the entries of mop2 in mop1 and copy it if 2195 0 stevel * it is unique. 2196 0 stevel * This operation is O(n^2) but it's only called once per 2197 0 stevel * filesystem per duplicate option. This is a situation 2198 0 stevel * which doesn't arise with the filesystems in ON and 2199 0 stevel * n is generally 1. 2200 0 stevel */ 2201 0 stevel 2202 0 stevel dp = &result[c1]; 2203 0 stevel for (sp2 = mop2->mo_cancel; *sp2 != NULL; sp2++) { 2204 0 stevel for (sp1 = mop1->mo_cancel; *sp1 != NULL; sp1++) { 2205 0 stevel if (strcmp(*sp1, *sp2) == 0) 2206 0 stevel break; 2207 0 stevel } 2208 0 stevel if (*sp1 == NULL) { 2209 0 stevel /* 2210 0 stevel * Option *sp2 not found in mop1, so copy it. 2211 0 stevel * The calls to vfs_copycancelopt_extend() 2212 0 stevel * guarantee that there's enough room. 2213 0 stevel */ 2214 0 stevel *dp = kmem_alloc(strlen(*sp2) + 1, KM_SLEEP); 2215 0 stevel (void) strcpy(*dp++, *sp2); 2216 0 stevel } 2217 0 stevel } 2218 0 stevel if (dp != &result[c1+c2]) { 2219 0 stevel size_t bytes = (dp - result + 1) * sizeof (char *); 2220 0 stevel char **nres = kmem_alloc(bytes, KM_SLEEP); 2221 0 stevel 2222 0 stevel bcopy(result, nres, bytes); 2223 0 stevel kmem_free(result, (c1 + c2 + 1) * sizeof (char *)); 2224 0 stevel result = nres; 2225 0 stevel } 2226 0 stevel return (result); 2227 0 stevel } 2228 0 stevel 2229 0 stevel /* 2230 0 stevel * Merge two mount option tables (outer and inner) into one. This is very 2231 0 stevel * similar to "merging" global variables and automatic variables in C. 2232 0 stevel * 2233 0 stevel * This isn't (and doesn't have to be) fast. 2234 0 stevel * 2235 0 stevel * This function is *not* for general use by filesystems. 2236 0 stevel * 2237 0 stevel * Note: caller is responsible for locking the vfs list, if needed, 2238 0 stevel * to protect omo, imo & dmo. 2239 0 stevel */ 2240 0 stevel void 2241 0 stevel vfs_mergeopttbl(const mntopts_t *omo, const mntopts_t *imo, mntopts_t *dmo) 2242 0 stevel { 2243 0 stevel uint_t i, count; 2244 0 stevel mntopt_t *mop, *motbl; 2245 0 stevel uint_t freeidx; 2246 0 stevel 2247 0 stevel /* 2248 0 stevel * First determine how much space we need to allocate. 2249 0 stevel */ 2250 0 stevel count = omo->mo_count; 2251 0 stevel for (i = 0; i < imo->mo_count; i++) { 2252 0 stevel if (imo->mo_list[i].mo_flags & MO_EMPTY) 2253 0 stevel continue; 2254 0 stevel if (vfs_hasopt(omo, imo->mo_list[i].mo_name) == NULL) 2255 0 stevel count++; 2256 0 stevel } 2257 0 stevel ASSERT(count >= omo->mo_count && 2258 0 stevel count <= omo->mo_count + imo->mo_count); 2259 0 stevel motbl = kmem_alloc(count * sizeof (mntopt_t), KM_SLEEP); 2260 0 stevel for (i = 0; i < omo->mo_count; i++) 2261 0 stevel vfs_copyopt(&omo->mo_list[i], &motbl[i]); 2262 0 stevel freeidx = omo->mo_count; 2263 0 stevel for (i = 0; i < imo->mo_count; i++) { 2264 0 stevel if (imo->mo_list[i].mo_flags & MO_EMPTY) 2265 0 stevel continue; 2266 0 stevel if ((mop = vfs_hasopt(omo, imo->mo_list[i].mo_name)) != NULL) { 2267 0 stevel char **newcanp; 2268 0 stevel uint_t index = mop - omo->mo_list; 2269 0 stevel 2270 0 stevel newcanp = vfs_mergecancelopts(mop, &motbl[index]); 2271 0 stevel 2272 0 stevel vfs_freeopt(&motbl[index]); 2273 0 stevel vfs_copyopt(&imo->mo_list[i], &motbl[index]); 2274 0 stevel 2275 0 stevel vfs_freecancelopt(motbl[index].mo_cancel); 2276 0 stevel motbl[index].mo_cancel = newcanp; 2277 0 stevel } else { 2278 0 stevel /* 2279 0 stevel * If it's a new option, just copy it over to the first 2280 0 stevel * free location. 2281 0 stevel */ 2282 0 stevel vfs_copyopt(&imo->mo_list[i], &motbl[freeidx++]); 2283 0 stevel } 2284 0 stevel } 2285 0 stevel dmo->mo_count = count; 2286 0 stevel dmo->mo_list = motbl; 2287 0 stevel } 2288 0 stevel 2289 0 stevel /* 2290 0 stevel * Functions to set and clear mount options in a mount options table. 2291 0 stevel */ 2292 0 stevel 2293 0 stevel /* 2294 0 stevel * Clear a mount option, if it exists. 2295 0 stevel * 2296 0 stevel * The update_mnttab arg indicates whether mops is part of a vfs that is on 2297 0 stevel * the vfs list. 2298 0 stevel */ 2299 0 stevel static void 2300 0 stevel vfs_clearmntopt_nolock(mntopts_t *mops, const char *opt, int update_mnttab) 2301 0 stevel { 2302 0 stevel struct mntopt *mop; 2303 0 stevel uint_t i, count; 2304 0 stevel 2305 0 stevel ASSERT(!update_mnttab || RW_WRITE_HELD(&vfslist)); 2306 0 stevel 2307 0 stevel count = mops->mo_count; 2308 0 stevel for (i = 0; i < count; i++) { 2309 0 stevel mop = &mops->mo_list[i]; 2310 0 stevel 2311 0 stevel if (mop->mo_flags & MO_EMPTY) 2312 0 stevel continue; 2313 0 stevel if (strcmp(opt, mop->mo_name)) 2314 0 stevel continue; 2315 0 stevel mop->mo_flags &= ~MO_SET; 2316 0 stevel if (mop->mo_arg != NULL) { 2317 0 stevel kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1); 2318 0 stevel } 2319 0 stevel mop->mo_arg = NULL; 2320 0 stevel if (update_mnttab) 2321 0 stevel vfs_mnttab_modtimeupd(); 2322 0 stevel break; 2323 0 stevel } 2324 0 stevel } 2325 0 stevel 2326 0 stevel void 2327 0 stevel vfs_clearmntopt(struct vfs *vfsp, const char *opt) 2328 0 stevel { 2329 0 stevel int gotlock = 0; 2330 0 stevel 2331 0 stevel if (VFS_ON_LIST(vfsp)) { 2332 0 stevel gotlock = 1; 2333 0 stevel vfs_list_lock(); 2334 0 stevel } 2335 0 stevel vfs_clearmntopt_nolock(&vfsp->vfs_mntopts, opt, gotlock); 2336 0 stevel if (gotlock) 2337 0 stevel vfs_list_unlock(); 2338 0 stevel } 2339 0 stevel 2340 0 stevel 2341 0 stevel /* 2342 0 stevel * Set a mount option on. If it's not found in the table, it's silently 2343 0 stevel * ignored. If the option has MO_IGNORE set, it is still set unless the 2344 0 stevel * VFS_NOFORCEOPT bit is set in the flags. Also, VFS_DISPLAY/VFS_NODISPLAY flag 2345 0 stevel * bits can be used to toggle the MO_NODISPLAY bit for the option. 2346 0 stevel * If the VFS_CREATEOPT flag bit is set then the first option slot with 2347 0 stevel * MO_EMPTY set is created as the option passed in. 2348 0 stevel * 2349 0 stevel * The update_mnttab arg indicates whether mops is part of a vfs that is on 2350 0 stevel * the vfs list. 2351 0 stevel */ 2352 0 stevel static void 2353 0 stevel vfs_setmntopt_nolock(mntopts_t *mops, const char *opt, 2354 0 stevel const char *arg, int flags, int update_mnttab) 2355 0 stevel { 2356 0 stevel mntopt_t *mop; 2357 0 stevel uint_t i, count; 2358 0 stevel char *sp; 2359 0 stevel 2360 0 stevel ASSERT(!update_mnttab || RW_WRITE_HELD(&vfslist)); 2361 0 stevel 2362 0 stevel if (flags & VFS_CREATEOPT) { 2363 0 stevel if (vfs_hasopt(mops, opt) != NULL) { 2364 0 stevel flags &= ~VFS_CREATEOPT; 2365 0 stevel } 2366 0 stevel } 2367 0 stevel count = mops->mo_count; 2368 0 stevel for (i = 0; i < count; i++) { 2369 0 stevel mop = &mops->mo_list[i]; 2370 0 stevel 2371 0 stevel if (mop->mo_flags & MO_EMPTY) { 2372 0 stevel if ((flags & VFS_CREATEOPT) == 0) 2373 0 stevel continue; 2374 0 stevel sp = kmem_alloc(strlen(opt) + 1, KM_SLEEP); 2375 0 stevel (void) strcpy(sp, opt); 2376 0 stevel mop->mo_name = sp; 2377 0 stevel if (arg != NULL) 2378 0 stevel mop->mo_flags = MO_HASVALUE; 2379 0 stevel else 2380 0 stevel mop->mo_flags = 0; 2381 0 stevel } else if (strcmp(opt, mop->mo_name)) { 2382 0 stevel continue; 2383 0 stevel } 2384 0 stevel if ((mop->mo_flags & MO_IGNORE) && (flags & VFS_NOFORCEOPT)) 2385 0 stevel break; 2386 0 stevel if (arg != NULL && (mop->mo_flags & MO_HASVALUE) != 0) { 2387 0 stevel sp = kmem_alloc(strlen(arg) + 1, KM_SLEEP); 2388 0 stevel (void) strcpy(sp, arg); 2389 0 stevel } else { 2390 0 stevel sp = NULL; 2391 0 stevel } 2392 0 stevel if (mop->mo_arg != NULL) 2393 0 stevel kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1); 2394 0 stevel mop->mo_arg = sp; 2395 0 stevel if (flags & VFS_DISPLAY) 2396 0 stevel mop->mo_flags &= ~MO_NODISPLAY; 2397 0 stevel if (flags & VFS_NODISPLAY) 2398 0 stevel mop->mo_flags |= MO_NODISPLAY; 2399 0 stevel mop->mo_flags |= MO_SET; 2400 0 stevel if (mop->mo_cancel != NULL) { 2401 0 stevel char **cp; 2402 0 stevel 2403 0 stevel for (cp = mop->mo_cancel; *cp != NULL; cp++) 2404 0 stevel vfs_clearmntopt_nolock(mops, *cp, 0); 2405 0 stevel } 2406 0 stevel if (update_mnttab) 2407 0 stevel vfs_mnttab_modtimeupd(); 2408 0 stevel break; 2409 0 stevel } 2410 0 stevel } 2411 0 stevel 2412 0 stevel void 2413 0 stevel vfs_setmntopt(struct vfs *vfsp, const char *opt, const char *arg, int flags) 2414 0 stevel { 2415 0 stevel int gotlock = 0; 2416 0 stevel 2417 0 stevel if (VFS_ON_LIST(vfsp)) { 2418 0 stevel gotlock = 1; 2419 0 stevel vfs_list_lock(); 2420 0 stevel } 2421 0 stevel vfs_setmntopt_nolock(&vfsp->vfs_mntopts, opt, arg, flags, gotlock); 2422 0 stevel if (gotlock) 2423 0 stevel vfs_list_unlock(); 2424 0 stevel } 2425 0 stevel 2426 0 stevel 2427 0 stevel /* 2428 0 stevel * Add a "tag" option to a mounted file system's options list. 2429 0 stevel * 2430 0 stevel * Note: caller is responsible for locking the vfs list, if needed, 2431 0 stevel * to protect mops. 2432 0 stevel */ 2433 0 stevel static mntopt_t * 2434 0 stevel vfs_addtag(mntopts_t *mops, const char *tag) 2435 0 stevel { 2436 0 stevel uint_t count; 2437 0 stevel mntopt_t *mop, *motbl; 2438 0 stevel 2439 0 stevel count = mops->mo_count + 1; 2440 0 stevel motbl = kmem_zalloc(count * sizeof (mntopt_t), KM_SLEEP); 2441 0 stevel if (mops->mo_count) { 2442 0 stevel size_t len = (count - 1) * sizeof (mntopt_t); 2443 0 stevel 2444 0 stevel bcopy(mops->mo_list, motbl, len); 2445 0 stevel kmem_free(mops->mo_list, len); 2446 0 stevel } 2447 0 stevel mops->mo_count = count; 2448 0 stevel mops->mo_list = motbl; 2449 0 stevel mop = &motbl[count - 1]; 2450 0 stevel mop->mo_flags = MO_TAG; 2451 0 stevel mop->mo_name = kmem_alloc(strlen(tag) + 1, KM_SLEEP); 2452 0 stevel (void) strcpy(mop->mo_name, tag); 2453 0 stevel return (mop); 2454 0 stevel } 2455 0 stevel 2456 0 stevel /* 2457 0 stevel * Allow users to set arbitrary "tags" in a vfs's mount options. 2458 0 stevel * Broader use within the kernel is discouraged. 2459 0 stevel */ 2460 0 stevel int 2461 0 stevel vfs_settag(uint_t major, uint_t minor, const char *mntpt, const char *tag, 2462 0 stevel cred_t *cr) 2463 0 stevel { 2464 0 stevel vfs_t *vfsp; 2465 0 stevel mntopts_t *mops; 2466 0 stevel mntopt_t *mop; 2467 0 stevel int found = 0; 2468 0 stevel dev_t dev = makedevice(major, minor); 2469 0 stevel int err = 0; 2470 0 stevel char *buf = kmem_alloc(MAX_MNTOPT_STR, KM_SLEEP); 2471 0 stevel 2472 0 stevel /* 2473 0 stevel * Find the desired mounted file system 2474 0 stevel */ 2475 0 stevel vfs_list_lock(); 2476 0 stevel vfsp = rootvfs; 2477 0 stevel do { 2478 0 stevel if (vfsp->vfs_dev == dev && 2479 0 stevel strcmp(mntpt, refstr_value(vfsp->vfs_mntpt)) == 0) { 2480 0 stevel found = 1; 2481 0 stevel break; 2482 0 stevel } 2483 0 stevel vfsp = vfsp->vfs_next; 2484 0 stevel } while (vfsp != rootvfs); 2485 0 stevel 2486 0 stevel if (!found) { 2487 0 stevel err = EINVAL; 2488 0 stevel goto out; 2489 0 stevel } 2490 0 stevel err = secpolicy_fs_config(cr, vfsp); 2491 0 stevel if (err != 0) 2492 0 stevel goto out; 2493 0 stevel 2494 0 stevel mops = &vfsp->vfs_mntopts; 2495 0 stevel /* 2496 0 stevel * Add tag if it doesn't already exist 2497 0 stevel */ 2498 0 stevel if ((mop = vfs_hasopt(mops, tag)) == NULL) { 2499 0 stevel int len; 2500 0 stevel 2501 0 stevel (void) vfs_buildoptionstr(mops, buf, MAX_MNTOPT_STR); 2502 0 stevel len = strlen(buf); 2503 0 stevel if (len + strlen(tag) + 2 > MAX_MNTOPT_STR) { 2504 0 stevel err = ENAMETOOLONG; 2505 0 stevel goto out; 2506 0 stevel } 2507 0 stevel mop = vfs_addtag(mops, tag); 2508 0 stevel } 2509 0 stevel if ((mop->mo_flags & MO_TAG) == 0) { 2510 0 stevel err = EINVAL; 2511 0 stevel goto out; 2512 0 stevel } 2513 0 stevel vfs_setmntopt_nolock(mops, tag, NULL, 0, 1); 2514 0 stevel out: 2515 0 stevel vfs_list_unlock(); 2516 0 stevel kmem_free(buf, MAX_MNTOPT_STR); 2517 0 stevel return (err); 2518 0 stevel } 2519 0 stevel 2520 0 stevel /* 2521 0 stevel * Allow users to remove arbitrary "tags" in a vfs's mount options. 2522 0 stevel * Broader use within the kernel is discouraged. 2523 0 stevel */ 2524 0 stevel int 2525 0 stevel vfs_clrtag(uint_t major, uint_t minor, const char *mntpt, const char *tag, 2526 0 stevel cred_t *cr) 2527 0 stevel { 2528 0 stevel vfs_t *vfsp; 2529 0 stevel mntopt_t *mop; 2530 0 stevel int found = 0; 2531 0 stevel dev_t dev = makedevice(major, minor); 2532 0 stevel int err = 0; 2533 0 stevel 2534 0 stevel /* 2535 0 stevel * Find the desired mounted file system 2536 0 stevel */ 2537 0 stevel vfs_list_lock(); 2538 0 stevel vfsp = rootvfs; 2539 0 stevel do { 2540 0 stevel if (vfsp->vfs_dev == dev && 2541 0 stevel strcmp(mntpt, refstr_value(vfsp->vfs_mntpt)) == 0) { 2542 0 stevel found = 1; 2543 0 stevel break; 2544 0 stevel } 2545 0 stevel vfsp = vfsp->vfs_next; 2546 0 stevel } while (vfsp != rootvfs); 2547 0 stevel 2548 0 stevel if (!found) { 2549 0 stevel err = EINVAL; 2550 0 stevel goto out; 2551 0 stevel } 2552 0 stevel err = secpolicy_fs_config(cr, vfsp); 2553 0 stevel if (err != 0) 2554 0 stevel goto out; 2555 0 stevel 2556 0 stevel if ((mop = vfs_hasopt(&vfsp->vfs_mntopts, tag)) == NULL) { 2557 0 stevel err = EINVAL; 2558 0 stevel goto out; 2559 0 stevel } 2560 0 stevel if ((mop->mo_flags & MO_TAG) == 0) { 2561 0 stevel err = EINVAL; 2562 0 stevel goto out; 2563 0 stevel } 2564 0 stevel vfs_clearmntopt_nolock(&vfsp->vfs_mntopts, tag, 1); 2565 0 stevel out: 2566 0 stevel vfs_list_unlock(); 2567 0 stevel return (err); 2568 0 stevel } 2569 0 stevel 2570 0 stevel /* 2571 0 stevel * Function to parse an option string and fill in a mount options table. 2572 0 stevel * Unknown options are silently ignored. The input option string is modified 2573 0 stevel * by replacing separators with nulls. If the create flag is set, options 2574 0 stevel * not found in the table are just added on the fly. The table must have 2575 0 stevel * an option slot marked MO_EMPTY to add an option on the fly. 2576 0 stevel * 2577 0 stevel * This function is *not* for general use by filesystems. 2578 0 stevel * 2579 0 stevel * Note: caller is responsible for locking the vfs list, if needed, 2580 0 stevel * to protect mops.. 2581 0 stevel */ 2582 0 stevel void 2583 0 stevel vfs_parsemntopts(mntopts_t *mops, char *osp, int create) 2584 0 stevel { 2585 0 stevel char *s = osp, *p, *nextop, *valp, *cp, *ep; 2586 0 stevel int setflg = VFS_NOFORCEOPT; 2587 0 stevel 2588 0 stevel if (osp == NULL) 2589 0 stevel return; 2590 0 stevel while (*s != '\0') { 2591 0 stevel p = strchr(s, ','); /* find next option */ 2592 0 stevel if (p == NULL) { 2593 0 stevel cp = NULL; 2594 0 stevel p = s + strlen(s); 2595 0 stevel } else { 2596 0 stevel cp = p; /* save location of comma */ 2597 0 stevel *p++ = '\0'; /* mark end and point to next option */ 2598 0 stevel } 2599 0 stevel nextop = p; 2600 0 stevel p = strchr(s, '='); /* look for value */ 2601 0 stevel if (p == NULL) { 2602 0 stevel valp = NULL; /* no value supplied */ 2603 0 stevel } else { 2604 0 stevel ep = p; /* save location of equals */ 2605 0 stevel *p++ = '\0'; /* end option and point to value */ 2606 0 stevel valp = p; 2607 0 stevel } 2608 0 stevel /* 2609 0 stevel * set option into options table 2610 0 stevel */ 2611 0 stevel if (create) 2612 0 stevel setflg |= VFS_CREATEOPT; 2613 0 stevel vfs_setmntopt_nolock(mops, s, valp, setflg, 0); 2614 0 stevel if (cp != NULL) 2615 0 stevel *cp = ','; /* restore the comma */ 2616 0 stevel if (valp != NULL) 2617 0 stevel *ep = '='; /* restore the equals */ 2618 0 stevel s = nextop; 2619 0 stevel } 2620 0 stevel } 2621 0 stevel 2622 0 stevel /* 2623 0 stevel * Function to inquire if an option exists in a mount options table. 2624 0 stevel * Returns a pointer to the option if it exists, else NULL. 2625 0 stevel * 2626 0 stevel * This function is *not* for general use by filesystems. 2627 0 stevel * 2628 0 stevel * Note: caller is responsible for locking the vfs list, if needed, 2629 0 stevel * to protect mops. 2630 0 stevel */ 2631 0 stevel struct mntopt * 2632 0 stevel vfs_hasopt(const mntopts_t *mops, const char *opt) 2633 0 stevel { 2634 0 stevel struct mntopt *mop; 2635 0 stevel uint_t i, count; 2636 0 stevel 2637 0 stevel count = mops->mo_count; 2638 0 stevel for (i = 0; i < count; i++) { 2639 0 stevel mop = &mops->mo_list[i]; 2640 0 stevel 2641 0 stevel if (mop->mo_flags & MO_EMPTY) 2642 0 stevel continue; 2643 0 stevel if (strcmp(opt, mop->mo_name) == 0) 2644 0 stevel return (mop); 2645 0 stevel } 2646 0 stevel return (NULL); 2647 0 stevel } 2648 0 stevel 2649 0 stevel /* 2650 0 stevel * Function to inquire if an option is set in a mount options table. 2651 0 stevel * Returns non-zero if set and fills in the arg pointer with a pointer to 2652 0 stevel * the argument string or NULL if there is no argument string. 2653 0 stevel */ 2654 0 stevel static int 2655 0 stevel vfs_optionisset_nolock(const mntopts_t *mops, const char *opt, char **argp) 2656 0 stevel { 2657 0 stevel struct mntopt *mop; 2658 0 stevel uint_t i, count; 2659 0 stevel 2660 0 stevel count = mops->mo_count; 2661 0 stevel for (i = 0; i < count; i++) { 2662 0 stevel mop = &mops->mo_list[i]; 2663 0 stevel 2664 0 stevel if (mop->mo_flags & MO_EMPTY) 2665 0 stevel continue; 2666 0 stevel if (strcmp(opt, mop->mo_name)) 2667 0 stevel continue; 2668 0 stevel if ((mop->mo_flags & MO_SET) == 0) 2669 0 stevel return (0); 2670 0 stevel if (argp != NULL && (mop->mo_flags & MO_HASVALUE) != 0) 2671 0 stevel *argp = mop->mo_arg; 2672 0 stevel return (1); 2673 0 stevel } 2674 0 stevel return (0); 2675 0 stevel } 2676 0 stevel 2677 0 stevel 2678 0 stevel int 2679 0 stevel vfs_optionisset(const struct vfs *vfsp, const char *opt, char **argp) 2680 0 stevel { 2681 0 stevel int ret; 2682 0 stevel 2683 0 stevel vfs_list_read_lock(); 2684 0 stevel ret = vfs_optionisset_nolock(&vfsp->vfs_mntopts, opt, argp); 2685 0 stevel vfs_list_unlock(); 2686 0 stevel return (ret); 2687 0 stevel } 2688 0 stevel 2689 0 stevel 2690 0 stevel /* 2691 0 stevel * Construct a comma separated string of the options set in the given 2692 0 stevel * mount table, return the string in the given buffer. Return non-zero if 2693 0 stevel * the buffer would overflow. 2694 0 stevel * 2695 0 stevel * This function is *not* for general use by filesystems. 2696 0 stevel * 2697 0 stevel * Note: caller is responsible for locking the vfs list, if needed, 2698 0 stevel * to protect mp. 2699 0 stevel */ 2700 0 stevel int 2701 0 stevel vfs_buildoptionstr(const mntopts_t *mp, char *buf, int len) 2702 0 stevel { 2703 0 stevel char *cp; 2704 0 stevel uint_t i; 2705 0 stevel 2706 0 stevel buf[0] = '\0'; 2707 0 stevel cp = buf; 2708 0 stevel for (i = 0; i < mp->mo_count; i++) { 2709 0 stevel struct mntopt *mop; 2710 0 stevel 2711 0 stevel mop = &mp->mo_list[i]; 2712 0 stevel if (mop->mo_flags & MO_SET) { 2713 0 stevel int optlen, comma = 0; 2714 0 stevel 2715 0 stevel if (buf[0] != '\0') 2716 0 stevel comma = 1; 2717 0 stevel optlen = strlen(mop->mo_name); 2718 0 stevel if (strlen(buf) + comma + optlen + 1 > len) 2719 0 stevel goto err; 2720 0 stevel if (comma) 2721 0 stevel *cp++ = ','; 2722 0 stevel (void) strcpy(cp, mop->mo_name); 2723 0 stevel cp += optlen; 2724 0 stevel /* 2725 0 stevel * Append option value if there is one 2726 0 stevel */ 2727 0 stevel if (mop->mo_arg != NULL) { 2728 0 stevel int arglen; 2729 0 stevel 2730 0 stevel arglen = strlen(mop->mo_arg); 2731 0 stevel if (strlen(buf) + arglen + 2 > len) 2732 0 stevel goto err; 2733 0 stevel *cp++ = '='; 2734 0 stevel (void) strcpy(cp, mop->mo_arg); 2735 0 stevel cp += arglen; 2736 0 stevel } 2737 0 stevel } 2738 0 stevel } 2739 0 stevel return (0); 2740 0 stevel err: 2741 0 stevel return (EOVERFLOW); 2742 0 stevel } 2743 0 stevel 2744 0 stevel static void 2745 0 stevel vfs_freecancelopt(char **moc) 2746 0 stevel { 2747 0 stevel if (moc != NULL) { 2748 0 stevel int ccnt = 0; 2749 0 stevel char **cp; 2750 0 stevel 2751 0 stevel for (cp = moc; *cp != NULL; cp++) { 2752 0 stevel kmem_free(*cp, strlen(*cp) + 1); 2753 0 stevel ccnt++; 2754 0 stevel } 2755 0 stevel kmem_free(moc, (ccnt + 1) * sizeof (char *)); 2756 0 stevel } 2757 0 stevel } 2758 0 stevel 2759 0 stevel static void 2760 0 stevel vfs_freeopt(mntopt_t *mop) 2761 0 stevel { 2762 0 stevel if (mop->mo_name != NULL) 2763 0 stevel kmem_free(mop->mo_name, strlen(mop->mo_name) + 1); 2764 0 stevel 2765 0 stevel vfs_freecancelopt(mop->mo_cancel); 2766 0 stevel 2767 0 stevel if (mop->mo_arg != NULL) 2768 0 stevel kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1); 2769 0 stevel } 2770 0 stevel 2771 0 stevel /* 2772 0 stevel * Free a mount options table 2773 0 stevel * 2774 0 stevel * This function is *not* for general use by filesystems. 2775 0 stevel * 2776 0 stevel * Note: caller is responsible for locking the vfs list, if needed, 2777 0 stevel * to protect mp. 2778 0 stevel */ 2779 0 stevel void 2780 0 stevel vfs_freeopttbl(mntopts_t *mp) 2781 0 stevel { 2782 0 stevel uint_t i, count; 2783 0 stevel 2784 0 stevel count = mp->mo_count; 2785 0 stevel for (i = 0; i < count; i++) { 2786 0 stevel vfs_freeopt(&mp->mo_list[i]); 2787 0 stevel } 2788 0 stevel if (count) { 2789 0 stevel kmem_free(mp->mo_list, sizeof (mntopt_t) * count); 2790 0 stevel mp->mo_count = 0; 2791 0 stevel mp->mo_list = NULL; 2792 0 stevel } 2793 0 stevel } 2794 0 stevel 2795 4863 praks 2796 4863 praks /* ARGSUSED */ 2797 4863 praks static int 2798 4863 praks vfs_mntdummyread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, 2799 4863 praks caller_context_t *ct) 2800 4863 praks { 2801 4863 praks return (0); 2802 4863 praks } 2803 4863 praks 2804 4863 praks /* ARGSUSED */ 2805 4863 praks static int 2806 4863 praks vfs_mntdummywrite(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, 2807 4863 praks caller_context_t *ct) 2808 4863 praks { 2809 4863 praks return (0); 2810 4863 praks } 2811 4863 praks 2812 4863 praks /* 2813 4863 praks * The dummy vnode is currently used only by file events notification 2814 4863 praks * module which is just interested in the timestamps. 2815 4863 praks */ 2816 4863 praks /* ARGSUSED */ 2817 4863 praks static int 2818 5331 amw vfs_mntdummygetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2819 5331 amw caller_context_t *ct) 2820 4863 praks { 2821 4863 praks bzero(vap, sizeof (vattr_t)); 2822 4863 praks vap->va_type = VREG; 2823 4863 praks vap->va_nlink = 1; 2824 4863 praks vap->va_ctime = vfs_mnttab_ctime; 2825 4863 praks /* 2826 4863 praks * it is ok to just copy mtime as the time will be monotonically 2827 4863 praks * increasing. 2828 4863 praks */ 2829 4863 praks vap->va_mtime = vfs_mnttab_mtime; 2830 4863 praks vap->va_atime = vap->va_mtime; 2831 4863 praks return (0); 2832 4863 praks } 2833 4863 praks 2834 4863 praks static void 2835 4863 praks vfs_mnttabvp_setup(void) 2836 4863 praks { 2837 4863 praks vnode_t *tvp; 2838 4863 praks vnodeops_t *vfs_mntdummyvnops; 2839 4863 praks const fs_operation_def_t mnt_dummyvnodeops_template[] = { 2840 4863 praks VOPNAME_READ, { .vop_read = vfs_mntdummyread }, 2841 4863 praks VOPNAME_WRITE, { .vop_write = vfs_mntdummywrite }, 2842 4863 praks VOPNAME_GETATTR, { .vop_getattr = vfs_mntdummygetattr }, 2843 4863 praks VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 2844 4863 praks NULL, NULL 2845 4863 praks }; 2846 4863 praks 2847 4863 praks if (vn_make_ops("mnttab", mnt_dummyvnodeops_template, 2848 4863 praks &vfs_mntdummyvnops) != 0) { 2849 4863 praks cmn_err(CE_WARN, "vfs_mnttabvp_setup: vn_make_ops failed"); 2850 4863 praks /* Shouldn't happen, but not bad enough to panic */ 2851 4863 praks return; 2852 4863 praks } 2853 4863 praks 2854 4863 praks /* 2855 4863 praks * A global dummy vnode is allocated to represent mntfs files. 2856 4863 praks * The mntfs file (/etc/mnttab) can be monitored for file events 2857 4863 praks * and receive an event when mnttab changes. Dummy VOP calls 2858 4863 praks * will be made on this vnode. The file events notification module 2859 4863 praks * intercepts this vnode and delivers relevant events. 2860 4863 praks */ 2861 4863 praks tvp = vn_alloc(KM_SLEEP); 2862 4863 praks tvp->v_flag = VNOMOUNT|VNOMAP|VNOSWAP|VNOCACHE; 2863 4863 praks vn_setops(tvp, vfs_mntdummyvnops); 2864 4863 praks tvp->v_type = VREG; 2865 4863 praks /* 2866 4863 praks * The mnt dummy ops do not reference v_data. 2867 4863 praks * No other module intercepting this vnode should either. 2868 4863 praks * Just set it to point to itself. 2869 4863 praks */ 2870 4863 praks tvp->v_data = (caddr_t)tvp; 2871 4863 praks tvp->v_vfsp = rootvfs; 2872 4863 praks vfs_mntdummyvp = tvp; 2873 4863 praks } 2874 4863 praks 2875 4863 praks /* 2876 4863 praks * performs fake read/write ops 2877 4863 praks */ 2878 4863 praks static void 2879 4863 praks vfs_mnttab_rwop(int rw) 2880 4863 praks { 2881 4863 praks struct uio uio; 2882 4863 praks struct iovec iov; 2883 4863 praks char buf[1]; 2884 4863 praks 2885 4863 praks if (vfs_mntdummyvp == NULL) 2886 4863 praks return; 2887 4863 praks 2888 4863 praks bzero(&uio, sizeof (uio)); 2889 4863 praks bzero(&iov, sizeof (iov)); 2890 4863 praks iov.iov_base = buf; 2891 4863 praks iov.iov_len = 0; 2892 4863 praks uio.uio_iov = &iov; 2893 4863 praks uio.uio_iovcnt = 1; 2894 4863 praks uio.uio_loffset = 0; 2895 4863 praks uio.uio_segflg = UIO_SYSSPACE; 2896 4863 praks uio.uio_resid = 0; 2897 4863 praks if (rw) { 2898 4863 praks (void) VOP_WRITE(vfs_mntdummyvp, &uio, 0, kcred, NULL); 2899 4863 praks } else { 2900 4863 praks (void) VOP_READ(vfs_mntdummyvp, &uio, 0, kcred, NULL); 2901 4863 praks } 2902 4863 praks } 2903 4863 praks 2904 4863 praks /* 2905 4863 praks * Generate a write operation. 2906 4863 praks */ 2907 4863 praks void 2908 4863 praks vfs_mnttab_writeop(void) 2909 4863 praks { 2910 4863 praks vfs_mnttab_rwop(1); 2911 4863 praks } 2912 4863 praks 2913 4863 praks /* 2914 4863 praks * Generate a read operation. 2915 4863 praks */ 2916 4863 praks void 2917 4863 praks vfs_mnttab_readop(void) 2918 4863 praks { 2919 4863 praks vfs_mnttab_rwop(0); 2920 4863 praks } 2921 4863 praks 2922 0 stevel /* 2923 0 stevel * Free any mnttab information recorded in the vfs struct. 2924 0 stevel * The vfs must not be on the vfs list. 2925 0 stevel */ 2926 0 stevel static void 2927 0 stevel vfs_freemnttab(struct vfs *vfsp) 2928 0 stevel { 2929 0 stevel ASSERT(!VFS_ON_LIST(vfsp)); 2930 0 stevel 2931 0 stevel /* 2932 0 stevel * Free device and mount point information 2933 0 stevel */ 2934 0 stevel if (vfsp->vfs_mntpt != NULL) { 2935 0 stevel refstr_rele(vfsp->vfs_mntpt); 2936 0 stevel vfsp->vfs_mntpt = NULL; 2937 0 stevel } 2938 0 stevel if (vfsp->vfs_resource != NULL) { 2939 0 stevel refstr_rele(vfsp->vfs_resource); 2940 0 stevel vfsp->vfs_resource = NULL; 2941 0 stevel } 2942 0 stevel /* 2943 0 stevel * Now free mount options information 2944 0 stevel */ 2945 0 stevel vfs_freeopttbl(&vfsp->vfs_mntopts); 2946 0 stevel } 2947 0 stevel 2948 0 stevel /* 2949 0 stevel * Return the last mnttab modification time 2950 0 stevel */ 2951 0 stevel void 2952 0 stevel vfs_mnttab_modtime(timespec_t *ts) 2953 0 stevel { 2954 0 stevel ASSERT(RW_LOCK_HELD(&vfslist)); 2955 0 stevel *ts = vfs_mnttab_mtime; 2956 0 stevel } 2957 0 stevel 2958 0 stevel /* 2959 0 stevel * See if mnttab is changed 2960 0 stevel */ 2961 0 stevel void 2962 0 stevel vfs_mnttab_poll(timespec_t *old, struct pollhead **phpp) 2963 0 stevel { 2964 0 stevel int changed; 2965 0 stevel 2966 0 stevel *phpp = (struct pollhead *)NULL; 2967 0 stevel 2968 0 stevel /* 2969 0 stevel * Note: don't grab vfs list lock before accessing vfs_mnttab_mtime. 2970 0 stevel * Can lead to deadlock against vfs_mnttab_modtimeupd(). It is safe 2971 0 stevel * to not grab the vfs list lock because tv_sec is monotonically 2972 0 stevel * increasing. 2973 0 stevel */ 2974 0 stevel 2975 0 stevel changed = (old->tv_nsec != vfs_mnttab_mtime.tv_nsec) || 2976 0 stevel (old->tv_sec != vfs_mnttab_mtime.tv_sec); 2977 0 stevel if (!changed) { 2978 0 stevel *phpp = &vfs_pollhd; 2979 0 stevel } 2980 0 stevel } 2981 0 stevel 2982 10910 Robert /* Provide a unique and monotonically-increasing timestamp. */ 2983 10910 Robert void 2984 10910 Robert vfs_mono_time(timespec_t *ts) 2985 10910 Robert { 2986 10910 Robert static volatile hrtime_t hrt; /* The saved time. */ 2987 10910 Robert hrtime_t newhrt, oldhrt; /* For effecting the CAS. */ 2988 10910 Robert timespec_t newts; 2989 10910 Robert 2990 11005 Robert /* 2991 11005 Robert * Try gethrestime() first, but be prepared to fabricate a sensible 2992 11005 Robert * answer at the first sign of any trouble. 2993 11005 Robert */ 2994 10910 Robert gethrestime(&newts); 2995 10910 Robert newhrt = ts2hrt(&newts); 2996 11005 Robert for (;;) { 2997 10910 Robert oldhrt = hrt; 2998 11005 Robert if (newhrt <= hrt) 2999 11005 Robert newhrt = hrt + 1; 3000 11005 Robert if (cas64((uint64_t *)&hrt, oldhrt, newhrt) == oldhrt) 3001 11005 Robert break; 3002 11005 Robert } 3003 10910 Robert hrt2ts(newhrt, ts); 3004 10910 Robert } 3005 10910 Robert 3006 0 stevel /* 3007 0 stevel * Update the mnttab modification time and wake up any waiters for 3008 0 stevel * mnttab changes 3009 0 stevel */ 3010 0 stevel void 3011 0 stevel vfs_mnttab_modtimeupd() 3012 0 stevel { 3013 11005 Robert hrtime_t oldhrt, newhrt; 3014 11005 Robert 3015 0 stevel ASSERT(RW_WRITE_HELD(&vfslist)); 3016 11005 Robert oldhrt = ts2hrt(&vfs_mnttab_mtime); 3017 11005 Robert gethrestime(&vfs_mnttab_mtime); 3018 11005 Robert newhrt = ts2hrt(&vfs_mnttab_mtime); 3019 11005 Robert if (oldhrt == (hrtime_t)0) 3020 0 stevel vfs_mnttab_ctime = vfs_mnttab_mtime; 3021 11005 Robert /* 3022 11005 Robert * Attempt to provide unique mtime (like uniqtime but not). 3023 11005 Robert */ 3024 11005 Robert if (newhrt == oldhrt) { 3025 11005 Robert newhrt++; 3026 11005 Robert hrt2ts(newhrt, &vfs_mnttab_mtime); 3027 11005 Robert } 3028 0 stevel pollwakeup(&vfs_pollhd, (short)POLLRDBAND); 3029 4863 praks vfs_mnttab_writeop(); 3030 0 stevel } 3031 0 stevel 3032 0 stevel int 3033 0 stevel dounmount(struct vfs *vfsp, int flag, cred_t *cr) 3034 0 stevel { 3035 0 stevel vnode_t *coveredvp; 3036 0 stevel int error; 3037 1488 rsb extern void teardown_vopstats(vfs_t *); 3038 0 stevel 3039 0 stevel /* 3040 0 stevel * Get covered vnode. This will be NULL if the vfs is not linked 3041 0 stevel * into the file system name space (i.e., domount() with MNT_NOSPICE). 3042 0 stevel */ 3043 0 stevel coveredvp = vfsp->vfs_vnodecovered; 3044 0 stevel ASSERT(coveredvp == NULL || vn_vfswlock_held(coveredvp)); 3045 0 stevel 3046 0 stevel /* 3047 0 stevel * Purge all dnlc entries for this vfs. 3048 0 stevel */ 3049 0 stevel (void) dnlc_purge_vfsp(vfsp, 0); 3050 0 stevel 3051 0 stevel /* For forcible umount, skip VFS_SYNC() since it may hang */ 3052 0 stevel if ((flag & MS_FORCE) == 0) 3053 0 stevel (void) VFS_SYNC(vfsp, 0, cr); 3054 0 stevel 3055 0 stevel /* 3056 0 stevel * Lock the vfs to maintain fs status quo during unmount. This 3057 0 stevel * has to be done after the sync because ufs_update tries to acquire 3058 0 stevel * the vfs_reflock. 3059 0 stevel */ 3060 0 stevel vfs_lock_wait(vfsp); 3061 0 stevel 3062 0 stevel if (error = VFS_UNMOUNT(vfsp, flag, cr)) { 3063 0 stevel vfs_unlock(vfsp); 3064 0 stevel if (coveredvp != NULL) 3065 0 stevel vn_vfsunlock(coveredvp); 3066 0 stevel } else if (coveredvp != NULL) { 3067 1488 rsb teardown_vopstats(vfsp); 3068 0 stevel /* 3069 0 stevel * vfs_remove() will do a VN_RELE(vfsp->vfs_vnodecovered) 3070 0 stevel * when it frees vfsp so we do a VN_HOLD() so we can 3071 0 stevel * continue to use coveredvp afterwards. 3072 0 stevel */ 3073 0 stevel VN_HOLD(coveredvp); 3074 0 stevel vfs_remove(vfsp); 3075 0 stevel vn_vfsunlock(coveredvp); 3076 0 stevel VN_RELE(coveredvp); 3077 0 stevel } else { 3078 1488 rsb teardown_vopstats(vfsp); 3079 0 stevel /* 3080 0 stevel * Release the reference to vfs that is not linked 3081 0 stevel * into the name space. 3082 0 stevel */ 3083 0 stevel vfs_unlock(vfsp); 3084 0 stevel VFS_RELE(vfsp); 3085 0 stevel } 3086 0 stevel return (error); 3087 0 stevel } 3088 0 stevel 3089 0 stevel 3090 0 stevel /* 3091 0 stevel * Vfs_unmountall() is called by uadmin() to unmount all 3092 0 stevel * mounted file systems (except the root file system) during shutdown. 3093 0 stevel * It follows the existing locking protocol when traversing the vfs list 3094 0 stevel * to sync and unmount vfses. Even though there should be no 3095 0 stevel * other thread running while the system is shutting down, it is prudent 3096 0 stevel * to still follow the locking protocol. 3097 0 stevel */ 3098 0 stevel void 3099 0 stevel vfs_unmountall(void) 3100 0 stevel { 3101 0 stevel struct vfs *vfsp; 3102 0 stevel struct vfs *prev_vfsp = NULL; 3103 0 stevel int error; 3104 0 stevel 3105 0 stevel /* 3106 0 stevel * Toss all dnlc entries now so that the per-vfs sync 3107 0 stevel * and unmount operations don't have to slog through 3108 0 stevel * a bunch of uninteresting vnodes over and over again. 3109 0 stevel */ 3110 0 stevel dnlc_purge(); 3111 0 stevel 3112 0 stevel vfs_list_lock(); 3113 0 stevel for (vfsp = rootvfs->vfs_prev; vfsp != rootvfs; vfsp = prev_vfsp) { 3114 0 stevel prev_vfsp = vfsp->vfs_prev; 3115 0 stevel 3116 0 stevel if (vfs_lock(vfsp) != 0) 3117 0 stevel continue; 3118 0 stevel error = vn_vfswlock(vfsp->vfs_vnodecovered); 3119 0 stevel vfs_unlock(vfsp); 3120 0 stevel if (error) 3121 0 stevel continue; 3122 0 stevel 3123 0 stevel vfs_list_unlock(); 3124 0 stevel 3125 0 stevel (void) VFS_SYNC(vfsp, SYNC_CLOSE, CRED()); 3126 0 stevel (void) dounmount(vfsp, 0, CRED()); 3127 0 stevel 3128 0 stevel /* 3129 0 stevel * Since we dropped the vfslist lock above we must 3130 0 stevel * verify that next_vfsp still exists, else start over. 3131 0 stevel */ 3132 0 stevel vfs_list_lock(); 3133 0 stevel for (vfsp = rootvfs->vfs_prev; 3134 3912 lling vfsp != rootvfs; vfsp = vfsp->vfs_prev) 3135 0 stevel if (vfsp == prev_vfsp) 3136 0 stevel break; 3137 0 stevel if (vfsp == rootvfs && prev_vfsp != rootvfs) 3138 0 stevel prev_vfsp = rootvfs->vfs_prev; 3139 0 stevel } 3140 0 stevel vfs_list_unlock(); 3141 0 stevel } 3142 0 stevel 3143 0 stevel /* 3144 0 stevel * Called to add an entry to the end of the vfs mount in progress list 3145 0 stevel */ 3146 0 stevel void 3147 0 stevel vfs_addmip(dev_t dev, struct vfs *vfsp) 3148 0 stevel { 3149 0 stevel struct ipmnt *mipp; 3150 0 stevel 3151 0 stevel mipp = (struct ipmnt *)kmem_alloc(sizeof (struct ipmnt), KM_SLEEP); 3152 0 stevel mipp->mip_next = NULL; 3153 0 stevel mipp->mip_dev = dev; 3154 0 stevel mipp->mip_vfsp = vfsp; 3155 0 stevel mutex_enter(&vfs_miplist_mutex); 3156 0 stevel if (vfs_miplist_end != NULL) 3157 0 stevel vfs_miplist_end->mip_next = mipp; 3158 0 stevel else 3159 0 stevel vfs_miplist = mipp; 3160 0 stevel vfs_miplist_end = mipp; 3161 0 stevel mutex_exit(&vfs_miplist_mutex); 3162 0 stevel } 3163 0 stevel 3164 0 stevel /* 3165 0 stevel * Called to remove an entry from the mount in progress list 3166 0 stevel * Either because the mount completed or it failed. 3167 0 stevel */ 3168 0 stevel void 3169 0 stevel vfs_delmip(struct vfs *vfsp) 3170 0 stevel { 3171 0 stevel struct ipmnt *mipp, *mipprev; 3172 0 stevel 3173 0 stevel mutex_enter(&vfs_miplist_mutex); 3174 0 stevel mipprev = NULL; 3175 0 stevel for (mipp = vfs_miplist; 3176 3912 lling mipp && mipp->mip_vfsp != vfsp; mipp = mipp->mip_next) { 3177 0 stevel mipprev = mipp; 3178 0 stevel } 3179 0 stevel if (mipp == NULL) 3180 0 stevel return; /* shouldn't happen */ 3181 0 stevel if (mipp == vfs_miplist_end) 3182 0 stevel vfs_miplist_end = mipprev; 3183 0 stevel if (mipprev == NULL) 3184 0 stevel vfs_miplist = mipp->mip_next; 3185 0 stevel else 3186 0 stevel mipprev->mip_next = mipp->mip_next; 3187 0 stevel mutex_exit(&vfs_miplist_mutex); 3188 0 stevel kmem_free(mipp, sizeof (struct ipmnt)); 3189 0 stevel } 3190 0 stevel 3191 0 stevel /* 3192 0 stevel * vfs_add is called by a specific filesystem's mount routine to add 3193 0 stevel * the new vfs into the vfs list/hash and to cover the mounted-on vnode. 3194 0 stevel * The vfs should already have been locked by the caller. 3195 0 stevel * 3196 0 stevel * coveredvp is NULL if this is the root. 3197 0 stevel */ 3198 0 stevel void 3199 0 stevel vfs_add(vnode_t *coveredvp, struct vfs *vfsp, int mflag) 3200 0 stevel { 3201 0 stevel int newflag; 3202 0 stevel 3203 0 stevel ASSERT(vfs_lock_held(vfsp)); 3204 0 stevel VFS_HOLD(vfsp); 3205 0 stevel newflag = vfsp->vfs_flag; 3206 0 stevel if (mflag & MS_RDONLY) 3207 0 stevel newflag |= VFS_RDONLY; 3208 0 stevel else 3209 0 stevel newflag &= ~VFS_RDONLY; 3210 0 stevel if (mflag & MS_NOSUID) 3211 0 stevel newflag |= (VFS_NOSETUID|VFS_NODEVICES); 3212 0 stevel else 3213