Home | History | Annotate | Download | only in lofs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 #include <sys/param.h>
     29 #include <sys/errno.h>
     30 #include <sys/vfs.h>
     31 #include <sys/vfs_opreg.h>
     32 #include <sys/vnode.h>
     33 #include <sys/uio.h>
     34 #include <sys/pathname.h>
     35 #include <sys/kmem.h>
     36 #include <sys/cred.h>
     37 #include <sys/statvfs.h>
     38 #include <sys/fs/lofs_info.h>
     39 #include <sys/fs/lofs_node.h>
     40 #include <sys/mount.h>
     41 #include <sys/mntent.h>
     42 #include <sys/mkdev.h>
     43 #include <sys/priv.h>
     44 #include <sys/sysmacros.h>
     45 #include <sys/systm.h>
     46 #include <sys/cmn_err.h>
     47 #include <sys/policy.h>
     48 #include <sys/tsol/label.h>
     49 #include "fs/fs_subr.h"
     50 
     51 /*
     52  * This is the loadable module wrapper.
     53  */
     54 #include <sys/modctl.h>
     55 
     56 static mntopts_t lofs_mntopts;
     57 
     58 static int lofsinit(int, char *);
     59 
     60 static vfsdef_t vfw = {
     61 	VFSDEF_VERSION,
     62 	"lofs",
     63 	lofsinit,
     64 	VSW_HASPROTO|VSW_STATS,
     65 	&lofs_mntopts
     66 };
     67 
     68 /*
     69  * LOFS mount options table
     70  */
     71 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
     72 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
     73 static char *sub_cancel[] = { MNTOPT_LOFS_NOSUB, NULL };
     74 static char *nosub_cancel[] = { MNTOPT_LOFS_SUB, NULL };
     75 
     76 static mntopt_t mntopts[] = {
     77 /*
     78  *	option name		cancel option	default arg	flags
     79  *		private data
     80  */
     81 	{ MNTOPT_XATTR,		xattr_cancel,	NULL,		0,
     82 		(void *)0 },
     83 	{ MNTOPT_NOXATTR,	noxattr_cancel,	NULL,		0,
     84 		(void *)0 },
     85 	{ MNTOPT_LOFS_SUB,	sub_cancel,	NULL,		0,
     86 		(void *)0 },
     87 	{ MNTOPT_LOFS_NOSUB,	nosub_cancel,	NULL,		0,
     88 		(void *)0 },
     89 };
     90 
     91 static mntopts_t lofs_mntopts = {
     92 	sizeof (mntopts) / sizeof (mntopt_t),
     93 	mntopts
     94 };
     95 
     96 /*
     97  * Module linkage information for the kernel.
     98  */
     99 
    100 static struct modlfs modlfs = {
    101 	&mod_fsops, "filesystem for lofs", &vfw
    102 };
    103 
    104 static struct modlinkage modlinkage = {
    105 	MODREV_1, (void *)&modlfs, NULL
    106 };
    107 
    108 /*
    109  * This is the module initialization routine.
    110  */
    111 
    112 int
    113 _init(void)
    114 {
    115 	int status;
    116 
    117 	lofs_subrinit();
    118 	status = mod_install(&modlinkage);
    119 	if (status != 0) {
    120 		/*
    121 		 * Cleanup previously initialized work.
    122 		 */
    123 		lofs_subrfini();
    124 	}
    125 
    126 	return (status);
    127 }
    128 
    129 /*
    130  * Don't allow the lofs module to be unloaded for now.
    131  * There is a memory leak if it gets unloaded.
    132  */
    133 
    134 int
    135 _fini(void)
    136 {
    137 	return (EBUSY);
    138 }
    139 
    140 int
    141 _info(struct modinfo *modinfop)
    142 {
    143 	return (mod_info(&modlinkage, modinfop));
    144 }
    145 
    146 
    147 static int lofsfstype;
    148 vfsops_t *lo_vfsops;
    149 
    150 /*
    151  * lo mount vfsop
    152  * Set up mount info record and attach it to vfs struct.
    153  */
    154 /*ARGSUSED*/
    155 static int
    156 lo_mount(struct vfs *vfsp,
    157 	struct vnode *vp,
    158 	struct mounta *uap,
    159 	struct cred *cr)
    160 {
    161 	int error;
    162 	struct vnode *srootvp = NULL;	/* the server's root */
    163 	struct vnode *realrootvp;
    164 	struct loinfo *li;
    165 	int nodev;
    166 
    167 	nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL);
    168 
    169 	if ((error = secpolicy_fs_mount(cr, vp, vfsp)) != 0)
    170 		return (EPERM);
    171 
    172 	/*
    173 	 * Loopback devices which get "nodevices" added can be done without
    174 	 * "nodevices" set because we cannot import devices into a zone
    175 	 * with loopback.  Note that we have all zone privileges when
    176 	 * this happens; if not, we'd have gotten "nosuid".
    177 	 */
    178 	if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
    179 		vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY);
    180 
    181 	mutex_enter(&vp->v_lock);
    182 	if (!(uap->flags & MS_OVERLAY) &&
    183 	    (vp->v_count != 1 || (vp->v_flag & VROOT))) {
    184 		mutex_exit(&vp->v_lock);
    185 		return (EBUSY);
    186 	}
    187 	mutex_exit(&vp->v_lock);
    188 
    189 	/*
    190 	 * Find real root, and make vfs point to real vfs
    191 	 */
    192 
    193 	if (error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ?
    194 	    UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP, &realrootvp))
    195 		return (error);
    196 
    197 	/*
    198 	 * Enforce MAC policy if needed.
    199 	 *
    200 	 * Loopback mounts must not allow writing up. The dominance test
    201 	 * is intended to prevent a global zone caller from accidentally
    202 	 * creating write-up conditions between two labeled zones.
    203 	 * Local zones can't violate MAC on their own without help from
    204 	 * the global zone because they can't name a pathname that
    205 	 * they don't already have.
    206 	 *
    207 	 * The special case check for the NET_MAC_AWARE process flag is
    208 	 * to support the case of the automounter in the global zone. We
    209 	 * permit automounting of local zone directories such as home
    210 	 * directories, into the global zone as required by setlabel,
    211 	 * zonecopy, and saving of desktop sessions. Such mounts are
    212 	 * trusted not to expose the contents of one zone's directories
    213 	 * to another by leaking them through the global zone.
    214 	 */
    215 	if (is_system_labeled() && crgetzoneid(cr) == GLOBAL_ZONEID) {
    216 		char	specname[MAXPATHLEN];
    217 		zone_t	*from_zptr;
    218 		zone_t	*to_zptr;
    219 
    220 		if (vnodetopath(NULL, realrootvp, specname,
    221 		    sizeof (specname), CRED()) != 0) {
    222 			VN_RELE(realrootvp);
    223 			return (EACCES);
    224 		}
    225 
    226 		from_zptr = zone_find_by_path(specname);
    227 		to_zptr = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
    228 
    229 		/*
    230 		 * Special case for zone devfs: the zone for /dev will
    231 		 * incorrectly appear as the global zone since it's not
    232 		 * under the zone rootpath.  So for zone devfs check allow
    233 		 * read-write mounts.
    234 		 *
    235 		 * Second special case for scratch zones used for Live Upgrade:
    236 		 * this is used to mount the zone's root from /root to /a in
    237 		 * the scratch zone.  As with the other special case, this
    238 		 * appears to be outside of the zone because it's not under
    239 		 * the zone rootpath, which is $ZONEPATH/lu in the scratch
    240 		 * zone case.
    241 		 */
    242 
    243 		if (from_zptr != to_zptr &&
    244 		    !(to_zptr->zone_flags & ZF_IS_SCRATCH)) {
    245 			/*
    246 			 * We know at this point that the labels aren't equal
    247 			 * because the zone pointers aren't equal, and zones
    248 			 * can't share a label.
    249 			 *
    250 			 * If the source is the global zone then making
    251 			 * it available to a local zone must be done in
    252 			 * read-only mode as the label will become admin_low.
    253 			 *
    254 			 * If it is a mount between local zones then if
    255 			 * the current process is in the global zone and has
    256 			 * the NET_MAC_AWARE flag, then regular read-write
    257 			 * access is allowed.  If it's in some other zone, but
    258 			 * the label on the mount point dominates the original
    259 			 * source, then allow the mount as read-only
    260 			 * ("read-down").
    261 			 */
    262 			if (from_zptr->zone_id == GLOBAL_ZONEID) {
    263 				/* make the mount read-only */
    264 				vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
    265 			} else { /* cross-zone mount */
    266 				if (to_zptr->zone_id == GLOBAL_ZONEID &&
    267 				    /* LINTED: no consequent */
    268 				    getpflags(NET_MAC_AWARE, cr) != 0) {
    269 					/* Allow the mount as read-write */
    270 				} else if (bldominates(
    271 				    label2bslabel(to_zptr->zone_slabel),
    272 				    label2bslabel(from_zptr->zone_slabel))) {
    273 					/* make the mount read-only */
    274 					vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
    275 				} else {
    276 					VN_RELE(realrootvp);
    277 					zone_rele(to_zptr);
    278 					zone_rele(from_zptr);
    279 					return (EACCES);
    280 				}
    281 			}
    282 		}
    283 		zone_rele(to_zptr);
    284 		zone_rele(from_zptr);
    285 	}
    286 
    287 	/*
    288 	 * realrootvp may be an AUTOFS node, in which case we
    289 	 * perform a VOP_ACCESS() to trigger the mount of the
    290 	 * intended filesystem, so we loopback mount the intended
    291 	 * filesystem instead of the AUTOFS filesystem.
    292 	 */
    293 	(void) VOP_ACCESS(realrootvp, 0, 0, cr, NULL);
    294 
    295 	/*
    296 	 * We're interested in the top most filesystem.
    297 	 * This is specially important when uap->spec is a trigger
    298 	 * AUTOFS node, since we're really interested in mounting the
    299 	 * filesystem AUTOFS mounted as result of the VOP_ACCESS()
    300 	 * call not the AUTOFS node itself.
    301 	 */
    302 	if (vn_mountedvfs(realrootvp) != NULL) {
    303 		if (error = traverse(&realrootvp)) {
    304 			VN_RELE(realrootvp);
    305 			return (error);
    306 		}
    307 	}
    308 
    309 	/*
    310 	 * Allocate a vfs info struct and attach it
    311 	 */
    312 	li = kmem_zalloc(sizeof (struct loinfo), KM_SLEEP);
    313 	li->li_realvfs = realrootvp->v_vfsp;
    314 	li->li_mountvfs = vfsp;
    315 
    316 	/*
    317 	 * Set mount flags to be inherited by loopback vfs's
    318 	 */
    319 	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
    320 		li->li_mflag |= VFS_RDONLY;
    321 	}
    322 	if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
    323 		li->li_mflag |= (VFS_NOSETUID|VFS_NODEVICES);
    324 	}
    325 	if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) {
    326 		li->li_mflag |= VFS_NODEVICES;
    327 	}
    328 	if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
    329 		li->li_mflag |= VFS_NOSETUID;
    330 	}
    331 	/*
    332 	 * Permissive flags are added to the "deny" bitmap.
    333 	 */
    334 	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
    335 		li->li_dflag |= VFS_XATTR;
    336 	}
    337 	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
    338 		li->li_dflag |= VFS_NBMAND;
    339 	}
    340 
    341 	/*
    342 	 * Propagate inheritable mount flags from the real vfs.
    343 	 */
    344 	if ((li->li_realvfs->vfs_flag & VFS_RDONLY) &&
    345 	    !vfs_optionisset(vfsp, MNTOPT_RO, NULL))
    346 		vfs_setmntopt(vfsp, MNTOPT_RO, NULL,
    347 		    VFS_NODISPLAY);
    348 	if ((li->li_realvfs->vfs_flag & VFS_NOSETUID) &&
    349 	    !vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
    350 		vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL,
    351 		    VFS_NODISPLAY);
    352 	if ((li->li_realvfs->vfs_flag & VFS_NODEVICES) &&
    353 	    !vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
    354 		vfs_setmntopt(vfsp, MNTOPT_NODEVICES, NULL,
    355 		    VFS_NODISPLAY);
    356 	/*
    357 	 * Permissive flags such as VFS_XATTR, as opposed to restrictive flags
    358 	 * such as VFS_RDONLY, are handled differently.  An explicit
    359 	 * MNTOPT_NOXATTR should override the underlying filesystem's VFS_XATTR.
    360 	 */
    361 	if ((li->li_realvfs->vfs_flag & VFS_XATTR) &&
    362 	    !vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL) &&
    363 	    !vfs_optionisset(vfsp, MNTOPT_XATTR, NULL))
    364 		vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL,
    365 		    VFS_NODISPLAY);
    366 	if ((li->li_realvfs->vfs_flag & VFS_NBMAND) &&
    367 	    !vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL) &&
    368 	    !vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL))
    369 		vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL,
    370 		    VFS_NODISPLAY);
    371 
    372 	li->li_refct = 0;
    373 	vfsp->vfs_data = (caddr_t)li;
    374 	vfsp->vfs_bcount = 0;
    375 	vfsp->vfs_fstype = lofsfstype;
    376 	vfsp->vfs_bsize = li->li_realvfs->vfs_bsize;
    377 
    378 	vfsp->vfs_dev = li->li_realvfs->vfs_dev;
    379 	vfsp->vfs_fsid.val[0] = li->li_realvfs->vfs_fsid.val[0];
    380 	vfsp->vfs_fsid.val[1] = li->li_realvfs->vfs_fsid.val[1];
    381 
    382 	if (vfs_optionisset(vfsp, MNTOPT_LOFS_NOSUB, NULL)) {
    383 		li->li_flag |= LO_NOSUB;
    384 	}
    385 
    386 	/*
    387 	 * Propagate any VFS features
    388 	 */
    389 
    390 	vfs_propagate_features(li->li_realvfs, vfsp);
    391 
    392 	/*
    393 	 * Setup the hashtable. If the root of this mount isn't a directory,
    394 	 * there's no point in allocating a large hashtable. A table with one
    395 	 * bucket is sufficient.
    396 	 */
    397 	if (realrootvp->v_type != VDIR)
    398 		lsetup(li, 1);
    399 	else
    400 		lsetup(li, 0);
    401 
    402 	/*
    403 	 * Make the root vnode
    404 	 */
    405 	srootvp = makelonode(realrootvp, li, 0);
    406 	srootvp->v_flag |= VROOT;
    407 	li->li_rootvp = srootvp;
    408 
    409 #ifdef LODEBUG
    410 	lo_dprint(4, "lo_mount: vfs %p realvfs %p root %p realroot %p li %p\n",
    411 	    vfsp, li->li_realvfs, srootvp, realrootvp, li);
    412 #endif
    413 	return (0);
    414 }
    415 
    416 /*
    417  * Undo loopback mount
    418  */
    419 static int
    420 lo_unmount(struct vfs *vfsp, int flag, struct cred *cr)
    421 {
    422 	struct loinfo *li;
    423 
    424 	if (secpolicy_fs_unmount(cr, vfsp) != 0)
    425 		return (EPERM);
    426 
    427 	/*
    428 	 * Forced unmount is not supported by this file system
    429 	 * and thus, ENOTSUP, is being returned.
    430 	 */
    431 	if (flag & MS_FORCE)
    432 		return (ENOTSUP);
    433 
    434 	li = vtoli(vfsp);
    435 #ifdef LODEBUG
    436 	lo_dprint(4, "lo_unmount(%p) li %p\n", vfsp, li);
    437 #endif
    438 	if (li->li_refct != 1 || li->li_rootvp->v_count != 1) {
    439 #ifdef LODEBUG
    440 		lo_dprint(4, "refct %d v_ct %d\n", li->li_refct,
    441 		    li->li_rootvp->v_count);
    442 #endif
    443 		return (EBUSY);
    444 	}
    445 	VN_RELE(li->li_rootvp);
    446 	return (0);
    447 }
    448 
    449 /*
    450  * Find root of lofs mount.
    451  */
    452 static int
    453 lo_root(struct vfs *vfsp, struct vnode **vpp)
    454 {
    455 	*vpp = vtoli(vfsp)->li_rootvp;
    456 #ifdef LODEBUG
    457 	lo_dprint(4, "lo_root(0x%p) = %p\n", vfsp, *vpp);
    458 #endif
    459 	/*
    460 	 * If the root of the filesystem is a special file, return the specvp
    461 	 * version of the vnode. We don't save the specvp vnode in our
    462 	 * hashtable since that's exclusively for lnodes.
    463 	 */
    464 	if (IS_DEVVP(*vpp)) {
    465 		struct vnode *svp;
    466 
    467 		svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, kcred);
    468 		if (svp == NULL)
    469 			return (ENOSYS);
    470 		*vpp = svp;
    471 	} else {
    472 		VN_HOLD(*vpp);
    473 	}
    474 
    475 	return (0);
    476 }
    477 
    478 /*
    479  * Get file system statistics.
    480  */
    481 static int
    482 lo_statvfs(register struct vfs *vfsp, struct statvfs64 *sbp)
    483 {
    484 	vnode_t *realrootvp;
    485 
    486 #ifdef LODEBUG
    487 	lo_dprint(4, "lostatvfs %p\n", vfsp);
    488 #endif
    489 	/*
    490 	 * Using realrootvp->v_vfsp (instead of the realvfsp that was
    491 	 * cached) is necessary to make lofs work woth forced UFS unmounts.
    492 	 * In the case of a forced unmount, UFS stores a set of dummy vfsops
    493 	 * in all the (i)vnodes in the filesystem. The dummy ops simply
    494 	 * returns back EIO.
    495 	 */
    496 	(void) lo_realvfs(vfsp, &realrootvp);
    497 	if (realrootvp != NULL)
    498 		return (VFS_STATVFS(realrootvp->v_vfsp, sbp));
    499 	else
    500 		return (EIO);
    501 }
    502 
    503 /*
    504  * LOFS doesn't have any data or metadata to flush, pending I/O on the
    505  * underlying filesystem will be flushed when such filesystem is synched.
    506  */
    507 /* ARGSUSED */
    508 static int
    509 lo_sync(struct vfs *vfsp,
    510 	short flag,
    511 	struct cred *cr)
    512 {
    513 #ifdef LODEBUG
    514 	lo_dprint(4, "lo_sync: %p\n", vfsp);
    515 #endif
    516 	return (0);
    517 }
    518 
    519 /*
    520  * Obtain the vnode from the underlying filesystem.
    521  */
    522 static int
    523 lo_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
    524 {
    525 	vnode_t *realrootvp;
    526 
    527 #ifdef LODEBUG
    528 	lo_dprint(4, "lo_vget: %p\n", vfsp);
    529 #endif
    530 	(void) lo_realvfs(vfsp, &realrootvp);
    531 	if (realrootvp != NULL)
    532 		return (VFS_VGET(realrootvp->v_vfsp, vpp, fidp));
    533 	else
    534 		return (EIO);
    535 }
    536 
    537 /*
    538  * Free mount-specific data.
    539  */
    540 static void
    541 lo_freevfs(struct vfs *vfsp)
    542 {
    543 	struct loinfo *li = vtoli(vfsp);
    544 
    545 	ldestroy(li);
    546 	kmem_free(li, sizeof (struct loinfo));
    547 }
    548 
    549 static int
    550 lofsinit(int fstyp, char *name)
    551 {
    552 	static const fs_operation_def_t lo_vfsops_template[] = {
    553 		VFSNAME_MOUNT,		{ .vfs_mount = lo_mount },
    554 		VFSNAME_UNMOUNT,	{ .vfs_unmount = lo_unmount },
    555 		VFSNAME_ROOT,		{ .vfs_root = lo_root },
    556 		VFSNAME_STATVFS,	{ .vfs_statvfs = lo_statvfs },
    557 		VFSNAME_SYNC,		{ .vfs_sync = lo_sync },
    558 		VFSNAME_VGET,		{ .vfs_vget = lo_vget },
    559 		VFSNAME_FREEVFS,	{ .vfs_freevfs = lo_freevfs },
    560 		NULL,			NULL
    561 	};
    562 	int error;
    563 
    564 	error = vfs_setfsops(fstyp, lo_vfsops_template, &lo_vfsops);
    565 	if (error != 0) {
    566 		cmn_err(CE_WARN, "lofsinit: bad vfs ops template");
    567 		return (error);
    568 	}
    569 
    570 	error = vn_make_ops(name, lo_vnodeops_template, &lo_vnodeops);
    571 	if (error != 0) {
    572 		(void) vfs_freevfsops_by_type(fstyp);
    573 		cmn_err(CE_WARN, "lofsinit: bad vnode ops template");
    574 		return (error);
    575 	}
    576 
    577 	lofsfstype = fstyp;
    578 
    579 	return (0);
    580 }
    581