Home | History | Annotate | Download | only in devfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * This is the device filesystem.
     28  *
     29  * It is a combination of a namer to drive autoconfiguration,
     30  * plus the access methods for the device drivers of the system.
     31  *
     32  * The prototype is fairly dependent on specfs for the latter part
     33  * of its implementation, though a final version would integrate the two.
     34  */
     35 #include <sys/types.h>
     36 #include <sys/param.h>
     37 #include <sys/sysmacros.h>
     38 #include <sys/systm.h>
     39 #include <sys/kmem.h>
     40 #include <sys/time.h>
     41 #include <sys/pathname.h>
     42 #include <sys/vfs.h>
     43 #include <sys/vfs_opreg.h>
     44 #include <sys/vnode.h>
     45 #include <sys/stat.h>
     46 #include <sys/uio.h>
     47 #include <sys/stat.h>
     48 #include <sys/errno.h>
     49 #include <sys/cmn_err.h>
     50 #include <sys/cred.h>
     51 #include <sys/statvfs.h>
     52 #include <sys/mount.h>
     53 #include <sys/debug.h>
     54 #include <sys/modctl.h>
     55 #include <fs/fs_subr.h>
     56 #include <sys/fs/dv_node.h>
     57 #include <sys/fs/snode.h>
     58 #include <sys/sunndi.h>
     59 #include <sys/policy.h>
     60 #include <sys/sunmdi.h>
     61 
     62 /*
     63  * devfs vfs operations.
     64  */
     65 static int devfs_mount(struct vfs *, struct vnode *, struct mounta *,
     66     struct cred *);
     67 static int devfs_unmount(struct vfs *, int, struct cred *);
     68 static int devfs_root(struct vfs *, struct vnode **);
     69 static int devfs_statvfs(struct vfs *, struct statvfs64 *);
     70 static int devfs_mountroot(struct vfs *, enum whymountroot);
     71 
     72 static int devfsinit(int, char *);
     73 
     74 static vfsdef_t devfs_vfssw = {
     75 	VFSDEF_VERSION,
     76 	"devfs",	/* type name string */
     77 	devfsinit,	/* init routine */
     78 	0,		/* flags */
     79 	NULL		/* mount options table prototype */
     80 };
     81 
     82 static kmutex_t devfs_lock;	/* protects global data */
     83 static int devfstype;		/* fstype */
     84 static dev_t devfsdev;		/* the fictious 'device' we live on */
     85 static struct devfs_data *devfs_mntinfo;	/* linked list of instances */
     86 
     87 /*
     88  * Module linkage information
     89  */
     90 static struct modlfs modlfs = {
     91 	&mod_fsops, "devices filesystem", &devfs_vfssw
     92 };
     93 
     94 static struct modlinkage modlinkage = {
     95 	MODREV_1, (void *)&modlfs, NULL
     96 };
     97 
     98 int
     99 _init(void)
    100 {
    101 	int e;
    102 
    103 	mutex_init(&devfs_lock, "devfs lock", MUTEX_DEFAULT, NULL);
    104 	dv_node_cache_init();
    105 	if ((e = mod_install(&modlinkage)) != 0) {
    106 		dv_node_cache_fini();
    107 		mutex_destroy(&devfs_lock);
    108 		return (e);
    109 	}
    110 	dcmn_err(("devfs loaded\n"));
    111 	return (0);
    112 }
    113 
    114 int
    115 _fini(void)
    116 {
    117 	return (EBUSY);
    118 }
    119 
    120 int
    121 _info(struct modinfo *modinfop)
    122 {
    123 	return (mod_info(&modlinkage, modinfop));
    124 }
    125 
    126 /*ARGSUSED1*/
    127 static int
    128 devfsinit(int fstype, char *name)
    129 {
    130 	static const fs_operation_def_t devfs_vfsops_template[] = {
    131 		VFSNAME_MOUNT,		{ .vfs_mount = devfs_mount },
    132 		VFSNAME_UNMOUNT,	{ .vfs_unmount = devfs_unmount },
    133 		VFSNAME_ROOT,		{ .vfs_root = devfs_root },
    134 		VFSNAME_STATVFS,	{ .vfs_statvfs = devfs_statvfs },
    135 		VFSNAME_SYNC,		{ .vfs_sync = fs_sync },
    136 		VFSNAME_MOUNTROOT,	{ .vfs_mountroot = devfs_mountroot },
    137 		NULL,			NULL
    138 	};
    139 	int error;
    140 	int dev;
    141 	extern major_t getudev(void);	/* gack - what a function */
    142 
    143 	devfstype = fstype;
    144 	/*
    145 	 * Associate VFS ops vector with this fstype
    146 	 */
    147 	error = vfs_setfsops(fstype, devfs_vfsops_template, NULL);
    148 	if (error != 0) {
    149 		cmn_err(CE_WARN, "devfsinit: bad vfs ops template");
    150 		return (error);
    151 	}
    152 
    153 	error = vn_make_ops("dev fs", dv_vnodeops_template, &dv_vnodeops);
    154 	if (error != 0) {
    155 		(void) vfs_freevfsops_by_type(fstype);
    156 		cmn_err(CE_WARN, "devfsinit: bad vnode ops template");
    157 		return (error);
    158 	}
    159 
    160 	/*
    161 	 * Invent a dev_t (sigh).
    162 	 */
    163 	if ((dev = getudev()) == DDI_MAJOR_T_NONE) {
    164 		cmn_err(CE_NOTE, "%s: can't get unique dev", devfs_vfssw.name);
    165 		dev = 0;
    166 	}
    167 	devfsdev = makedevice(dev, 0);
    168 
    169 	return (0);
    170 }
    171 
    172 /*
    173  * The name of the mount point and the name of the attribute
    174  * filesystem are passed down from userland for now.
    175  */
    176 static int
    177 devfs_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
    178     struct cred *cr)
    179 {
    180 	struct devfs_data *devfs_data;
    181 	struct vnode *avp;
    182 	struct dv_node *dv;
    183 	struct vattr va;
    184 
    185 	dcmn_err(("devfs_mount\n"));
    186 
    187 	if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
    188 		return (EPERM);
    189 
    190 	/*
    191 	 * check that the mount point is sane
    192 	 */
    193 	if (mvp->v_type != VDIR)
    194 		return (ENOTDIR);
    195 
    196 	ASSERT(uap->flags & MS_SYSSPACE);
    197 	/*
    198 	 * Devfs can only be mounted from kernel during boot.
    199 	 * avp is the existing /devices, the same as the mount point.
    200 	 */
    201 	avp = mvp;
    202 
    203 	/*
    204 	 * Create and initialize the vfs-private data.
    205 	 * This includes a hand-crafted root vnode (we build
    206 	 * this here mostly so that traverse() doesn't sleep
    207 	 * in VFS_ROOT()).
    208 	 */
    209 	mutex_enter(&devfs_lock);
    210 	ASSERT(devfs_mntinfo == NULL);
    211 	dv = dv_mkroot(vfsp, devfsdev);
    212 	dv->dv_attrvp = avp;		/* attribute root vp */
    213 
    214 	ASSERT(dv == dv->dv_dotdot);
    215 
    216 	devfs_data = kmem_zalloc(sizeof (struct devfs_data), KM_SLEEP);
    217 	devfs_data->devfs_vfsp = vfsp;
    218 	devfs_data->devfs_root = dv;
    219 
    220 	vfsp->vfs_data = (caddr_t)devfs_data;
    221 	vfsp->vfs_fstype = devfstype;
    222 	vfsp->vfs_dev = devfsdev;
    223 	vfsp->vfs_bsize = DEV_BSIZE;
    224 	vfsp->vfs_mtime = ddi_get_time();
    225 	vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devfstype);
    226 
    227 	/* We're there. */
    228 	devfs_mntinfo = devfs_data;
    229 	mutex_exit(&devfs_lock);
    230 
    231 	va.va_mask = AT_ATIME|AT_MTIME;
    232 	gethrestime(&va.va_atime);
    233 	gethrestime(&va.va_mtime);
    234 	(void) VOP_SETATTR(DVTOV(dv), &va, 0, cr, NULL);
    235 	return (0);
    236 }
    237 
    238 
    239 /*
    240  * We never unmount devfs in a real production system.
    241  */
    242 /*ARGSUSED*/
    243 static int
    244 devfs_unmount(struct vfs *vfsp, int flag, struct cred *cr)
    245 {
    246 	return (EBUSY);
    247 }
    248 
    249 /*
    250  * return root vnode for given vfs
    251  */
    252 static int
    253 devfs_root(struct vfs *vfsp, struct vnode **vpp)
    254 {
    255 	dcmn_err(("devfs_root\n"));
    256 	*vpp = DVTOV(VFSTODVFS(vfsp)->devfs_root);
    257 	VN_HOLD(*vpp);
    258 	return (0);
    259 }
    260 
    261 /*
    262  * return 'generic superblock' information to userland.
    263  *
    264  * not much that we can usefully admit to here
    265  */
    266 static int
    267 devfs_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
    268 {
    269 	extern kmem_cache_t *dv_node_cache;
    270 
    271 	dev32_t d32;
    272 
    273 	dcmn_err(("devfs_statvfs\n"));
    274 	bzero(sbp, sizeof (*sbp));
    275 	sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize;
    276 	/*
    277 	 * We could compute the number of devfsnodes here .. but since
    278 	 * it's dynamic anyway, it's not clear how useful this is.
    279 	 */
    280 	sbp->f_files = kmem_cache_stat(dv_node_cache, "alloc");
    281 
    282 	/* no illusions that free/avail files is relevant to devfs */
    283 	sbp->f_ffree = 0;
    284 	sbp->f_favail = 0;
    285 
    286 	/* no illusions that blocks are relevant to devfs */
    287 	sbp->f_bfree = 0;
    288 	sbp->f_bavail = 0;
    289 	sbp->f_blocks = 0;
    290 
    291 	(void) cmpldev(&d32, vfsp->vfs_dev);
    292 	sbp->f_fsid = d32;
    293 	(void) strcpy(sbp->f_basetype, vfssw[devfstype].vsw_name);
    294 	sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
    295 	sbp->f_namemax = MAXNAMELEN - 1;
    296 	(void) strcpy(sbp->f_fstr, "devices");
    297 
    298 	return (0);
    299 }
    300 
    301 /*
    302  * devfs always mount after root is mounted, so this should never
    303  * be invoked.
    304  */
    305 /*ARGSUSED*/
    306 static int
    307 devfs_mountroot(struct vfs *vfsp, enum whymountroot why)
    308 {
    309 	dcmn_err(("devfs_mountroot\n"));
    310 
    311 	return (EINVAL);
    312 }
    313 
    314 struct dv_node *
    315 devfs_dip_to_dvnode(dev_info_t *dip)
    316 {
    317 	char *dirpath;
    318 	struct vnode *dirvp;
    319 
    320 	ASSERT(dip != NULL);
    321 
    322 	/* no-op if devfs not mounted yet */
    323 	if (devfs_mntinfo == NULL)
    324 		return (NULL);
    325 
    326 	/*
    327 	 * The lookupname below only looks up cached dv_nodes
    328 	 * because devfs_clean_key is set in thread specific data.
    329 	 */
    330 	dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
    331 	(void) ddi_pathname(dip, dirpath);
    332 	if (devfs_lookupname(dirpath, NULLVPP, &dirvp)) {
    333 		dcmn_err(("directory %s not found\n", dirpath));
    334 		kmem_free(dirpath, MAXPATHLEN);
    335 		return (NULL);
    336 	}
    337 
    338 	kmem_free(dirpath, MAXPATHLEN);
    339 	return (VTODV(dirvp));
    340 }
    341 
    342 /*
    343  * If DV_CLEAN_FORCE devfs_clean is issued with a dip that is not the root
    344  * and not a vHCI we also need to clean any vHCI branches because they
    345  * may contain pHCI nodes. A detach_node() of a pHCI will fail if its
    346  * mdi_devi_offline() fails, and the mdi_devi_offline() of the last
    347  * pHCI will fail unless an ndi_devi_offline() of the Client nodes under
    348  * the vHCI is successful - which requires a clean vHCI branch to removed
    349  * the devi_refs associated with devfs vnodes.
    350  */
    351 static int
    352 devfs_clean_vhci(dev_info_t *dip, void *args)
    353 {
    354 	struct dv_node	*dvp;
    355 	uint_t		flags = (uint_t)(uintptr_t)args;
    356 
    357 	(void) tsd_set(devfs_clean_key, (void *)1);
    358 	dvp = devfs_dip_to_dvnode(dip);
    359 	if (dvp) {
    360 		(void) dv_cleandir(dvp, NULL, flags);
    361 		VN_RELE(DVTOV(dvp));
    362 	}
    363 	(void) tsd_set(devfs_clean_key, NULL);
    364 	return (DDI_WALK_CONTINUE);
    365 }
    366 
    367 /*
    368  * devfs_clean()
    369  *
    370  * Destroy unreferenced dv_node's and detach devices.
    371  *
    372  * devfs_clean will try its best to clean up unused nodes. It is
    373  * no longer valid to assume that just because devfs_clean fails,
    374  * the device is not removable. This is because device contracts
    375  * can result in userland processes releasing a device during the
    376  * device offline process in the kernel. Thus it is no longer
    377  * correct to fail an offline just because devfs_clean finds
    378  * referenced dv_nodes. To enforce this, devfs_clean() always
    379  * returns success i.e. 0.
    380  *
    381  * devfs_clean() may return before removing all possible nodes if
    382  * we cannot acquire locks in areas of the code where potential for
    383  * deadlock exists (see comments in dv_find() and dv_cleandir() for
    384  * examples of this).
    385  *
    386  * devfs caches unreferenced dv_node to speed by the performance
    387  * of ls, find, etc. devfs_clean() is invoked to cleanup cached
    388  * dv_nodes to reclaim memory as well as to facilitate device
    389  * removal (dv_node reference devinfo nodes, which prevents driver
    390  * detach).
    391  *
    392  * If a shell parks in a /devices directory, the dv_node will be
    393  * held, preventing the corresponding device to be detached.
    394  * This would be a denial of service against DR. To prevent this,
    395  * DR code calls devfs_clean() with the DV_CLEAN_FORCE flag.
    396  * The dv_cleandir() implementation does the right thing to ensure
    397  * successful DR.
    398  */
    399 int
    400 devfs_clean(dev_info_t *dip, char *devnm, uint_t flags)
    401 {
    402 	struct dv_node		*dvp;
    403 
    404 	dcmn_err(("devfs_unconfigure: dip = 0x%p, flags = 0x%x",
    405 	    (void *)dip, flags));
    406 
    407 	/* avoid recursion back into the device tree */
    408 	(void) tsd_set(devfs_clean_key, (void *)1);
    409 	dvp = devfs_dip_to_dvnode(dip);
    410 	if (dvp == NULL) {
    411 		(void) tsd_set(devfs_clean_key, NULL);
    412 		return (0);
    413 	}
    414 
    415 	(void) dv_cleandir(dvp, devnm, flags);
    416 	(void) tsd_set(devfs_clean_key, NULL);
    417 	VN_RELE(DVTOV(dvp));
    418 
    419 	/*
    420 	 * If we are doing a DV_CLEAN_FORCE, and we did not start at the
    421 	 * root, and we did not start at a vHCI node then clean vHCI
    422 	 * branches too.  Failure to clean vHCI branch does not cause EBUSY.
    423 	 *
    424 	 * Also, to accommodate nexus callers that clean 'self' to DR 'child'
    425 	 * (like pcihp) we clean vHCIs even when dv_cleandir() of dip branch
    426 	 * above fails - this prevents a busy DR 'child' sibling from causing
    427 	 * the DR of 'child' to fail because a vHCI branch was not cleaned.
    428 	 */
    429 	if ((flags & DV_CLEAN_FORCE) && (dip != ddi_root_node()) &&
    430 	    (mdi_component_is_vhci(dip, NULL) != MDI_SUCCESS)) {
    431 		/*
    432 		 * NOTE: for backport the following is recommended
    433 		 * 	(void) devfs_clean_vhci(scsi_vhci_dip,
    434 		 *	    (void *)(uintptr_t)flags);
    435 		 */
    436 		mdi_walk_vhcis(devfs_clean_vhci, (void *)(uintptr_t)flags);
    437 	}
    438 
    439 	return (0);
    440 }
    441 
    442 /*
    443  * lookup a devfs relative pathname, returning held vnodes for the final
    444  * component and the containing directory (if requested).
    445  *
    446  * NOTE: We can't use lookupname because this would use the current
    447  *	processes credentials (CRED) in the call lookuppnvp instead
    448  *	of kcred.  It also does not give you the flexibility so
    449  * 	specify the directory to start the resolution in (devicesdir).
    450  */
    451 int
    452 devfs_lookupname(
    453 	char	*pathname,		/* user pathname */
    454 	vnode_t **dirvpp,		/* ret for ptr to parent dir vnode */
    455 	vnode_t **compvpp)		/* ret for ptr to component vnode */
    456 {
    457 	struct pathname	pn;
    458 	int		error;
    459 
    460 	ASSERT(devicesdir);		/* devfs must be initialized */
    461 	ASSERT(pathname);		/* must have some path */
    462 
    463 	if (error = pn_get(pathname, UIO_SYSSPACE, &pn))
    464 		return (error);
    465 
    466 	/* make the path relative to /devices. */
    467 	pn_skipslash(&pn);
    468 	if (pn_pathleft(&pn) == 0) {
    469 		/* all we had was "\0" or "/" (which skipslash skiped) */
    470 		if (dirvpp)
    471 			*dirvpp = NULL;
    472 		if (compvpp) {
    473 			VN_HOLD(devicesdir);
    474 			*compvpp = devicesdir;
    475 		}
    476 	} else {
    477 		/*
    478 		 * Use devfs lookup to resolve pathname to the vnode for
    479 		 * the device via relative lookup in devfs. Extra holds for
    480 		 * using devicesdir as directory we are searching and for
    481 		 * being our root without being == rootdir.
    482 		 */
    483 		VN_HOLD(devicesdir);
    484 		VN_HOLD(devicesdir);
    485 		error = lookuppnvp(&pn, NULL, FOLLOW, dirvpp, compvpp,
    486 		    devicesdir, devicesdir, kcred);
    487 	}
    488 	pn_free(&pn);
    489 
    490 	return (error);
    491 }
    492 
    493 /*
    494  * Given a devfs path (without the /devices prefix), walk
    495  * the dv_node sub-tree rooted at the path.
    496  */
    497 int
    498 devfs_walk(
    499 	char		*path,
    500 	void		(*callback)(struct dv_node *, void *),
    501 	void		*arg)
    502 {
    503 	char *dirpath, *devnm;
    504 	struct vnode	*dirvp;
    505 
    506 	ASSERT(path && callback);
    507 
    508 	if (*path != '/' || devfs_mntinfo == NULL)
    509 		return (ENXIO);
    510 
    511 	dcmn_err(("devfs_walk: path = %s", path));
    512 
    513 	dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
    514 
    515 	(void) snprintf(dirpath, MAXPATHLEN, "/devices%s", path);
    516 
    517 	devnm = strrchr(dirpath, '/');
    518 
    519 	ASSERT(devnm);
    520 
    521 	*devnm++ = '\0';
    522 
    523 	if (lookupname(dirpath, UIO_SYSSPACE, 0, NULL, &dirvp)) {
    524 		dcmn_err(("directory %s not found\n", dirpath));
    525 		kmem_free(dirpath, MAXPATHLEN);
    526 		return (ENXIO);
    527 	}
    528 
    529 	/*
    530 	 * if path == "/", visit the root dv_node
    531 	 */
    532 	if (*devnm == '\0') {
    533 		callback(VTODV(dirvp), arg);
    534 		devnm = NULL;
    535 	}
    536 
    537 	dv_walk(VTODV(dirvp), devnm, callback, arg);
    538 
    539 	VN_RELE(dirvp);
    540 
    541 	kmem_free(dirpath, MAXPATHLEN);
    542 
    543 	return (0);
    544 }
    545 
    546 int
    547 devfs_devpolicy(vnode_t *vp, devplcy_t **dpp)
    548 {
    549 	struct vnode *rvp;
    550 	struct dv_node *dvp;
    551 	int rval = -1;
    552 
    553 	/* fail if devfs not mounted yet */
    554 	if (devfs_mntinfo == NULL)
    555 		return (rval);
    556 
    557 	if (VOP_REALVP(vp, &rvp, NULL) == 0 && vn_matchops(rvp, dv_vnodeops)) {
    558 		dvp = VTODV(rvp);
    559 		rw_enter(&dvp->dv_contents, RW_READER);
    560 		if (dvp->dv_priv) {
    561 			dphold(dvp->dv_priv);
    562 			*dpp = dvp->dv_priv;
    563 			rval = 0;
    564 		}
    565 		rw_exit(&dvp->dv_contents);
    566 	}
    567 	return (rval);
    568 }
    569