Home | History | Annotate | Download | only in fd
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
     29 /*	  All rights reserved.  	*/
     30 
     31 
     32 #include <sys/types.h>
     33 #include <sys/param.h>
     34 #include <sys/cmn_err.h>
     35 #include <sys/debug.h>
     36 #include <sys/dirent.h>
     37 #include <sys/errno.h>
     38 #include <sys/file.h>
     39 #include <sys/inline.h>
     40 #include <sys/kmem.h>
     41 #include <sys/pathname.h>
     42 #include <sys/resource.h>
     43 #include <sys/statvfs.h>
     44 #include <sys/mount.h>
     45 #include <sys/sysmacros.h>
     46 #include <sys/systm.h>
     47 #include <sys/uio.h>
     48 #include <sys/vfs.h>
     49 #include <sys/vfs_opreg.h>
     50 #include <sys/vnode.h>
     51 #include <sys/cred.h>
     52 #include <sys/mntent.h>
     53 #include <sys/mount.h>
     54 #include <sys/user.h>
     55 #include <sys/t_lock.h>
     56 #include <sys/modctl.h>
     57 #include <sys/policy.h>
     58 #include <fs/fs_subr.h>
     59 #include <sys/atomic.h>
     60 #include <sys/mkdev.h>
     61 
     62 #define	round(r)	(((r)+sizeof (int)-1)&(~(sizeof (int)-1)))
     63 #define	fdtoi(n)	((n)+100)
     64 
     65 #define	FDDIRSIZE 14
     66 struct fddirect {
     67 	short	d_ino;
     68 	char	d_name[FDDIRSIZE];
     69 };
     70 
     71 #define	FDROOTINO	2
     72 #define	FDSDSIZE	sizeof (struct fddirect)
     73 #define	FDNSIZE		10
     74 
     75 static int		fdfstype = 0;
     76 static major_t		fdfsmaj;
     77 static minor_t		fdfsmin;
     78 static major_t		fdrmaj;
     79 static kmutex_t		fd_minor_lock;
     80 
     81 static int fdget(vnode_t *, char *, vnode_t **);
     82 
     83 /* ARGSUSED */
     84 static int
     85 fdopen(vnode_t **vpp, int mode, cred_t *cr, caller_context_t *ct)
     86 {
     87 	if ((*vpp)->v_type != VDIR) {
     88 		mutex_enter(&(*vpp)->v_lock);
     89 		(*vpp)->v_flag |= VDUP;
     90 		mutex_exit(&(*vpp)->v_lock);
     91 	}
     92 	return (0);
     93 }
     94 
     95 /* ARGSUSED */
     96 static int
     97 fdclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
     98 	caller_context_t *ct)
     99 {
    100 	return (0);
    101 }
    102 
    103 /* ARGSUSED */
    104 static int
    105 fdread(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct)
    106 {
    107 	static struct fddirect dotbuf[] = {
    108 		{ FDROOTINO, "."  },
    109 		{ FDROOTINO, ".." }
    110 	};
    111 	struct fddirect dirbuf;
    112 	int i, n;
    113 	int minfd, maxfd, modoff, error = 0;
    114 	int nentries;
    115 	rctl_qty_t fdno_ctl;
    116 	int endoff;
    117 
    118 	if (vp->v_type != VDIR)
    119 		return (ENOSYS);
    120 
    121 	mutex_enter(&curproc->p_lock);
    122 	fdno_ctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE],
    123 	    curproc->p_rctls, curproc);
    124 	nentries = MIN(P_FINFO(curproc)->fi_nfiles, (int)fdno_ctl);
    125 	mutex_exit(&curproc->p_lock);
    126 
    127 	endoff = (nentries + 2) * FDSDSIZE;
    128 
    129 	/*
    130 	 * Fake up ".", "..", and the /dev/fd directory entries.
    131 	 */
    132 	if (uiop->uio_loffset < (offset_t)0 ||
    133 	    uiop->uio_loffset >= (offset_t)endoff ||
    134 	    uiop->uio_resid <= 0)
    135 		return (0);
    136 	ASSERT(uiop->uio_loffset <= MAXOFF_T);
    137 	if (uiop->uio_offset < 2*FDSDSIZE) {
    138 		error = uiomove((caddr_t)dotbuf + uiop->uio_offset,
    139 		    MIN(uiop->uio_resid, 2*FDSDSIZE - uiop->uio_offset),
    140 		    UIO_READ, uiop);
    141 		if (uiop->uio_resid <= 0 || error)
    142 			return (error);
    143 	}
    144 	minfd = (uiop->uio_offset - 2*FDSDSIZE)/FDSDSIZE;
    145 	maxfd = (uiop->uio_offset + uiop->uio_resid - 1)/FDSDSIZE;
    146 	modoff = uiop->uio_offset % FDSDSIZE;
    147 
    148 	for (i = 0; i < FDDIRSIZE; i++)
    149 		dirbuf.d_name[i] = '\0';
    150 	for (i = minfd; i < MIN(maxfd, nentries); i++) {
    151 		n = i;
    152 		dirbuf.d_ino = fdtoi(n);
    153 		numtos((ulong_t)n, dirbuf.d_name);
    154 		error = uiomove((caddr_t)&dirbuf + modoff,
    155 		    MIN(uiop->uio_resid, FDSDSIZE - modoff),
    156 		    UIO_READ, uiop);
    157 		if (uiop->uio_resid <= 0 || error)
    158 			return (error);
    159 		modoff = 0;
    160 	}
    161 
    162 	return (error);
    163 }
    164 
    165 /* ARGSUSED */
    166 static int
    167 fdgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    168 	caller_context_t *ct)
    169 {
    170 	vfs_t *vfsp = vp->v_vfsp;
    171 	timestruc_t now;
    172 
    173 	if (vp->v_type == VDIR) {
    174 		vap->va_nlink = 2;
    175 		vap->va_size = (u_offset_t)
    176 		    ((P_FINFO(curproc)->fi_nfiles + 2) * FDSDSIZE);
    177 		vap->va_mode = 0555;
    178 		vap->va_nodeid = (ino64_t)FDROOTINO;
    179 	} else {
    180 		vap->va_nlink = 1;
    181 		vap->va_size = (u_offset_t)0;
    182 		vap->va_mode = 0666;
    183 		vap->va_nodeid = (ino64_t)fdtoi(getminor(vp->v_rdev));
    184 	}
    185 	vap->va_type = vp->v_type;
    186 	vap->va_rdev = vp->v_rdev;
    187 	vap->va_blksize = vfsp->vfs_bsize;
    188 	vap->va_nblocks = (fsblkcnt64_t)0;
    189 	gethrestime(&now);
    190 	vap->va_atime = vap->va_mtime = vap->va_ctime = now;
    191 	vap->va_uid = 0;
    192 	vap->va_gid = 0;
    193 	vap->va_fsid = vfsp->vfs_dev;
    194 	vap->va_seq = 0;
    195 	return (0);
    196 }
    197 
    198 /* ARGSUSED */
    199 static int
    200 fdaccess(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
    201 {
    202 	return (0);
    203 }
    204 
    205 /* ARGSUSED */
    206 static int
    207 fdlookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pnp,
    208 	int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    209 	int *direntflags, pathname_t *realpnp)
    210 {
    211 	if (comp[0] == 0 || strcmp(comp, ".") == 0 || strcmp(comp, "..") == 0) {
    212 		VN_HOLD(dp);
    213 		*vpp = dp;
    214 		return (0);
    215 	}
    216 	return (fdget(dp, comp, vpp));
    217 }
    218 
    219 /* ARGSUSED */
    220 static int
    221 fdcreate(vnode_t *dvp, char *comp, vattr_t *vap, enum vcexcl excl,
    222 	int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
    223 	vsecattr_t *vsecp)
    224 {
    225 	return (fdget(dvp, comp, vpp));
    226 }
    227 
    228 /* ARGSUSED */
    229 static int
    230 fdreaddir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp, caller_context_t *ct,
    231 	int flags)
    232 {
    233 	/* bp holds one dirent structure */
    234 	u_offset_t bp[DIRENT64_RECLEN(FDNSIZE) / sizeof (u_offset_t)];
    235 	struct dirent64 *dirent = (struct dirent64 *)bp;
    236 	int reclen, nentries;
    237 	rctl_qty_t fdno_ctl;
    238 	int  n;
    239 	int oresid;
    240 	off_t off;
    241 
    242 	if (uiop->uio_offset < 0 || uiop->uio_resid <= 0 ||
    243 	    (uiop->uio_offset % FDSDSIZE) != 0)
    244 		return (ENOENT);
    245 
    246 	ASSERT(uiop->uio_loffset <= MAXOFF_T);
    247 	oresid = uiop->uio_resid;
    248 	bzero(bp, sizeof (bp));
    249 
    250 	mutex_enter(&curproc->p_lock);
    251 	fdno_ctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE],
    252 	    curproc->p_rctls, curproc);
    253 	nentries = MIN(P_FINFO(curproc)->fi_nfiles, (int)fdno_ctl);
    254 	mutex_exit(&curproc->p_lock);
    255 
    256 	while (uiop->uio_resid > 0) {
    257 		if ((off = uiop->uio_offset) == 0) {	/* "." */
    258 			dirent->d_ino = (ino64_t)FDROOTINO;
    259 			dirent->d_name[0] = '.';
    260 			dirent->d_name[1] = '\0';
    261 			reclen = DIRENT64_RECLEN(1);
    262 		} else if (off == FDSDSIZE) {		/* ".." */
    263 			dirent->d_ino = (ino64_t)FDROOTINO;
    264 			dirent->d_name[0] = '.';
    265 			dirent->d_name[1] = '.';
    266 			dirent->d_name[2] = '\0';
    267 			reclen = DIRENT64_RECLEN(2);
    268 		} else {
    269 			/*
    270 			 * Return entries corresponding to the allowable
    271 			 * number of file descriptors for this process.
    272 			 */
    273 			if ((n = (off-2*FDSDSIZE)/FDSDSIZE) >= nentries)
    274 				break;
    275 			dirent->d_ino = (ino64_t)fdtoi(n);
    276 			numtos((ulong_t)n, dirent->d_name);
    277 			reclen = DIRENT64_RECLEN(strlen(dirent->d_name));
    278 		}
    279 		dirent->d_off = (offset_t)(uiop->uio_offset + FDSDSIZE);
    280 		dirent->d_reclen = (ushort_t)reclen;
    281 
    282 		if (reclen > uiop->uio_resid) {
    283 			/*
    284 			 * Error if no entries have been returned yet.
    285 			 */
    286 			if (uiop->uio_resid == oresid)
    287 				return (EINVAL);
    288 			break;
    289 		}
    290 		/*
    291 		 * uiomove() updates both resid and offset by the same
    292 		 * amount.  But we want offset to change in increments
    293 		 * of FDSDSIZE, which is different from the number of bytes
    294 		 * being returned to the user.  So we set uio_offset
    295 		 * separately, ignoring what uiomove() does.
    296 		 */
    297 		if (uiomove((caddr_t)dirent, reclen, UIO_READ, uiop))
    298 			return (EFAULT);
    299 		uiop->uio_offset = off + FDSDSIZE;
    300 	}
    301 	if (eofp)
    302 		*eofp = ((uiop->uio_offset-2*FDSDSIZE)/FDSDSIZE >= nentries);
    303 	return (0);
    304 }
    305 
    306 /* ARGSUSED */
    307 static void
    308 fdinactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
    309 {
    310 	mutex_enter(&vp->v_lock);
    311 	ASSERT(vp->v_count >= 1);
    312 	if (--vp->v_count != 0) {
    313 		mutex_exit(&vp->v_lock);
    314 		return;
    315 	}
    316 	mutex_exit(&vp->v_lock);
    317 	vn_invalid(vp);
    318 	vn_free(vp);
    319 }
    320 
    321 static struct vnodeops *fd_vnodeops;
    322 
    323 static const fs_operation_def_t fd_vnodeops_template[] = {
    324 	VOPNAME_OPEN,		{ .vop_open = fdopen },
    325 	VOPNAME_CLOSE,		{ .vop_close = fdclose },
    326 	VOPNAME_READ,		{ .vop_read = fdread },
    327 	VOPNAME_GETATTR,	{ .vop_getattr = fdgetattr },
    328 	VOPNAME_ACCESS,		{ .vop_access = fdaccess },
    329 	VOPNAME_LOOKUP,		{ .vop_lookup = fdlookup },
    330 	VOPNAME_CREATE,		{ .vop_create = fdcreate },
    331 	VOPNAME_READDIR,	{ .vop_readdir = fdreaddir },
    332 	VOPNAME_INACTIVE,	{ .vop_inactive = fdinactive },
    333 	VOPNAME_FRLOCK,		{ .error = fs_error },
    334 	VOPNAME_POLL,		{ .error = fs_error },
    335 	VOPNAME_DISPOSE,	{ .error = fs_error },
    336 	NULL,			NULL
    337 };
    338 
    339 static int
    340 fdget(struct vnode *dvp, char *comp, struct vnode **vpp)
    341 {
    342 	int n = 0;
    343 	struct vnode *vp;
    344 
    345 	while (*comp) {
    346 		if (*comp < '0' || *comp > '9')
    347 			return (ENOENT);
    348 		n = 10 * n + *comp++ - '0';
    349 	}
    350 	vp = vn_alloc(KM_SLEEP);
    351 	vp->v_type = VCHR;
    352 	vp->v_vfsp = dvp->v_vfsp;
    353 	vn_setops(vp, fd_vnodeops);
    354 	vp->v_data = NULL;
    355 	vp->v_flag = VNOMAP;
    356 	vp->v_rdev = makedevice(fdrmaj, n);
    357 	vn_exists(vp);
    358 	*vpp = vp;
    359 	return (0);
    360 }
    361 
    362 /*
    363  * fdfs is mounted on /dev/fd, however, there are two interesting
    364  * possibilities - two threads racing to do the same mount (protected
    365  * by vfs locking), and two threads mounting fdfs in different places.
    366  */
    367 /*ARGSUSED*/
    368 static int
    369 fdmount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
    370 {
    371 	struct vnode *vp;
    372 
    373 	if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
    374 		return (EPERM);
    375 	if (mvp->v_type != VDIR)
    376 		return (ENOTDIR);
    377 
    378 	mutex_enter(&mvp->v_lock);
    379 	if ((uap->flags & MS_OVERLAY) == 0 &&
    380 	    (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
    381 		mutex_exit(&mvp->v_lock);
    382 		return (EBUSY);
    383 	}
    384 	mutex_exit(&mvp->v_lock);
    385 
    386 	/*
    387 	 * Having the resource be anything but "fd" doesn't make sense
    388 	 */
    389 	vfs_setresource(vfsp, "fd");
    390 
    391 	vp = vn_alloc(KM_SLEEP);
    392 	vp->v_vfsp = vfsp;
    393 	vn_setops(vp, fd_vnodeops);
    394 	vp->v_type = VDIR;
    395 	vp->v_data = NULL;
    396 	vp->v_flag |= VROOT;
    397 	vfsp->vfs_fstype = fdfstype;
    398 	vfsp->vfs_data = (char *)vp;
    399 	mutex_enter(&fd_minor_lock);
    400 	do {
    401 		fdfsmin = (fdfsmin + 1) & L_MAXMIN32;
    402 		vfsp->vfs_dev = makedevice(fdfsmaj, fdfsmin);
    403 	} while (vfs_devismounted(vfsp->vfs_dev));
    404 	mutex_exit(&fd_minor_lock);
    405 	vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, fdfstype);
    406 	vfsp->vfs_bsize = 1024;
    407 	return (0);
    408 }
    409 
    410 /* ARGSUSED */
    411 static int
    412 fdunmount(vfs_t *vfsp, int flag, cred_t *cr)
    413 {
    414 	vnode_t *rvp;
    415 
    416 	if (secpolicy_fs_unmount(cr, vfsp) != 0)
    417 		return (EPERM);
    418 
    419 	/*
    420 	 * forced unmount is not supported by this file system
    421 	 * and thus, ENOTSUP, is being returned.
    422 	 */
    423 	if (flag & MS_FORCE)
    424 		return (ENOTSUP);
    425 
    426 	rvp = (vnode_t *)vfsp->vfs_data;
    427 	if (rvp->v_count > 1)
    428 		return (EBUSY);
    429 
    430 	VN_RELE(rvp);
    431 	return (0);
    432 }
    433 
    434 /* ARGSUSED */
    435 static int
    436 fdroot(vfs_t *vfsp, vnode_t **vpp)
    437 {
    438 	vnode_t *vp = (vnode_t *)vfsp->vfs_data;
    439 
    440 	VN_HOLD(vp);
    441 	*vpp = vp;
    442 	return (0);
    443 }
    444 
    445 /*
    446  * No locking required because I held the root vnode before calling this
    447  * function so the vfs won't disappear on me.  To be more explicit:
    448  * fdvrootp->v_count will be greater than 1 so fdunmount will just return.
    449  */
    450 static int
    451 fdstatvfs(struct vfs *vfsp, struct statvfs64 *sp)
    452 {
    453 	dev32_t d32;
    454 	rctl_qty_t fdno_ctl;
    455 
    456 	mutex_enter(&curproc->p_lock);
    457 	fdno_ctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE],
    458 	    curproc->p_rctls, curproc);
    459 	mutex_exit(&curproc->p_lock);
    460 
    461 	bzero(sp, sizeof (*sp));
    462 	sp->f_bsize = 1024;
    463 	sp->f_frsize = 1024;
    464 	sp->f_blocks = (fsblkcnt64_t)0;
    465 	sp->f_bfree = (fsblkcnt64_t)0;
    466 	sp->f_bavail = (fsblkcnt64_t)0;
    467 	sp->f_files = (fsfilcnt64_t)
    468 	    (MIN(P_FINFO(curproc)->fi_nfiles, fdno_ctl + 2));
    469 	sp->f_ffree = (fsfilcnt64_t)0;
    470 	sp->f_favail = (fsfilcnt64_t)0;
    471 	(void) cmpldev(&d32, vfsp->vfs_dev);
    472 	sp->f_fsid = d32;
    473 	(void) strcpy(sp->f_basetype, vfssw[fdfstype].vsw_name);
    474 	sp->f_flag = vf_to_stf(vfsp->vfs_flag);
    475 	sp->f_namemax = FDNSIZE;
    476 	(void) strcpy(sp->f_fstr, "/dev/fd");
    477 	(void) strcpy(&sp->f_fstr[8], "/dev/fd");
    478 	return (0);
    479 }
    480 
    481 int
    482 fdinit(int fstype, char *name)
    483 {
    484 	static const fs_operation_def_t fd_vfsops_template[] = {
    485 		VFSNAME_MOUNT,		{ .vfs_mount = fdmount },
    486 		VFSNAME_UNMOUNT,	{ .vfs_unmount = fdunmount },
    487 		VFSNAME_ROOT, 		{ .vfs_root = fdroot },
    488 		VFSNAME_STATVFS,	{ .vfs_statvfs = fdstatvfs },
    489 		NULL,			NULL
    490 	};
    491 	int error;
    492 
    493 	fdfstype = fstype;
    494 	ASSERT(fdfstype != 0);
    495 
    496 	/*
    497 	 * Associate VFS ops vector with this fstype.
    498 	 */
    499 	error = vfs_setfsops(fstype, fd_vfsops_template, NULL);
    500 	if (error != 0) {
    501 		cmn_err(CE_WARN, "fdinit: bad vnode ops template");
    502 		return (error);
    503 	}
    504 
    505 	error = vn_make_ops(name, fd_vnodeops_template, &fd_vnodeops);
    506 	if (error != 0) {
    507 		(void) vfs_freevfsops_by_type(fstype);
    508 		cmn_err(CE_WARN, "fdinit: bad vnode ops template");
    509 		return (error);
    510 	}
    511 
    512 	/*
    513 	 * Assign unique "device" numbers (reported by stat(2)).
    514 	 */
    515 	fdfsmaj = getudev();
    516 	fdrmaj = getudev();
    517 	if (fdfsmaj == (major_t)-1 || fdrmaj == (major_t)-1) {
    518 		cmn_err(CE_WARN, "fdinit: can't get unique device numbers");
    519 		if (fdfsmaj == (major_t)-1)
    520 			fdfsmaj = 0;
    521 		if (fdrmaj == (major_t)-1)
    522 			fdrmaj = 0;
    523 	}
    524 	mutex_init(&fd_minor_lock, NULL, MUTEX_DEFAULT, NULL);
    525 	return (0);
    526 }
    527 
    528 /*
    529  * FDFS Mount options table
    530  */
    531 static char *rw_cancel[] = { MNTOPT_RO, NULL };
    532 
    533 static mntopt_t mntopts[] = {
    534 /*
    535  *	option name		cancel option	default arg	flags
    536  */
    537 	{ MNTOPT_RW,		rw_cancel,	NULL,		MO_DEFAULT,
    538 		(void *)MNTOPT_NOINTR },
    539 	{ MNTOPT_IGNORE,	NULL,		NULL,		0,
    540 		(void *)0 },
    541 };
    542 
    543 static mntopts_t fdfs_mntopts = {
    544 	sizeof (mntopts) / sizeof (mntopt_t),
    545 	mntopts
    546 };
    547 
    548 static vfsdef_t vfw = {
    549 	VFSDEF_VERSION,
    550 	"fd",
    551 	fdinit,
    552 	VSW_HASPROTO,
    553 	&fdfs_mntopts
    554 };
    555 
    556 static struct modlfs modlfs = {
    557 	&mod_fsops,
    558 	"filesystem for fd",
    559 	&vfw
    560 };
    561 
    562 static struct modlinkage modlinkage = {
    563 	MODREV_1,
    564 	&modlfs,
    565 	NULL
    566 };
    567 
    568 int
    569 _init(void)
    570 {
    571 	return (mod_install(&modlinkage));
    572 }
    573 
    574 int
    575 _info(struct modinfo *modinfop)
    576 {
    577 	return (mod_info(&modlinkage, modinfop));
    578 }
    579