Home | History | Annotate | Download | only in tmpfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 #include <sys/types.h>
     29 #include <sys/param.h>
     30 #include <sys/sysmacros.h>
     31 #include <sys/systm.h>
     32 #include <sys/time.h>
     33 #include <sys/vfs.h>
     34 #include <sys/vnode.h>
     35 #include <sys/errno.h>
     36 #include <sys/cmn_err.h>
     37 #include <sys/cred.h>
     38 #include <sys/stat.h>
     39 #include <sys/debug.h>
     40 #include <sys/policy.h>
     41 #include <sys/fs/tmpnode.h>
     42 #include <sys/fs/tmp.h>
     43 #include <sys/vtrace.h>
     44 
/*
 * Forward declarations for the file-local helpers that are defined
 * after their first use in this file.
 */
static int tdircheckpath(struct tmpnode *, struct tmpnode *, struct cred *);
static int tdirrename(struct tmpnode *, struct tmpnode *, struct tmpnode *,
	char *, struct tmpnode *, struct tdirent *, struct cred *);
static void tdirfixdotdot(struct tmpnode *, struct tmpnode *, struct tmpnode *);
static int tdirmaketnode(struct tmpnode *, struct tmount *, struct vattr *,
	enum de_op, struct tmpnode **, struct cred *);
static int tdiraddentry(struct tmpnode *, struct tmpnode *, char *,
	enum de_op, struct tmpnode *);
     53 
     54 
/*
 * File-global name hash for directory entries.
 *
 * t_hashtable[] holds the head of each bucket chain (entries are chained
 * through td_link) and t_hashmutex[] holds the mutexes that protect the
 * chains.  Both indices are taken from the low-order bits of the same
 * hash value, and T_MUTEX_SIZE divides T_HASH_SIZE, so a given bucket
 * always maps to the same mutex.
 */
#define	T_HASH_SIZE	8192		/* must be power of 2 */
#define	T_MUTEX_SIZE	64		/* masked below, so also a power of 2 */

static struct tdirent	*t_hashtable[T_HASH_SIZE];
static kmutex_t		 t_hashmutex[T_MUTEX_SIZE];

/* Map a hash value to its bucket and to the mutex covering that bucket. */
#define	T_HASH_INDEX(a)		((a) & (T_HASH_SIZE-1))
#define	T_MUTEX_INDEX(a)	((a) & (T_MUTEX_SIZE-1))
     63 
     64 #define	TMPFS_HASH(tp, name, hash)				\
     65 	{							\
     66 		char Xc, *Xcp;					\
     67 		hash = (uint_t)(uintptr_t)(tp) >> 8;		\
     68 		for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++)	\
     69 			hash = (hash << 4) + hash + (uint_t)Xc;	\
     70 	}
     71 
     72 void
     73 tmpfs_hash_init(void)
     74 {
     75 	int	ix;
     76 
     77 	for (ix = 0; ix < T_MUTEX_SIZE; ix++)
     78 		mutex_init(&t_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL);
     79 }
     80 
     81 /*
     82  * This routine is where the rubber meets the road for identities.
     83  */
     84 static void
     85 tmpfs_hash_in(struct tdirent *t)
     86 {
     87 	uint_t		hash;
     88 	struct tdirent	**prevpp;
     89 	kmutex_t	*t_hmtx;
     90 
     91 	TMPFS_HASH(t->td_parent, t->td_name, hash);
     92 	t->td_hash = hash;
     93 	prevpp = &t_hashtable[T_HASH_INDEX(hash)];
     94 	t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
     95 	mutex_enter(t_hmtx);
     96 	t->td_link = *prevpp;
     97 	*prevpp = t;
     98 	mutex_exit(t_hmtx);
     99 }
    100 
    101 /*
    102  * Remove tdirent *t from the hash list.
    103  */
    104 static void
    105 tmpfs_hash_out(struct tdirent *t)
    106 {
    107 	uint_t		hash;
    108 	struct tdirent	**prevpp;
    109 	kmutex_t	*t_hmtx;
    110 
    111 	hash = t->td_hash;
    112 	prevpp = &t_hashtable[T_HASH_INDEX(hash)];
    113 	t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
    114 	mutex_enter(t_hmtx);
    115 	while (*prevpp != t)
    116 		prevpp = &(*prevpp)->td_link;
    117 	*prevpp = t->td_link;
    118 	mutex_exit(t_hmtx);
    119 }
    120 
    121 /*
    122  * Currently called by tdirrename() only.
    123  * rename operation needs to be done with lock held, to ensure that
    124  * no other operations can access the tmpnode at the same instance.
    125  */
    126 static void
    127 tmpfs_hash_change(struct tdirent *tdp, struct tmpnode *fromtp)
    128 {
    129 	uint_t		hash;
    130 	kmutex_t	*t_hmtx;
    131 
    132 	hash = tdp->td_hash;
    133 	t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
    134 	mutex_enter(t_hmtx);
    135 	tdp->td_tmpnode = fromtp;
    136 	mutex_exit(t_hmtx);
    137 }
    138 
    139 static struct tdirent *
    140 tmpfs_hash_lookup(char *name, struct tmpnode *parent, uint_t hold,
    141 	struct tmpnode **found)
    142 {
    143 	struct tdirent	*l;
    144 	uint_t		hash;
    145 	kmutex_t	*t_hmtx;
    146 	struct tmpnode	*tnp;
    147 
    148 	TMPFS_HASH(parent, name, hash);
    149 	t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
    150 	mutex_enter(t_hmtx);
    151 	l = t_hashtable[T_HASH_INDEX(hash)];
    152 	while (l) {
    153 		if ((l->td_hash == hash) &&
    154 		    (l->td_parent == parent) &&
    155 		    (strcmp(l->td_name, name) == 0)) {
    156 			/*
    157 			 * We need to make sure that the tmpnode that
    158 			 * we put a hold on is the same one that we pass back.
    159 			 * Hence, temporary variable tnp is necessary.
    160 			 */
    161 			tnp = l->td_tmpnode;
    162 			if (hold) {
    163 				ASSERT(tnp);
    164 				tmpnode_hold(tnp);
    165 			}
    166 			if (found)
    167 				*found = tnp;
    168 			mutex_exit(t_hmtx);
    169 			return (l);
    170 		} else {
    171 			l = l->td_link;
    172 		}
    173 	}
    174 	mutex_exit(t_hmtx);
    175 	return (NULL);
    176 }
    177 
    178 /*
    179  * Search directory 'parent' for entry 'name'.
    180  *
    181  * The calling thread can't hold the write version
    182  * of the rwlock for the directory being searched
    183  *
    184  * 0 is returned on success and *foundtp points
    185  * to the found tmpnode with its vnode held.
    186  */
    187 int
    188 tdirlookup(
    189 	struct tmpnode *parent,
    190 	char *name,
    191 	struct tmpnode **foundtp,
    192 	struct cred *cred)
    193 {
    194 	int error;
    195 
    196 	*foundtp = NULL;
    197 	if (parent->tn_type != VDIR)
    198 		return (ENOTDIR);
    199 
    200 	if ((error = tmp_taccess(parent, VEXEC, cred)))
    201 		return (error);
    202 
    203 	if (*name == '\0') {
    204 		tmpnode_hold(parent);
    205 		*foundtp = parent;
    206 		return (0);
    207 	}
    208 
    209 	/*
    210 	 * Search the directory for the matching name
    211 	 * We need the lock protecting the tn_dir list
    212 	 * so that it doesn't change out from underneath us.
    213 	 * tmpfs_hash_lookup() will pass back the tmpnode
    214 	 * with a hold on it.
    215 	 */
    216 
    217 	if (tmpfs_hash_lookup(name, parent, 1, foundtp) != NULL) {
    218 		ASSERT(*foundtp);
    219 		return (0);
    220 	}
    221 
    222 	return (ENOENT);
    223 }
    224 
/*
 * Enter a directory entry for 'name' and 'tp' into directory 'dir'
 *
 * The caller must hold dir->tn_rwlock as writer.
 *
 * 'op' selects the operation:
 *   DE_CREATE / DE_MKDIR  create a new tmpnode (described by 'va') and
 *			   enter it; 'tp' is ignored on entry.
 *   DE_LINK		   enter an additional name for existing 'tp'.
 *   DE_RENAME		   enter 'tp' (currently in 'fromparent') under
 *			   'name', replacing an existing entry if present.
 *
 * For DE_LINK and DE_RENAME the link count of 'tp' is incremented up
 * front and decremented again if the operation fails afterwards.
 *
 * Returns 0 on success.  Errors produced directly here are EACCES (name
 * contains '/'), ENOENT (source or target directory has no links left),
 * EMLINK (source already has MAXLINK links), EINVAL (renaming a
 * directory onto itself) and EEXIST (entry already exists); errors from
 * tdircheckpath(), tmp_taccess(), tdirmaketnode(), tdiraddentry() and
 * tdirrename() are passed through.
 *
 * When a create/mkdir finds an existing entry and 'tpp' is non-NULL,
 * *tpp is set to the existing tmpnode and the hold taken by the lookup
 * is not released here.
 */
int
tdirenter(
	struct tmount	*tm,
	struct tmpnode	*dir,		/* target directory to make entry in */
	char		*name,		/* name of entry */
	enum de_op	op,		/* entry operation */
	struct tmpnode	*fromparent,	/* source directory if rename */
	struct tmpnode	*tp,		/* source tmpnode, if link/rename */
	struct vattr	*va,
	struct tmpnode	**tpp,		/* return tmpnode, if create/mkdir */
	struct cred	*cred,
	caller_context_t *ctp)
{
	struct tdirent *tdp;
	struct tmpnode *found = NULL;
	int error = 0;
	char *s;

	/*
	 * tn_rwlock is held to serialize direnter and dirdeletes
	 */
	ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
	ASSERT(dir->tn_type == VDIR);

	/*
	 * Don't allow '/' characters in pathname component
	 * (thus in ufs_direnter()).
	 */
	for (s = name; *s; s++)
		if (*s == '/')
			return (EACCES);

	if (name[0] == '\0')
		panic("tdirenter: NULL name");

	/*
	 * For link and rename lock the source entry and check the link count
	 * to see if it has been removed while it was unlocked.
	 * The early returns below happen before the link count is bumped,
	 * so they correctly bypass the undo code at "out".
	 */
	if (op == DE_LINK || op == DE_RENAME) {
		/* dir's rwlock is already held by the caller */
		if (tp != dir)
			rw_enter(&tp->tn_rwlock, RW_WRITER);
		mutex_enter(&tp->tn_tlock);
		if (tp->tn_nlink == 0) {
			mutex_exit(&tp->tn_tlock);
			if (tp != dir)
				rw_exit(&tp->tn_rwlock);
			return (ENOENT);
		}

		if (tp->tn_nlink == MAXLINK) {
			mutex_exit(&tp->tn_tlock);
			if (tp != dir)
				rw_exit(&tp->tn_rwlock);
			return (EMLINK);
		}
		tp->tn_nlink++;
		gethrestime(&tp->tn_ctime);
		mutex_exit(&tp->tn_tlock);
		if (tp != dir)
			rw_exit(&tp->tn_rwlock);
	}

	/*
	 * This might be a "dangling detached directory".
	 * it could have been removed, but a reference
	 * to it kept in u_cwd.  don't bother searching
	 * it, and with any luck the user will get tired
	 * of dealing with us and cd to some absolute
	 * pathway.  *sigh*, thus in ufs, too.
	 */
	if (dir->tn_nlink == 0) {
		error = ENOENT;
		goto out;
	}

	/*
	 * If this is a rename of a directory and the parent is
	 * different (".." must be changed), then the source
	 * directory must not be in the directory hierarchy
	 * above the target, as this would orphan everything
	 * below the source directory.
	 */
	if (op == DE_RENAME) {
		if (tp == dir) {
			error = EINVAL;
			goto out;
		}
		if (tp->tn_type == VDIR) {
			if ((fromparent != dir) &&
			    (error = tdircheckpath(tp, dir, cred))) {
				goto out;
			}
		}
	}

	/*
	 * Search for the entry.  Return "found" if it exists.
	 * The lookup places a hold on "found"; every branch below either
	 * releases it or passes it to the caller through *tpp.
	 */
	tdp = tmpfs_hash_lookup(name, dir, 1, &found);

	if (tdp) {
		ASSERT(found);
		switch (op) {
		case DE_CREATE:
		case DE_MKDIR:
			/*
			 * NOTE(review): when tpp is NULL the existing
			 * entry is released and error stays 0, i.e. the
			 * caller sees success -- confirm this is intended.
			 */
			if (tpp) {
				*tpp = found;
				error = EEXIST;
			} else {
				tmpnode_rele(found);
			}
			break;

		case DE_RENAME:
			/* Replace the existing target entry with tp. */
			error = tdirrename(fromparent, tp,
			    dir, name, found, tdp, cred);
			if (error == 0) {
				if (found != NULL) {
					vnevent_rename_dest(TNTOV(found),
					    TNTOV(dir), name, ctp);
				}
			}

			tmpnode_rele(found);
			break;

		case DE_LINK:
			/*
			 * Can't link to an existing file.
			 */
			error = EEXIST;
			tmpnode_rele(found);
			break;
		}
	} else {

		/*
		 * The entry does not exist. Check write permission in
		 * directory to see if entry can be created.
		 */
		if (error = tmp_taccess(dir, VWRITE, cred))
			goto out;
		if (op == DE_CREATE || op == DE_MKDIR) {
			/*
			 * Make new tmpnode and directory entry as required.
			 * From here on tp refers to the new tmpnode.
			 */
			error = tdirmaketnode(dir, tm, va, op, &tp, cred);
			if (error)
				goto out;
		}
		if (error = tdiraddentry(dir, tp, name, op, fromparent)) {
			if (op == DE_CREATE || op == DE_MKDIR) {
				/*
				 * Unmake the inode we just made.
				 */
				rw_enter(&tp->tn_rwlock, RW_WRITER);
				if ((tp->tn_type) == VDIR) {
					ASSERT(tdp == NULL);
					/*
					 * cleanup allocs made by tdirinit()
					 */
					tdirtrunc(tp);
				}
				mutex_enter(&tp->tn_tlock);
				tp->tn_nlink = 0;
				mutex_exit(&tp->tn_tlock);
				gethrestime(&tp->tn_ctime);
				rw_exit(&tp->tn_rwlock);
				tmpnode_rele(tp);
				tp = NULL;
			}
		} else if (tpp) {
			/* Success: hand the entered tmpnode to the caller. */
			*tpp = tp;
		} else if (op == DE_CREATE || op == DE_MKDIR) {
			/* Nobody wants the new tmpnode back; release it. */
			tmpnode_rele(tp);
		}
	}

out:
	if (error && (op == DE_LINK || op == DE_RENAME)) {
		/*
		 * Undo bumped link count.
		 */
		DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
		gethrestime(&tp->tn_ctime);
	}
	return (error);
}
    419 
    420 /*
    421  * Delete entry tp of name "nm" from dir.
    422  * Free dir entry space and decrement link count on tmpnode(s).
    423  *
    424  * Return 0 on success.
    425  */
    426 int
    427 tdirdelete(
    428 	struct tmpnode *dir,
    429 	struct tmpnode *tp,
    430 	char *nm,
    431 	enum dr_op op,
    432 	struct cred *cred)
    433 {
    434 	struct tdirent *tpdp;
    435 	int error;
    436 	size_t namelen;
    437 	struct tmpnode *tnp;
    438 	timestruc_t now;
    439 
    440 	ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
    441 	ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
    442 	ASSERT(dir->tn_type == VDIR);
    443 
    444 	if (nm[0] == '\0')
    445 		panic("tdirdelete: NULL name for %p", (void *)tp);
    446 
    447 	/*
    448 	 * return error when removing . and ..
    449 	 */
    450 	if (nm[0] == '.') {
    451 		if (nm[1] == '\0')
    452 			return (EINVAL);
    453 		if (nm[1] == '.' && nm[2] == '\0')
    454 			return (EEXIST); /* thus in ufs */
    455 	}
    456 
    457 	if (error = tmp_taccess(dir, VEXEC|VWRITE, cred))
    458 		return (error);
    459 
    460 	/*
    461 	 * If the parent directory is "sticky", then the user must
    462 	 * own the parent directory or the file in it, or else must
    463 	 * have permission to write the file.  Otherwise it may not
    464 	 * be deleted (except by privileged users).
    465 	 * Same as ufs_dirremove.
    466 	 */
    467 	if ((error = tmp_sticky_remove_access(dir, tp, cred)) != 0)
    468 		return (error);
    469 
    470 	if (dir->tn_dir == NULL)
    471 		return (ENOENT);
    472 
    473 	tpdp = tmpfs_hash_lookup(nm, dir, 0, &tnp);
    474 	if (tpdp == NULL) {
    475 		/*
    476 		 * If it is gone, some other thread got here first!
    477 		 * Return error ENOENT.
    478 		 */
    479 		return (ENOENT);
    480 	}
    481 
    482 	/*
    483 	 * If the tmpnode in the tdirent changed, we were probably
    484 	 * the victim of a concurrent rename operation.  The original
    485 	 * is gone, so return that status (same as UFS).
    486 	 */
    487 	if (tp != tnp)
    488 		return (ENOENT);
    489 
    490 	tmpfs_hash_out(tpdp);
    491 
    492 	/*
    493 	 * Take tpdp out of the directory list.
    494 	 */
    495 	ASSERT(tpdp->td_next != tpdp);
    496 	ASSERT(tpdp->td_prev != tpdp);
    497 	if (tpdp->td_prev) {
    498 		tpdp->td_prev->td_next = tpdp->td_next;
    499 	}
    500 	if (tpdp->td_next) {
    501 		tpdp->td_next->td_prev = tpdp->td_prev;
    502 	}
    503 
    504 	/*
    505 	 * If the roving slot pointer happens to match tpdp,
    506 	 * point it at the previous dirent.
    507 	 */
    508 	if (dir->tn_dir->td_prev == tpdp) {
    509 		dir->tn_dir->td_prev = tpdp->td_prev;
    510 	}
    511 	ASSERT(tpdp->td_next != tpdp);
    512 	ASSERT(tpdp->td_prev != tpdp);
    513 
    514 	/*
    515 	 * tpdp points to the correct directory entry
    516 	 */
    517 	namelen = strlen(tpdp->td_name) + 1;
    518 
    519 	tmp_memfree(tpdp, sizeof (struct tdirent) + namelen);
    520 	dir->tn_size -= (sizeof (struct tdirent) + namelen);
    521 	dir->tn_dirents--;
    522 
    523 	gethrestime(&now);
    524 	dir->tn_mtime = now;
    525 	dir->tn_ctime = now;
    526 	tp->tn_ctime = now;
    527 
    528 	ASSERT(tp->tn_nlink > 0);
    529 	DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
    530 	if (op == DR_RMDIR && tp->tn_type == VDIR) {
    531 		tdirtrunc(tp);
    532 		ASSERT(tp->tn_nlink == 0);
    533 	}
    534 	return (0);
    535 }
    536 
    537 /*
    538  * tdirinit is used internally to initialize a directory (dir)
    539  * with '.' and '..' entries without checking permissions and locking
    540  */
    541 void
    542 tdirinit(
    543 	struct tmpnode *parent,		/* parent of directory to initialize */
    544 	struct tmpnode *dir)		/* the new directory */
    545 {
    546 	struct tdirent *dot, *dotdot;
    547 	timestruc_t now;
    548 
    549 	ASSERT(RW_WRITE_HELD(&parent->tn_rwlock));
    550 	ASSERT(dir->tn_type == VDIR);
    551 
    552 	dot = tmp_memalloc(sizeof (struct tdirent) + 2, TMP_MUSTHAVE);
    553 	dotdot = tmp_memalloc(sizeof (struct tdirent) + 3, TMP_MUSTHAVE);
    554 
    555 	/*
    556 	 * Initialize the entries
    557 	 */
    558 	dot->td_tmpnode = dir;
    559 	dot->td_offset = 0;
    560 	dot->td_name = (char *)dot + sizeof (struct tdirent);
    561 	dot->td_name[0] = '.';
    562 	dot->td_parent = dir;
    563 	tmpfs_hash_in(dot);
    564 
    565 	dotdot->td_tmpnode = parent;
    566 	dotdot->td_offset = 1;
    567 	dotdot->td_name = (char *)dotdot + sizeof (struct tdirent);
    568 	dotdot->td_name[0] = '.';
    569 	dotdot->td_name[1] = '.';
    570 	dotdot->td_parent = dir;
    571 	tmpfs_hash_in(dotdot);
    572 
    573 	/*
    574 	 * Initialize directory entry list.
    575 	 */
    576 	dot->td_next = dotdot;
    577 	dot->td_prev = dotdot;	/* dot's td_prev holds roving slot pointer */
    578 	dotdot->td_next = NULL;
    579 	dotdot->td_prev = dot;
    580 
    581 	gethrestime(&now);
    582 	dir->tn_mtime = now;
    583 	dir->tn_ctime = now;
    584 
    585 	/*
    586 	 * Link counts are special for the hidden attribute directory.
    587 	 * The only explicit reference in the name space is "." and
    588 	 * the reference through ".." is not counted on the parent
    589 	 * file. The attrdir is created as a side effect to lookup,
    590 	 * so don't change the ctime of the parent.
    591 	 * Since tdirinit is called with both dir and parent being the
    592 	 * same for the root vnode, we need to increment this before we set
    593 	 * tn_nlink = 2 below.
    594 	 */
    595 	if (!(dir->tn_vnode->v_flag & V_XATTRDIR)) {
    596 		INCR_COUNT(&parent->tn_nlink, &parent->tn_tlock);
    597 		parent->tn_ctime = now;
    598 	}
    599 
    600 	dir->tn_dir = dot;
    601 	dir->tn_size = 2 * sizeof (struct tdirent) + 5;	/* dot and dotdot */
    602 	dir->tn_dirents = 2;
    603 	dir->tn_nlink = 2;
    604 }
    605 
    606 
    607 /*
    608  * tdirtrunc is called to remove all directory entries under this directory.
    609  */
    610 void
    611 tdirtrunc(struct tmpnode *dir)
    612 {
    613 	struct tdirent *tdp;
    614 	struct tmpnode *tp;
    615 	size_t namelen;
    616 	timestruc_t now;
    617 	int isvattrdir, isdotdot, skip_decr;
    618 
    619 	ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
    620 	ASSERT(dir->tn_type == VDIR);
    621 
    622 	isvattrdir = (dir->tn_vnode->v_flag & V_XATTRDIR) ? 1 : 0;
    623 	for (tdp = dir->tn_dir; tdp; tdp = dir->tn_dir) {
    624 		ASSERT(tdp->td_next != tdp);
    625 		ASSERT(tdp->td_prev != tdp);
    626 		ASSERT(tdp->td_tmpnode);
    627 
    628 		dir->tn_dir = tdp->td_next;
    629 		namelen = strlen(tdp->td_name) + 1;
    630 
    631 		/*
    632 		 * Adjust the link counts to account for this directory
    633 		 * entry removal. Hidden attribute directories may
    634 		 * not be empty as they may be truncated as a side-
    635 		 * effect of removing the parent. We do hold/rele
    636 		 * operations to free up these tmpnodes.
    637 		 *
    638 		 * Skip the link count adjustment for parents of
    639 		 * attribute directories as those link counts
    640 		 * do not include the ".." reference in the hidden
    641 		 * directories.
    642 		 */
    643 		tp = tdp->td_tmpnode;
    644 		isdotdot = (strcmp("..", tdp->td_name) == 0);
    645 		skip_decr = (isvattrdir && isdotdot);
    646 		if (!skip_decr) {
    647 			ASSERT(tp->tn_nlink > 0);
    648 			DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
    649 		}
    650 
    651 		tmpfs_hash_out(tdp);
    652 
    653 		tmp_memfree(tdp, sizeof (struct tdirent) + namelen);
    654 		dir->tn_size -= (sizeof (struct tdirent) + namelen);
    655 		dir->tn_dirents--;
    656 	}
    657 
    658 	gethrestime(&now);
    659 	dir->tn_mtime = now;
    660 	dir->tn_ctime = now;
    661 
    662 	ASSERT(dir->tn_dir == NULL);
    663 	ASSERT(dir->tn_size == 0);
    664 	ASSERT(dir->tn_dirents == 0);
    665 }
    666 
    667 /*
    668  * Check if the source directory is in the path of the target directory.
    669  * The target directory is locked by the caller.
    670  *
    671  * XXX - The source and target's should be different upon entry.
    672  */
    673 static int
    674 tdircheckpath(
    675 	struct tmpnode *fromtp,
    676 	struct tmpnode	*toparent,
    677 	struct cred	*cred)
    678 {
    679 	int	error = 0;
    680 	struct tmpnode *dir, *dotdot;
    681 	struct tdirent *tdp;
    682 
    683 	ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock));
    684 
    685 	tdp = tmpfs_hash_lookup("..", toparent, 1, &dotdot);
    686 	if (tdp == NULL)
    687 		return (ENOENT);
    688 
    689 	ASSERT(dotdot);
    690 
    691 	if (dotdot == toparent) {
    692 		/* root of fs.  search trivially satisfied. */
    693 		tmpnode_rele(dotdot);
    694 		return (0);
    695 	}
    696 	for (;;) {
    697 		/*
    698 		 * Return error for cases like "mv c c/d",
    699 		 * "mv c c/d/e" and so on.
    700 		 */
    701 		if (dotdot == fromtp) {
    702 			tmpnode_rele(dotdot);
    703 			error = EINVAL;
    704 			break;
    705 		}
    706 		dir = dotdot;
    707 		error = tdirlookup(dir, "..", &dotdot, cred);
    708 		if (error) {
    709 			tmpnode_rele(dir);
    710 			break;
    711 		}
    712 		/*
    713 		 * We're okay if we traverse the directory tree up to
    714 		 * the root directory and don't run into the
    715 		 * parent directory.
    716 		 */
    717 		if (dir == dotdot) {
    718 			tmpnode_rele(dir);
    719 			tmpnode_rele(dotdot);
    720 			break;
    721 		}
    722 		tmpnode_rele(dir);
    723 	}
    724 	return (error);
    725 }
    726 
/*
 * Make the existing directory entry 'where' (named 'nm' in 'toparent',
 * currently referring to 'to') refer to 'fromtp' instead.
 *
 * The caller must hold toparent->tn_rwlock as writer.  The rwlocks of
 * 'fromtp' and 'to' are taken here and dropped before returning.
 *
 * Returns 0 on success.  Otherwise returns ESAME if source and target
 * are the same tmpnode, EXDEV if they are on different filesystems,
 * the error from tmp_taccess() / tmp_sticky_remove_access(), EISDIR or
 * ENOTDIR if one is a directory and the other is not, EBUSY if the
 * target directory cannot be vfs-locked or is a mount point, or EEXIST
 * if the target directory is not empty.
 */
static int
tdirrename(
	struct tmpnode *fromparent,	/* parent directory of source */
	struct tmpnode *fromtp,		/* source tmpnode */
	struct tmpnode *toparent,	/* parent directory of target */
	char *nm,			/* entry we are trying to change */
	struct tmpnode *to,		/* target tmpnode */
	struct tdirent *where,		/* target tmpnode directory entry */
	struct cred *cred)		/* credentials */
{
	int error = 0;
	int doingdirectory;
	timestruc_t now;

	/* nm is otherwise unused; this keeps lint quiet */
#if defined(lint)
	nm = nm;
#endif
	ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock));

	/*
	 * Short circuit rename of something to itself.
	 */
	if (fromtp == to)
		return (ESAME);		/* special KLUDGE error code */

	rw_enter(&fromtp->tn_rwlock, RW_READER);
	rw_enter(&to->tn_rwlock, RW_READER);

	/*
	 * Check that everything is on the same filesystem.
	 */
	if (to->tn_vnode->v_vfsp != toparent->tn_vnode->v_vfsp ||
	    to->tn_vnode->v_vfsp != fromtp->tn_vnode->v_vfsp) {
		error = EXDEV;
		goto out;
	}

	/*
	 * Must have write permission to rewrite target entry.
	 * Check for stickyness.
	 */
	if ((error = tmp_taccess(toparent, VWRITE, cred)) != 0 ||
	    (error = tmp_sticky_remove_access(toparent, to, cred)) != 0)
		goto out;

	/*
	 * Ensure source and target are compatible (both directories
	 * or both not directories).  If target is a directory it must
	 * be empty and have no links to it; in addition it must not
	 * be a mount point, and both the source and target must be
	 * writable.
	 */
	doingdirectory = (fromtp->tn_type == VDIR);
	if (to->tn_type == VDIR) {
		if (!doingdirectory) {
			error = EISDIR;
			goto out;
		}
		/*
		 * vn_vfswlock will prevent mounts from using the directory
		 * until we are done.
		 * Every error exit below drops it again; on success it is
		 * dropped in the doingdirectory branch further down.
		 */
		if (vn_vfswlock(TNTOV(to))) {
			error = EBUSY;
			goto out;
		}
		if (vn_mountedvfs(TNTOV(to)) != NULL) {
			vn_vfsunlock(TNTOV(to));
			error = EBUSY;
			goto out;
		}

		mutex_enter(&to->tn_tlock);
		if (to->tn_dirents > 2 || to->tn_nlink > 2) {
			mutex_exit(&to->tn_tlock);
			vn_vfsunlock(TNTOV(to));
			error = EEXIST; /* SIGH should be ENOTEMPTY */
			/*
			 * Update atime because checking tn_dirents is
			 * logically equivalent to reading the directory
			 */
			gethrestime(&to->tn_atime);
			goto out;
		}
		mutex_exit(&to->tn_tlock);
	} else if (doingdirectory) {
		error = ENOTDIR;
		goto out;
	}

	/*
	 * All checks passed: repoint the directory entry at the source
	 * tmpnode.
	 */
	tmpfs_hash_change(where, fromtp);
	gethrestime(&now);
	toparent->tn_mtime = now;
	toparent->tn_ctime = now;

	/*
	 * Upgrade to write lock on "to" (i.e., the target tmpnode).
	 * The lock is dropped and re-acquired, so this is not atomic.
	 */
	rw_exit(&to->tn_rwlock);
	rw_enter(&to->tn_rwlock, RW_WRITER);

	/*
	 * Decrement the link count of the target tmpnode.
	 */
	DECR_COUNT(&to->tn_nlink, &to->tn_tlock);
	to->tn_ctime = now;

	if (doingdirectory) {
		/*
		 * The entry for "to" no longer exists so release the vfslock.
		 */
		vn_vfsunlock(TNTOV(to));

		/*
		 * Decrement the target link count and delete all entries.
		 */
		tdirtrunc(to);
		ASSERT(to->tn_nlink == 0);

		/*
		 * Renaming a directory with the parent different
		 * requires that ".." be rewritten.  The window is
		 * still there for ".." to be inconsistent, but this
		 * is unavoidable, and a lot shorter than when it was
		 * done in a user process.
		 */
		if (fromparent != toparent)
			tdirfixdotdot(fromtp, fromparent, toparent);
	}
out:
	/* Common exit: drop the locks taken on "to" and "fromtp" above. */
	rw_exit(&to->tn_rwlock);
	rw_exit(&fromtp->tn_rwlock);
	return (error);
}
    861 
    862 static void
    863 tdirfixdotdot(
    864 	struct tmpnode	*fromtp,	/* child directory */
    865 	struct tmpnode	*fromparent,	/* old parent directory */
    866 	struct tmpnode	*toparent)	/* new parent directory */
    867 {
    868 	struct tdirent	*dotdot;
    869 
    870 	ASSERT(RW_LOCK_HELD(&toparent->tn_rwlock));
    871 
    872 	/*
    873 	 * Increment the link count in the new parent tmpnode
    874 	 */
    875 	INCR_COUNT(&toparent->tn_nlink, &toparent->tn_tlock);
    876 	gethrestime(&toparent->tn_ctime);
    877 
    878 	dotdot = tmpfs_hash_lookup("..", fromtp, 0, NULL);
    879 
    880 	ASSERT(dotdot->td_tmpnode == fromparent);
    881 	dotdot->td_tmpnode = toparent;
    882 
    883 	/*
    884 	 * Decrement the link count of the old parent tmpnode.
    885 	 * If fromparent is NULL, then this is a new directory link;
    886 	 * it has no parent, so we need not do anything.
    887 	 */
    888 	if (fromparent != NULL) {
    889 		mutex_enter(&fromparent->tn_tlock);
    890 		if (fromparent->tn_nlink != 0) {
    891 			fromparent->tn_nlink--;
    892 			gethrestime(&fromparent->tn_ctime);
    893 		}
    894 		mutex_exit(&fromparent->tn_tlock);
    895 	}
    896 }
    897 
/*
 * Allocate a new directory entry called 'name' that refers to 'tp',
 * enter it in the name hash, and link it into the entry list of 'dir'
 * at the first free offset slot.
 *
 * For a DE_RENAME of a directory, the ".." entry of 'tp' is rewritten
 * first.  Note that despite its name, 'fromtp' is passed on as the old
 * parent directory (tdirenter() supplies its 'fromparent' here).
 *
 * Returns 0 on success, ENOENT if 'dir' no longer has an entry list,
 * EXDEV if 'tp' and 'dir' are on different filesystems, or ENOSPC if
 * the entry cannot be allocated.
 */
static int
tdiraddentry(
	struct tmpnode	*dir,	/* target directory to make entry in */
	struct tmpnode	*tp,	/* new tmpnode */
	char		*name,
	enum de_op	op,
	struct tmpnode	*fromtp)
{
	struct tdirent *tdp, *tpdp;
	size_t		namelen, alloc_size;
	timestruc_t	now;

	/*
	 * Make sure the parent directory wasn't removed from
	 * underneath the caller.
	 */
	if (dir->tn_dir == NULL)
		return (ENOENT);

	/*
	 * Check that everything is on the same filesystem.
	 */
	if (tp->tn_vnode->v_vfsp != dir->tn_vnode->v_vfsp)
		return (EXDEV);

	/*
	 * Allocate and initialize directory entry
	 * (one chunk holding the tdirent followed by the name).
	 */
	namelen = strlen(name) + 1;
	alloc_size = namelen + sizeof (struct tdirent);
	tdp = tmp_memalloc(alloc_size, 0);
	if (tdp == NULL)
		return (ENOSPC);

	/* No failure is possible past this point. */
	if ((op == DE_RENAME) && (tp->tn_type == VDIR))
		tdirfixdotdot(tp, fromtp, dir);

	dir->tn_size += alloc_size;
	dir->tn_dirents++;
	tdp->td_tmpnode = tp;
	tdp->td_parent = dir;

	/*
	 * The directory entry and its name were allocated sequentially.
	 */
	tdp->td_name = (char *)tdp + sizeof (struct tdirent);
	(void) strcpy(tdp->td_name, name);

	/* td_parent and td_name are set, so the entry can be hashed now. */
	tmpfs_hash_in(tdp);

	/*
	 * Some utilities expect the size of a directory to remain
	 * somewhat static.  For example, a routine which unlinks
	 * files between calls to readdir(); the size of the
	 * directory changes from underneath it and so the real
	 * directory offset in bytes is invalid.  To circumvent
	 * this problem, we initialize a directory entry with an
	 * phony offset, and use this offset to determine end of
	 * file in tmp_readdir.
	 */
	/* Start the search at the roving slot pointer. */
	tpdp = dir->tn_dir->td_prev;
	/*
	 * Install at first empty "slot" in directory list.
	 * A slot is empty when the offsets of two neighbours differ by
	 * more than one, or when the end of the list is reached.
	 */
	while (tpdp->td_next != NULL && (tpdp->td_next->td_offset -
	    tpdp->td_offset) <= 1) {
		ASSERT(tpdp->td_next != tpdp);
		ASSERT(tpdp->td_prev != tpdp);
		ASSERT(tpdp->td_next->td_offset > tpdp->td_offset);
		tpdp = tpdp->td_next;
	}
	tdp->td_offset = tpdp->td_offset + 1;

	/*
	 * If we're at the end of the dirent list and the offset (which
	 * is necessarily the largest offset in this directory) is more
	 * than twice the number of dirents, that means the directory is
	 * 50% holes.  At this point we reset the slot pointer back to
	 * the beginning of the directory so we start using the holes.
	 * The idea is that if there are N dirents, there must also be
	 * N holes, so we can satisfy the next N creates by walking at
	 * most 2N entries; thus the average cost of a create is constant.
	 * Note that we use the first dirent's td_prev as the roving
	 * slot pointer; it's ugly, but it saves a word in every dirent.
	 */
	if (tpdp->td_next == NULL && tpdp->td_offset > 2 * dir->tn_dirents)
		dir->tn_dir->td_prev = dir->tn_dir->td_next;
	else
		dir->tn_dir->td_prev = tdp;

	ASSERT(tpdp->td_next != tpdp);
	ASSERT(tpdp->td_prev != tpdp);

	/* Splice the new entry into the list right after tpdp. */
	tdp->td_next = tpdp->td_next;
	if (tdp->td_next) {
		tdp->td_next->td_prev = tdp;
	}
	tdp->td_prev = tpdp;
	tpdp->td_next = tdp;

	ASSERT(tdp->td_next != tdp);
	ASSERT(tdp->td_prev != tdp);
	ASSERT(tpdp->td_next != tpdp);
	ASSERT(tpdp->td_prev != tpdp);

	gethrestime(&now);
	dir->tn_mtime = now;
	dir->tn_ctime = now;

	return (0);
}
   1009 
   1010 static int
   1011 tdirmaketnode(
   1012 	struct tmpnode *dir,
   1013 	struct tmount	*tm,
   1014 	struct vattr	*va,
   1015 	enum	de_op	op,
   1016 	struct tmpnode **newnode,
   1017 	struct cred	*cred)
   1018 {
   1019 	struct tmpnode *tp;
   1020 	enum vtype	type;
   1021 
   1022 	ASSERT(va != NULL);
   1023 	ASSERT(op == DE_CREATE || op == DE_MKDIR);
   1024 	if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) ||
   1025 	    ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime)))
   1026 		return (EOVERFLOW);
   1027 	type = va->va_type;
   1028 	tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE);
   1029 	tmpnode_init(tm, tp, va, cred);
   1030 
   1031 	/* setup normal file/dir's extended attribute directory */
   1032 	if (dir->tn_flags & ISXATTR) {
   1033 		/* parent dir is , mark file as xattr */
   1034 		tp->tn_flags |= ISXATTR;
   1035 	}
   1036 
   1037 
   1038 	if (type == VBLK || type == VCHR) {
   1039 		tp->tn_vnode->v_rdev = tp->tn_rdev = va->va_rdev;
   1040 	} else {
   1041 		tp->tn_vnode->v_rdev = tp->tn_rdev = NODEV;
   1042 	}
   1043 	tp->tn_vnode->v_type = type;
   1044 	tp->tn_uid = crgetuid(cred);
   1045 
   1046 	/*
   1047 	 * To determine the group-id of the created file:
   1048 	 *   1) If the gid is set in the attribute list (non-Sun & pre-4.0
   1049 	 *	clients are not likely to set the gid), then use it if
   1050 	 *	the process is privileged, belongs to the target group,
   1051 	 *	or the group is the same as the parent directory.
   1052 	 *   2) If the filesystem was not mounted with the Old-BSD-compatible
   1053 	 *	GRPID option, and the directory's set-gid bit is clear,
   1054 	 *	then use the process's gid.
   1055 	 *   3) Otherwise, set the group-id to the gid of the parent directory.
   1056 	 */
   1057 	if ((va->va_mask & AT_GID) &&
   1058 	    ((va->va_gid == dir->tn_gid) || groupmember(va->va_gid, cred) ||
   1059 	    secpolicy_vnode_create_gid(cred) == 0)) {
   1060 		/*
   1061 		 * XXX - is this only the case when a 4.0 NFS client, or a
   1062 		 * client derived from that code, makes a call over the wire?
   1063 		 */
   1064 		tp->tn_gid = va->va_gid;
   1065 	} else {
   1066 		if (dir->tn_mode & VSGID)
   1067 			tp->tn_gid = dir->tn_gid;
   1068 		else
   1069 			tp->tn_gid = crgetgid(cred);
   1070 	}
   1071 	/*
   1072 	 * If we're creating a directory, and the parent directory has the
   1073 	 * set-GID bit set, set it on the new directory.
   1074 	 * Otherwise, if the user is neither privileged nor a member of the
   1075 	 * file's new group, clear the file's set-GID bit.
   1076 	 */
   1077 	if (dir->tn_mode & VSGID && type == VDIR)
   1078 		tp->tn_mode |= VSGID;
   1079 	else {
   1080 		if ((tp->tn_mode & VSGID) &&
   1081 		    secpolicy_vnode_setids_setgids(cred, tp->tn_gid) != 0)
   1082 			tp->tn_mode &= ~VSGID;
   1083 	}
   1084 
   1085 	if (va->va_mask & AT_ATIME)
   1086 		tp->tn_atime = va->va_atime;
   1087 	if (va->va_mask & AT_MTIME)
   1088 		tp->tn_mtime = va->va_mtime;
   1089 
   1090 	if (op == DE_MKDIR)
   1091 		tdirinit(dir, tp);
   1092 
   1093 	*newnode = tp;
   1094 	return (0);
   1095 }
   1096