Home | History | Annotate | Download | only in ufs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*	copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
     27 /*	  All Rights Reserved  	*/
     28 
     29 /*
     30  * Portions of this source code were derived from Berkeley 4.3 BSD
     31  * under license from the Regents of the University of California.
     32  */
     33 
     34 #include <sys/types.h>
     35 #include <sys/systm.h>
     36 #include <sys/errno.h>
     37 #include <sys/kmem.h>
     38 #include <sys/buf.h>
     39 #include <sys/vnode.h>
     40 #include <sys/vfs.h>
     41 #include <sys/user.h>
     42 #include <sys/callb.h>
     43 #include <sys/cpuvar.h>
     44 #include <sys/fs/ufs_inode.h>
     45 #include <sys/fs/ufs_log.h>
     46 #include <sys/fs/ufs_trans.h>
     47 #include <sys/fs/ufs_acl.h>
     48 #include <sys/fs/ufs_bio.h>
     49 #include <sys/fs/ufs_fsdir.h>
     50 #include <sys/debug.h>
     51 #include <sys/cmn_err.h>
     52 #include <sys/sysmacros.h>
     53 #include <vm/pvn.h>
     54 
     55 extern pri_t 			minclsyspri;
     56 extern int			hash2ints();
     57 extern struct kmem_cache	*inode_cache;	/* cache of free inodes */
     58 extern int			ufs_idle_waiters;
     59 extern struct instats		ins;
     60 
     61 static void ufs_attr_purge(struct inode *);
     62 
     63 /*
     64  * initialize a thread's queue struct
     65  */
     66 void
     67 ufs_thread_init(struct ufs_q *uq, int lowat)
     68 {
     69 	bzero((caddr_t)uq, sizeof (*uq));
     70 	cv_init(&uq->uq_cv, NULL, CV_DEFAULT, NULL);
     71 	mutex_init(&uq->uq_mutex, NULL, MUTEX_DEFAULT, NULL);
     72 	uq->uq_lowat = lowat;
     73 	uq->uq_hiwat = 2 * lowat;
     74 	uq->uq_threadp = NULL;
     75 }
     76 
     77 /*
     78  * start a thread for a queue (assumes success)
     79  */
     80 void
     81 ufs_thread_start(struct ufs_q *uq, void (*func)(), struct vfs *vfsp)
     82 {
     83 	mutex_enter(&uq->uq_mutex);
     84 	if (uq->uq_threadp == NULL) {
     85 		uq->uq_threadp = thread_create(NULL, 0, func, vfsp, 0, &p0,
     86 		    TS_RUN, minclsyspri);
     87 		uq->uq_flags = 0;
     88 	}
     89 	mutex_exit(&uq->uq_mutex);
     90 }
     91 
     92 /*
     93  * wait for the thread to exit
     94  */
     95 void
     96 ufs_thread_exit(struct ufs_q *uq)
     97 {
     98 	kt_did_t ufs_thread_did = 0;
     99 
    100 	mutex_enter(&uq->uq_mutex);
    101 	uq->uq_flags &= ~(UQ_SUSPEND | UQ_SUSPENDED);
    102 	if (uq->uq_threadp != NULL) {
    103 		ufs_thread_did = uq->uq_threadp->t_did;
    104 		uq->uq_flags |= (UQ_EXIT|UQ_WAIT);
    105 		cv_broadcast(&uq->uq_cv);
    106 	}
    107 	mutex_exit(&uq->uq_mutex);
    108 
    109 	/*
    110 	 * It's safe to call thread_join() with an already-gone
    111 	 * t_did, but we have to obtain it before the kernel
    112 	 * thread structure is freed. We do so above under the
    113 	 * protection of the uq_mutex when we're sure the thread
    114 	 * still exists and it's save to de-reference it.
    115 	 * We also have to check if ufs_thread_did is != 0
    116 	 * before calling thread_join() since thread 0 in the system
    117 	 * gets a t_did of 0.
    118 	 */
    119 	if (ufs_thread_did)
    120 		thread_join(ufs_thread_did);
    121 }
    122 
    123 /*
    124  * wait for a thread to suspend itself on the caller's behalf
    125  *	the caller is responsible for continuing the thread
    126  */
    127 void
    128 ufs_thread_suspend(struct ufs_q *uq)
    129 {
    130 	mutex_enter(&uq->uq_mutex);
    131 	if (uq->uq_threadp != NULL) {
    132 		/*
    133 		 * wait while another thread is suspending this thread.
    134 		 * no need to do a cv_broadcast(), as whoever suspended
    135 		 * the thread must continue it at some point.
    136 		 */
    137 		while ((uq->uq_flags & UQ_SUSPEND) &&
    138 		    (uq->uq_threadp != NULL)) {
    139 			/*
    140 			 * We can't use cv_signal() because if our
    141 			 * signal doesn't happen to hit the desired
    142 			 * thread but instead some other waiter like
    143 			 * ourselves, we'll wait forever for a
    144 			 * response.  Well, at least an indeterminate
    145 			 * amount of time until we just happen to get
    146 			 * lucky from whomever did get signalled doing
    147 			 * a cv_signal() of their own.  This is an
    148 			 * unfortunate performance lossage.
    149 			 */
    150 			uq->uq_flags |= UQ_WAIT;
    151 			cv_wait(&uq->uq_cv, &uq->uq_mutex);
    152 		}
    153 
    154 		uq->uq_flags |= (UQ_SUSPEND | UQ_WAIT);
    155 
    156 		/*
    157 		 * wait for the thread to suspend itself
    158 		 */
    159 		if ((uq->uq_flags & UQ_SUSPENDED) == 0 &&
    160 		    (uq->uq_threadp != NULL)) {
    161 			cv_broadcast(&uq->uq_cv);
    162 		}
    163 
    164 		while (((uq->uq_flags & UQ_SUSPENDED) == 0) &&
    165 		    (uq->uq_threadp != NULL)) {
    166 			cv_wait(&uq->uq_cv, &uq->uq_mutex);
    167 		}
    168 	}
    169 	mutex_exit(&uq->uq_mutex);
    170 }
    171 
    172 /*
    173  * allow a thread to continue from a ufs_thread_suspend()
    174  *	This thread must be the same as the thread that called
    175  *	ufs_thread_suspend.
    176  */
    177 void
    178 ufs_thread_continue(struct ufs_q *uq)
    179 {
    180 	mutex_enter(&uq->uq_mutex);
    181 	uq->uq_flags &= ~(UQ_SUSPEND | UQ_SUSPENDED);
    182 	cv_broadcast(&uq->uq_cv);
    183 	mutex_exit(&uq->uq_mutex);
    184 }
    185 
    186 /*
    187  * some common code for managing a threads execution
    188  *	uq is locked at entry and return
    189  *	may sleep
    190  *	may exit
    191  */
    192 /*
    193  * Kind of a hack passing in the callb_cpr_t * here.
    194  * It should really be part of the ufs_q structure.
    195  * I did not put it in there because we are already in beta
    196  * and I was concerned that changing ufs_inode.h to include
    197  * callb.h might break something.
    198  */
    199 int
    200 ufs_thread_run(struct ufs_q *uq, callb_cpr_t *cprinfop)
    201 {
    202 again:
    203 	ASSERT(uq->uq_ne >= 0);
    204 
    205 	if (uq->uq_flags & UQ_SUSPEND) {
    206 		uq->uq_flags |= UQ_SUSPENDED;
    207 	} else if (uq->uq_flags & UQ_EXIT) {
    208 		/*
    209 		 * exiting; empty the queue (may infinite loop)
    210 		 */
    211 		if (uq->uq_ne)
    212 			return (uq->uq_ne);
    213 		uq->uq_threadp = NULL;
    214 		if (uq->uq_flags & UQ_WAIT) {
    215 			cv_broadcast(&uq->uq_cv);
    216 		}
    217 		uq->uq_flags &= ~(UQ_EXIT | UQ_WAIT);
    218 		CALLB_CPR_EXIT(cprinfop);
    219 		thread_exit();
    220 	} else if (uq->uq_ne >= uq->uq_lowat) {
    221 		/*
    222 		 * process a block of entries until below high water mark
    223 		 */
    224 		return (uq->uq_ne - (uq->uq_lowat >> 1));
    225 	}
    226 	if (uq->uq_flags & UQ_WAIT) {
    227 		uq->uq_flags &= ~UQ_WAIT;
    228 		cv_broadcast(&uq->uq_cv);
    229 	}
    230 	CALLB_CPR_SAFE_BEGIN(cprinfop);
    231 	cv_wait(&uq->uq_cv, &uq->uq_mutex);
    232 	CALLB_CPR_SAFE_END(cprinfop, &uq->uq_mutex);
    233 	goto again;
    234 }
    235 
/*
 * DELETE INODE
 * The following routines implement the protocol for freeing the resources
 * held by an idle and deleted inode.
 */
/*
 * ufs_delete - release everything held by a deleted inode.
 *
 *	ufsvfsp		file system the inode belongs to
 *	ip		inode to tear down; the caller's hold on its vnode
 *			is consumed (see below)
 *	dolockfs	non-zero: bracket the work with ufs_lockfs_begin()/
 *			ufs_lockfs_end() and give up if the former fails.
 *			zero: skip the lockfs protocol and only manage
 *			T_DONTBLOCK on the current thread.
 *
 * In order, the routine drops the reference to the extended attribute
 * directory (if any), purges an attribute directory's contents,
 * truncates the file to zero length, releases the ACL, frees the inode
 * itself, and gives back the inode's quota charge.
 *
 * The vnode hold is released with VN_RELE() on every path except the
 * early return taken when ufs_lockfs_begin() fails.
 */
void
ufs_delete(struct ufsvfs *ufsvfsp, struct inode *ip, int dolockfs)
{
	ushort_t	mode;
	struct vnode	*vp	= ITOV(ip);
	struct ulockfs	*ulp;
	int		trans_size;
	/* only regular files take i_rwlock below */
	int		dorwlock = ((ip->i_mode & IFMT) == IFREG);
	int		issync;
	int		err;
	struct inode	*dp;
	struct ufs_q    *delq = &ufsvfsp->vfs_delete;
	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;

	/*
	 * Ignore if deletes are not allowed (wlock/hlock).
	 * The inode is dropped from the unreclaimed accounting and the
	 * hold is released without freeing anything.
	 */
	if (ULOCKFS_IS_NOIDEL(ITOUL(ip))) {
		mutex_enter(&delq->uq_mutex);
		delq_info->delq_unreclaimed_blocks -= ip->i_blocks;
		delq_info->delq_unreclaimed_files--;
		mutex_exit(&delq->uq_mutex);
		VN_RELE(vp);
		return;
	}

	/*
	 * Somebody else holds the vnode as well, or the inode's mode is
	 * already zero: do not delete, just fix up the accounting and
	 * drop our hold.
	 */
	if ((vp->v_count > 1) || (ip->i_mode == 0)) {
		mutex_enter(&delq->uq_mutex);
		delq_info->delq_unreclaimed_blocks -= ip->i_blocks;
		delq_info->delq_unreclaimed_files--;
		mutex_exit(&delq->uq_mutex);
		VN_RELE(vp);
		return;
	}
	/*
	 * If we are called as part of setting a fs lock, then only
	 * do part of the lockfs protocol.  In other words, don't hang.
	 */
	if (dolockfs) {
		/*
		 * NOTE(review): this return neither calls VN_RELE(vp) nor
		 * adjusts the delq_info counters, unlike the two early
		 * returns above -- confirm that this is intended.
		 */
		if (ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_DELETE_MASK))
			return;
	} else {
		/*
		 * check for recursive VOP call
		 *
		 * ulp == NULL records that T_DONTBLOCK was already set by
		 * somebody else; in that case the IFREE transaction below
		 * is not opened here and the flag is left alone at the end.
		 */
		if (curthread->t_flag & T_DONTBLOCK) {
			ulp = NULL;
		} else {
			ulp = &ufsvfsp->vfs_ulockfs;
			curthread->t_flag |= T_DONTBLOCK;
		}
	}

	/*
	 * Hold rwlock to synchronize with (nfs) writes
	 */
	if (dorwlock)
		rw_enter(&ip->i_rwlock, RW_WRITER);

	/*
	 * Delete the attribute directory.
	 * i_oeftflag is used as the inode number of the attribute
	 * directory (it is what gets passed to ufs_iget()).
	 */
	if (ip->i_oeftflag != 0) {
		TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_REMOVE,
		    trans_size = (int)TOP_REMOVE_SIZE(ip));
		rw_enter(&ip->i_contents, RW_WRITER);
		err = ufs_iget(ip->i_vfs, ip->i_oeftflag,
		    &dp, CRED());
		if (err == 0) {
			rw_enter(&dp->i_rwlock, RW_WRITER);
			rw_enter(&dp->i_contents, RW_WRITER);
			dp->i_flag |= IUPD|ICHG;
			dp->i_seq++;
			TRANS_INODE(dp->i_ufsvfs, dp);
			/*
			 * presumably one link for the reference from ip
			 * and one for the directory's own "." -- TODO
			 * confirm
			 */
			dp->i_nlink -= 2;
			ufs_setreclaim(dp);
			/*
			 * Should get rid of any negative cache entries that
			 * might be lingering, as well as ``.'' and
			 * ``..''.  If we don't, the VN_RELE() below
			 * won't actually put dp on the delete queue
			 * and it'll hang out until someone forces it
			 * (lockfs -f, umount, ...).  The only reliable
			 * way of doing this at the moment is to call
			 * dnlc_purge_vp(ITOV(dp)), which is unacceptably
			 * slow, so we'll just note the problem in this
			 * comment for now.
			 */
			dnlc_remove(ITOV(dp), ".");
			dnlc_remove(ITOV(dp), "..");
			ITIMES_NOLOCK(dp);
			/* without a transaction, push the inode out now */
			if (!TRANS_ISTRANS(ufsvfsp)) {
				ufs_iupdat(dp, I_SYNC);
			}
			rw_exit(&dp->i_contents);
			rw_exit(&dp->i_rwlock);
			VN_RELE(ITOV(dp));
		}
		/*
		 * Clear out attribute pointer
		 * (done whether or not the ufs_iget() above succeeded)
		 */
		ip->i_oeftflag = 0;
		rw_exit(&ip->i_contents);
		TRANS_END_CSYNC(ufsvfsp, err, issync,
		    TOP_REMOVE, trans_size);
		dnlc_remove(ITOV(ip), XATTR_DIR_NAME);
	}

	/*
	 * An attribute directory gets its entries purged before its own
	 * space is freed.
	 */
	if ((ip->i_mode & IFMT) == IFATTRDIR) {
		ufs_attr_purge(ip);
	}

	/* free all of the file's blocks; the result is deliberately ignored */
	(void) TRANS_ITRUNC(ip, (u_offset_t)0, I_FREE | I_ACCT, CRED());

	/*
	 * the inode's space has been freed; now free the inode
	 */
	if (ulp) {
		trans_size = TOP_IFREE_SIZE(ip);
		TRANS_BEGIN_ASYNC(ufsvfsp, TOP_IFREE, trans_size);
	}
	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
	rw_enter(&ip->i_contents, RW_WRITER);
	TRANS_INODE(ufsvfsp, ip);
	/* ufs_ifree() below still needs the old mode, so save it first */
	mode = ip->i_mode;
	ip->i_mode = 0;
	ip->i_rdev = 0;
	ip->i_ordev = 0;
	ip->i_flag |= IMOD;
	if (ip->i_ufs_acl) {
		(void) ufs_si_free(ip->i_ufs_acl, vp->v_vfsp, CRED());
		ip->i_ufs_acl = NULL;
		ip->i_shadow = 0;
	}

	/*
	 * This inode is torn down but still retains its identity
	 * (inode number).  It could get recycled soon so it's best
	 * to clean up the vnode just in case.
	 */
	mutex_enter(&vp->v_lock);
	vn_recycle(vp);
	mutex_exit(&vp->v_lock);

	/*
	 * free the inode
	 */
	ufs_ifree(ip, ip->i_number, mode);
	/*
	 * release quota resources; can't fail
	 */
	(void) chkiq((struct ufsvfs *)vp->v_vfsp->vfs_data,
	    /* change */ -1, ip, (uid_t)ip->i_uid, 0, CRED(),
	    (char **)NULL, (size_t *)NULL);
	dqrele(ip->i_dquot);
	ip->i_dquot = NULL;
	ip->i_flag &= ~(IDEL | IDIRECTIO);
	ip->i_cflags = 0;
	/*
	 * Without a transaction the inode is written out synchronously.
	 * With one, only the count of unreclaimed files is adjusted here;
	 * delq_unreclaimed_blocks is not touched on this path.
	 */
	if (!TRANS_ISTRANS(ufsvfsp)) {
		ufs_iupdat(ip, I_SYNC);
	} else {
		mutex_enter(&delq->uq_mutex);
		delq_info->delq_unreclaimed_files--;
		mutex_exit(&delq->uq_mutex);
	}
	rw_exit(&ip->i_contents);
	rw_exit(&ufsvfsp->vfs_dqrwlock);
	if (dorwlock)
		rw_exit(&ip->i_rwlock);
	/* drop the hold the caller handed us */
	VN_RELE(vp);

	/*
	 * End of transaction
	 */
	if (ulp) {
		TRANS_END_ASYNC(ufsvfsp, TOP_IFREE, trans_size);
		if (dolockfs)
			ufs_lockfs_end(ulp);
		else
			curthread->t_flag &= ~T_DONTBLOCK;
	}
}
    423 
    424 /*
    425  * Create the delete thread and init the delq_info for this fs
    426  */
    427 void
    428 ufs_delete_init(struct ufsvfs *ufsvfsp, int lowat)
    429 {
    430 	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
    431 
    432 	ufs_thread_init(&ufsvfsp->vfs_delete, lowat);
    433 	(void) memset((void *)delq_info, 0, sizeof (*delq_info));
    434 }
    435 
    436 /*
    437  * thread that frees up deleted inodes
    438  */
    439 void
    440 ufs_thread_delete(struct vfs *vfsp)
    441 {
    442 	struct ufsvfs	*ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
    443 	struct ufs_q	*uq = &ufsvfsp->vfs_delete;
    444 	struct inode	*ip;
    445 	long		ne;
    446 	callb_cpr_t	cprinfo;
    447 
    448 	CALLB_CPR_INIT(&cprinfo, &uq->uq_mutex, callb_generic_cpr,
    449 	    "ufsdelete");
    450 
    451 	mutex_enter(&uq->uq_mutex);
    452 again:
    453 	/*
    454 	 * Sleep until there is work to do.  Only do one entry at
    455 	 * a time, to reduce the wait time for checking for a suspend
    456 	 * request.  The ?: is for pedantic portability.
    457 	 */
    458 	ne = ufs_thread_run(uq, &cprinfo) ? 1 : 0;
    459 
    460 	/*
    461 	 * process an entry, if there are any
    462 	 */
    463 	if (ne && (ip = uq->uq_ihead)) {
    464 		/*
    465 		 * process first entry on queue.  Assumed conditions are:
    466 		 *	ip is held (v_count >= 1)
    467 		 *	ip is referenced (i_flag & IREF)
    468 		 *	ip is free (i_nlink <= 0)
    469 		 */
    470 		if ((uq->uq_ihead = ip->i_freef) == ip)
    471 			uq->uq_ihead = NULL;
    472 		ip->i_freef->i_freeb = ip->i_freeb;
    473 		ip->i_freeb->i_freef = ip->i_freef;
    474 		ip->i_freef = ip;
    475 		ip->i_freeb = ip;
    476 		uq->uq_ne--;
    477 		mutex_exit(&uq->uq_mutex);
    478 		ufs_delete(ufsvfsp, ip, 1);
    479 		mutex_enter(&uq->uq_mutex);
    480 	}
    481 	goto again;
    482 }
    483 
    484 /*
    485  * drain ne entries off the delete queue.  As new queue entries may
    486  * be added while we're working, ne is interpreted as follows:
    487  *
    488  * ne > 0   => remove up to ne entries
    489  * ne == 0  => remove all entries currently on the queue
    490  * ne == -1 => remove entries until the queue is empty
    491  */
    492 void
    493 ufs_delete_drain(struct vfs *vfsp, int ne, int dolockfs)
    494 {
    495 	struct ufsvfs	*ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
    496 	struct ufs_q	*uq;
    497 	struct inode	*ip;
    498 	int		drain_cnt = 0;
    499 	int		done;
    500 
    501 	/*
    502 	 * if forcibly unmounted; ignore
    503 	 */
    504 	if (ufsvfsp == NULL)
    505 		return;
    506 
    507 	uq = &ufsvfsp->vfs_delete;
    508 	mutex_enter(&uq->uq_mutex);
    509 	if (ne == 0)
    510 		drain_cnt = uq->uq_ne;
    511 	else if (ne > 0)
    512 		drain_cnt = ne;
    513 
    514 	/*
    515 	 * process up to ne entries
    516 	 */
    517 
    518 	done = 0;
    519 	while (!done && (ip = uq->uq_ihead)) {
    520 		if (ne != -1)
    521 			drain_cnt--;
    522 		if (ne != -1 && drain_cnt == 0)
    523 			done = 1;
    524 		if ((uq->uq_ihead = ip->i_freef) == ip)
    525 			uq->uq_ihead = NULL;
    526 		ip->i_freef->i_freeb = ip->i_freeb;
    527 		ip->i_freeb->i_freef = ip->i_freef;
    528 		ip->i_freef = ip;
    529 		ip->i_freeb = ip;
    530 		uq->uq_ne--;
    531 		mutex_exit(&uq->uq_mutex);
    532 		ufs_delete(ufsvfsp, ip, dolockfs);
    533 		mutex_enter(&uq->uq_mutex);
    534 	}
    535 	mutex_exit(&uq->uq_mutex);
    536 }
    537 
    538 void
    539 ufs_sync_with_thread(struct ufs_q *uq)
    540 {
    541 	mutex_enter(&uq->uq_mutex);
    542 
    543 	/*
    544 	 * Wake up delete thread to free up space.
    545 	 */
    546 	if ((uq->uq_flags & UQ_WAIT) == 0) {
    547 		uq->uq_flags |= UQ_WAIT;
    548 		cv_broadcast(&uq->uq_cv);
    549 	}
    550 
    551 	while ((uq->uq_threadp != NULL) && (uq->uq_flags & UQ_WAIT)) {
    552 		cv_wait(&uq->uq_cv, &uq->uq_mutex);
    553 	}
    554 
    555 	mutex_exit(&uq->uq_mutex);
    556 }
    557 
    558 /*
    559  * Get rid of everything that's currently in the delete queue,
    560  * plus whatever the delete thread is working on at the moment.
    561  *
    562  * This ability is required for providing true POSIX semantics
    563  * regarding close(2), unlink(2), etc, even when logging is enabled.
    564  * The standard requires that the released space be immediately
    565  * observable (statvfs(2)) and allocatable (e.g., write(2)).
    566  */
    567 void
    568 ufs_delete_drain_wait(struct ufsvfs *ufsvfsp, int dolockfs)
    569 {
    570 	struct ufs_q *uq = &ufsvfsp->vfs_delete;
    571 	int	error;
    572 	struct ufs_q    *delq = &ufsvfsp->vfs_delete;
    573 	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
    574 
    575 	/*
    576 	 * If there is something on delq or delete thread
    577 	 * working on delq.
    578 	 */
    579 	mutex_enter(&delq->uq_mutex);
    580 	if (delq_info->delq_unreclaimed_files > 0) {
    581 		mutex_exit(&delq->uq_mutex);
    582 		(void) ufs_delete_drain(ufsvfsp->vfs_vfs, 0, dolockfs);
    583 		ufs_sync_with_thread(uq);
    584 	} else {
    585 		ASSERT(delq_info->delq_unreclaimed_files == 0);
    586 		mutex_exit(&delq->uq_mutex);
    587 		return;
    588 	}
    589 
    590 	/*
    591 	 * Commit any outstanding transactions to make sure
    592 	 * any canceled freed blocks are available for allocation.
    593 	 */
    594 	curthread->t_flag |= T_DONTBLOCK;
    595 	TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_UPDATE, TOP_COMMIT_SIZE, error);
    596 	if (!error) {
    597 		TRANS_END_SYNC(ufsvfsp, error, TOP_COMMIT_UPDATE,
    598 		    TOP_COMMIT_SIZE);
    599 	}
    600 	curthread->t_flag &= ~T_DONTBLOCK;
    601 }
    602 
    603 /*
    604  * Adjust the resource usage in a struct statvfs based on
    605  * what's in the delete queue.
    606  *
    607  * We do not consider the impact of ACLs or extended attributes
    608  * that may be deleted as a side-effect of deleting a file.
    609  * Those are metadata, and their sizes aren't reflected in the
    610  * sizes returned by stat(), so this is not a problem.
    611  */
    612 void
    613 ufs_delete_adjust_stats(struct ufsvfs *ufsvfsp, struct statvfs64 *sp)
    614 {
    615 	struct ufs_q *uq = &ufsvfsp->vfs_delete;
    616 	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
    617 
    618 	mutex_enter(&uq->uq_mutex);
    619 	/*
    620 	 * The blocks accounted for in the delete queue info are
    621 	 * counted in DEV_BSIZE chunks, but ufs_statvfs counts in
    622 	 * filesystem fragments, so a conversion is required here.
    623 	 */
    624 	sp->f_bfree += dbtofsb(ufsvfsp->vfs_fs,
    625 	    delq_info->delq_unreclaimed_blocks);
    626 	sp->f_ffree += delq_info->delq_unreclaimed_files;
    627 	mutex_exit(&uq->uq_mutex);
    628 }
    629 
    630 /*
    631  * IDLE INODE
    632  * The following routines implement the protocol for maintaining an
    633  * LRU list of idle inodes and for moving the idle inodes to the
    634  * reuse list when the number of allocated inodes exceeds the user
    635  * tunable high-water mark (ufs_ninode).
    636  */
    637 
/*
 * clean an idle inode and move it to the reuse list
 *
 * The caller must hold the inode's vnode; the function panics if
 * v_count is less than 2 by the time it looks.  That hold is consumed:
 * it is either given up with VN_RELE() (when the inode turned out to
 * be in use, dirty, or still caching data) or the inode is taken out
 * of the inode hash and passed to ufs_free_inode().
 */
static void
ufs_idle_free(struct inode *ip)
{
	int			pages;
	int			hno;
	kmutex_t		*ihm;
	struct ufsvfs		*ufsvfsp	= ip->i_ufsvfs;
	struct vnode		*vp		= ITOV(ip);
	int			vn_has_data, vn_modified;

	/*
	 * inode is held
	 */

	/*
	 * remember `pages' for stats below
	 * (sampled before the pages are pushed out and invalidated)
	 */
	pages = (ip->i_mode && vn_has_cached_data(vp) && vp->v_type != VCHR);

	/*
	 * start the dirty pages to disk and then invalidate them
	 * unless the inode is invalid (ISTALE)
	 *
	 * B_FORCE is added to the invalidation when the file system is
	 * in an error state.
	 */
	if ((ip->i_flag & ISTALE) == 0) {
		(void) TRANS_SYNCIP(ip, B_ASYNC, I_ASYNC, TOP_SYNCIP_FREE);
		(void) TRANS_SYNCIP(ip,
		    (TRANS_ISERROR(ufsvfsp)) ? B_INVAL | B_FORCE : B_INVAL,
		    I_ASYNC, TOP_SYNCIP_FREE);
	}

	/*
	 * wait for any current ufs_iget to finish and block future ufs_igets
	 */
	ASSERT(ip->i_number != 0);
	hno = INOHASH(ip->i_number);
	ihm = &ih_lock[hno];
	mutex_enter(ihm);

	/*
	 * It must be guaranteed that v_count >= 2, otherwise
	 * something must be wrong with this vnode already.
	 * That is why we use v_count-- instead of VN_RELE().
	 * Acquire the vnode lock in case another thread is in
	 * VN_RELE().
	 */
	mutex_enter(&vp->v_lock);

	if (vp->v_count < 2)
		cmn_err(CE_PANIC,
		    "ufs_idle_free: vnode ref count is less than 2");

	vp->v_count--;

	/* re-sample now that the hash lock and v_lock are both held */
	vn_has_data = (vp->v_type != VCHR && vn_has_cached_data(vp));
	vn_modified = (ip->i_flag & (IMOD|IMODACC|IACC|ICHG|IUPD|IATTCHG));

	if (vp->v_count != 1 ||
	    ((vn_has_data || vn_modified) &&
	    ((ip->i_flag & ISTALE) == 0))) {
		/*
		 * Another thread has referenced this inode while
		 * we are trying  to free  it.  Call VN_RELE() to
		 * release our reference, if v_count > 1  data is
		 * present  or one of the modified etc. flags was
		 * set, whereby ISTALE wasn't set.
		 * If we'd proceed with ISTALE set here, we might
		 * get ourselves into a deadlock situation.
		 */
		mutex_exit(&vp->v_lock);
		mutex_exit(ihm);
		VN_RELE(vp);
	} else {
		/*
		 * The inode is currently unreferenced and can not
		 * acquire further references because it has no pages
		 * and the hash is locked.  Inodes acquire references
		 * via the hash list or via their pages.
		 */

		mutex_exit(&vp->v_lock);

		/*
		 * remove it from the cache
		 */
		remque(ip);
		mutex_exit(ihm);
		/*
		 * Stale inodes have no valid ufsvfs
		 */
		if ((ip->i_flag & ISTALE) == 0 && ip->i_dquot) {
			TRANS_DQRELE(ufsvfsp, ip->i_dquot);
			ip->i_dquot = NULL;
		}
		if ((ip->i_flag & ISTALE) &&
		    vn_has_data) {
			/*
			 * ISTALE inodes may have data
			 * and  this data needs  to be
			 * cleaned up.
			 */
			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
			    ufs_putapage, B_INVAL | B_TRUNC,
			    (struct cred *)NULL);
		}
		ufs_si_del(ip);
		/* account for whether the inode still had pages at entry */
		if (pages) {
			CPU_STATS_ADDQ(CPU, sys, ufsipage, 1);
		} else {
			CPU_STATS_ADDQ(CPU, sys, ufsinopage, 1);
		}
		ASSERT((vp->v_type == VCHR) || !vn_has_cached_data(vp));

		/*
		 * We had better not have a vnode reference count > 1
		 * at this point, if we do then something is broken as
		 * this inode/vnode acquired a reference underneath of us.
		 */
		ASSERT(vp->v_count == 1);

		ufs_free_inode(ip);
	}
}
    763 
    764 /*
    765  * this thread processes the global idle queue
    766  */
    767 iqhead_t *ufs_junk_iq;
    768 iqhead_t *ufs_useful_iq;
    769 int ufs_njunk_iq = 0;
    770 int ufs_nuseful_iq = 0;
    771 int ufs_niqhash;
    772 int ufs_iqhashmask;
    773 struct ufs_q	ufs_idle_q;
    774 
    775 void
    776 ufs_thread_idle(void)
    777 {
    778 	callb_cpr_t cprinfo;
    779 	int i;
    780 	int ne;
    781 
    782 	ufs_niqhash = (ufs_idle_q.uq_lowat >> 1) / IQHASHQLEN;
    783 	ufs_niqhash = 1 << highbit(ufs_niqhash); /* round up to power of 2 */
    784 	ufs_iqhashmask = ufs_niqhash - 1;
    785 	ufs_junk_iq = kmem_alloc(ufs_niqhash * sizeof (*ufs_junk_iq),
    786 	    KM_SLEEP);
    787 	ufs_useful_iq = kmem_alloc(ufs_niqhash * sizeof (*ufs_useful_iq),
    788 	    KM_SLEEP);
    789 
    790 	/* Initialize hash queue headers */
    791 	for (i = 0; i < ufs_niqhash; i++) {
    792 		ufs_junk_iq[i].i_freef = (inode_t *)&ufs_junk_iq[i];
    793 		ufs_junk_iq[i].i_freeb = (inode_t *)&ufs_junk_iq[i];
    794 		ufs_useful_iq[i].i_freef = (inode_t *)&ufs_useful_iq[i];
    795 		ufs_useful_iq[i].i_freeb = (inode_t *)&ufs_useful_iq[i];
    796 	}
    797 
    798 	CALLB_CPR_INIT(&cprinfo, &ufs_idle_q.uq_mutex, callb_generic_cpr,
    799 	    "ufsidle");
    800 again:
    801 	/*
    802 	 * Whenever the idle thread is awakened, it repeatedly gives
    803 	 * back half of the idle queue until the idle queue falls
    804 	 * below lowat.
    805 	 */
    806 	mutex_enter(&ufs_idle_q.uq_mutex);
    807 	if (ufs_idle_q.uq_ne < ufs_idle_q.uq_lowat) {
    808 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
    809 		cv_wait(&ufs_idle_q.uq_cv, &ufs_idle_q.uq_mutex);
    810 		CALLB_CPR_SAFE_END(&cprinfo, &ufs_idle_q.uq_mutex);
    811 	}
    812 	mutex_exit(&ufs_idle_q.uq_mutex);
    813 
    814 	/*
    815 	 * Give back 1/2 of the idle queue
    816 	 */
    817 	ne = ufs_idle_q.uq_ne >> 1;
    818 	ins.in_tidles.value.ul += ne;
    819 	ufs_idle_some(ne);
    820 	goto again;
    821 }
    822 
    823 /*
    824  * Reclaim callback for ufs inode cache.
    825  * Invoked by the kernel memory allocator when memory gets tight.
    826  */
    827 /*ARGSUSED*/
    828 void
    829 ufs_inode_cache_reclaim(void *cdrarg)
    830 {
    831 	/*
    832 	 * If we are low on memory and the idle queue is over its
    833 	 * halfway mark, then free 50% of the idle q
    834 	 *
    835 	 * We don't free all of the idle inodes because the inodes
    836 	 * for popular NFS files may have been kicked from the dnlc.
    837 	 * The inodes for these files will end up on the idle queue
    838 	 * after every NFS access.
    839 	 *
    840 	 * If we repeatedly push them from the idle queue then
    841 	 * NFS users may be unhappy as an extra buf cache operation
    842 	 * is incurred for every NFS operation to these files.
    843 	 *
    844 	 * It's not common, but I have seen it happen.
    845 	 *
    846 	 */
    847 	if (ufs_idle_q.uq_ne < (ufs_idle_q.uq_lowat >> 1))
    848 		return;
    849 	mutex_enter(&ufs_idle_q.uq_mutex);
    850 	cv_broadcast(&ufs_idle_q.uq_cv);
    851 	mutex_exit(&ufs_idle_q.uq_mutex);
    852 }
    853 
    854 /*
    855  * Free up some idle inodes
    856  */
    857 void
    858 ufs_idle_some(int ne)
    859 {
    860 	int i;
    861 	struct inode *ip;
    862 	struct vnode *vp;
    863 	static int junk_rotor = 0;
    864 	static int useful_rotor = 0;
    865 
    866 	for (i = 0; i < ne; ++i) {
    867 		mutex_enter(&ufs_idle_q.uq_mutex);
    868 
    869 		if (ufs_njunk_iq) {
    870 			while (ufs_junk_iq[junk_rotor].i_freef ==
    871 			    (inode_t *)&ufs_junk_iq[junk_rotor]) {
    872 				junk_rotor = IQNEXT(junk_rotor);
    873 			}
    874 			ip = ufs_junk_iq[junk_rotor].i_freef;
    875 			ASSERT(ip->i_flag & IJUNKIQ);
    876 		} else if (ufs_nuseful_iq) {
    877 			while (ufs_useful_iq[useful_rotor].i_freef ==
    878 			    (inode_t *)&ufs_useful_iq[useful_rotor]) {
    879 				useful_rotor = IQNEXT(useful_rotor);
    880 			}
    881 			ip = ufs_useful_iq[useful_rotor].i_freef;
    882 			ASSERT(!(ip->i_flag & IJUNKIQ));
    883 		} else {
    884 			mutex_exit(&ufs_idle_q.uq_mutex);
    885 			return;
    886 		}
    887 
    888 		/*
    889 		 * emulate ufs_iget
    890 		 */
    891 		vp = ITOV(ip);
    892 		VN_HOLD(vp);
    893 		mutex_exit(&ufs_idle_q.uq_mutex);
    894 		rw_enter(&ip->i_contents, RW_WRITER);
    895 		/*
    896 		 * VN_RELE should not be called if
    897 		 * ufs_rmidle returns true, as it will
    898 		 * effectively be done in ufs_idle_free.
    899 		 */
    900 		if (ufs_rmidle(ip)) {
    901 			rw_exit(&ip->i_contents);
    902 			ufs_idle_free(ip);
    903 		} else {
    904 			rw_exit(&ip->i_contents);
    905 			VN_RELE(vp);
    906 		}
    907 	}
    908 }
    909 
    910 /*
    911  * drain entries for vfsp from the idle queue
    912  * vfsp == NULL means drain the entire thing
    913  */
    914 void
    915 ufs_idle_drain(struct vfs *vfsp)
    916 {
    917 	struct inode	*ip, *nip;
    918 	struct inode	*ianchor = NULL;
    919 	int		i;
    920 
    921 	mutex_enter(&ufs_idle_q.uq_mutex);
    922 	if (ufs_njunk_iq) {
    923 		/* for each hash q */
    924 		for (i = 0; i < ufs_niqhash; i++) {
    925 			/* search down the hash q */
    926 			for (ip = ufs_junk_iq[i].i_freef;
    927 			    ip != (inode_t *)&ufs_junk_iq[i];
    928 			    ip = ip->i_freef) {
    929 				if (ip->i_vfs == vfsp || vfsp == NULL) {
    930 					/* found a matching entry */
    931 					VN_HOLD(ITOV(ip));
    932 					mutex_exit(&ufs_idle_q.uq_mutex);
    933 					rw_enter(&ip->i_contents, RW_WRITER);
    934 					/*
    935 					 * See comments in ufs_idle_some()
    936 					 * as we will call ufs_idle_free()
    937 					 * after scanning both queues.
    938 					 */
    939 					if (ufs_rmidle(ip)) {
    940 						rw_exit(&ip->i_contents);
    941 						ip->i_freef = ianchor;
    942 						ianchor = ip;
    943 					} else {
    944 						rw_exit(&ip->i_contents);
    945 						VN_RELE(ITOV(ip));
    946 					}
    947 					/* restart this hash q */
    948 					ip = (inode_t *)&ufs_junk_iq[i];
    949 					mutex_enter(&ufs_idle_q.uq_mutex);
    950 				}
    951 			}
    952 		}
    953 	}
    954 	if (ufs_nuseful_iq) {
    955 		/* for each hash q */
    956 		for (i = 0; i < ufs_niqhash; i++) {
    957 			/* search down the hash q */
    958 			for (ip = ufs_useful_iq[i].i_freef;
    959 			    ip != (inode_t *)&ufs_useful_iq[i];
    960 			    ip = ip->i_freef) {
    961 				if (ip->i_vfs == vfsp || vfsp == NULL) {
    962 					/* found a matching entry */
    963 					VN_HOLD(ITOV(ip));
    964 					mutex_exit(&ufs_idle_q.uq_mutex);
    965 					rw_enter(&ip->i_contents, RW_WRITER);
    966 					/*
    967 					 * See comments in ufs_idle_some()
    968 					 * as we will call ufs_idle_free()
    969 					 * after scanning both queues.
    970 					 */
    971 					if (ufs_rmidle(ip)) {
    972 						rw_exit(&ip->i_contents);
    973 						ip->i_freef = ianchor;
    974 						ianchor = ip;
    975 					} else {
    976 						rw_exit(&ip->i_contents);
    977 						VN_RELE(ITOV(ip));
    978 					}
    979 					/* restart this hash q */
    980 					ip = (inode_t *)&ufs_useful_iq[i];
    981 					mutex_enter(&ufs_idle_q.uq_mutex);
    982 				}
    983 			}
    984 		}
    985 	}
    986 
    987 	mutex_exit(&ufs_idle_q.uq_mutex);
    988 	/* no more matching entries, release those we have found (if any) */
    989 	for (ip = ianchor; ip; ip = nip) {
    990 		nip = ip->i_freef;
    991 		ip->i_freef = ip;
    992 		ufs_idle_free(ip);
    993 	}
    994 }
    995 
    996 /*
    997  * RECLAIM DELETED INODES
    998  * The following thread scans the file system once looking for deleted files
    999  */
   1000 void
   1001 ufs_thread_reclaim(struct vfs *vfsp)
   1002 {
   1003 	struct ufsvfs		*ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
   1004 	struct ufs_q		*uq	= &ufsvfsp->vfs_reclaim;
   1005 	struct fs		*fs	= ufsvfsp->vfs_fs;
   1006 	struct buf		*bp	= 0;
   1007 	int			err	= 0;
   1008 	daddr_t			bno;
   1009 	ino_t			ino;
   1010 	struct dinode		*dp;
   1011 	struct inode		*ip;
   1012 	callb_cpr_t		cprinfo;
   1013 
   1014 	CALLB_CPR_INIT(&cprinfo, &uq->uq_mutex, callb_generic_cpr,
   1015 	    "ufsreclaim");
   1016 
   1017 	/*
   1018 	 * mount decided that we don't need a reclaim thread
   1019 	 */
   1020 	if ((fs->fs_reclaim & FS_RECLAIMING) == 0)
   1021 		err++;
   1022 
   1023 	/*
   1024 	 * don't reclaim if readonly
   1025 	 */
   1026 	if (fs->fs_ronly)
   1027 		err++;
   1028 
   1029 	for (ino = 0; ino < (fs->fs_ncg * fs->fs_ipg) && !err; ++ino) {
   1030 
   1031 		/*
   1032 		 * Check whether we are the target of another
   1033 		 * thread having called ufs_thread_exit() or
   1034 		 * ufs_thread_suspend().
   1035 		 */
   1036 		mutex_enter(&uq->uq_mutex);
   1037 again:
   1038 		if (uq->uq_flags & UQ_EXIT) {
   1039 			err++;
   1040 			mutex_exit(&uq->uq_mutex);
   1041 			break;
   1042 		} else if (uq->uq_flags & UQ_SUSPEND) {
   1043 			uq->uq_flags |= UQ_SUSPENDED;
   1044 			/*
   1045 			 * Release the buf before we cv_wait()
   1046 			 * otherwise we may deadlock with the
   1047 			 * thread that called ufs_thread_suspend().
   1048 			 */
   1049 			if (bp) {
   1050 				brelse(bp);
   1051 				bp = 0;
   1052 			}
   1053 			if (uq->uq_flags & UQ_WAIT) {
   1054 				uq->uq_flags &= ~UQ_WAIT;
   1055 				cv_broadcast(&uq->uq_cv);
   1056 			}
   1057 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
   1058 			cv_wait(&uq->uq_cv, &uq->uq_mutex);
   1059 			CALLB_CPR_SAFE_END(&cprinfo, &uq->uq_mutex);
   1060 			goto again;
   1061 		}
   1062 		mutex_exit(&uq->uq_mutex);
   1063 
   1064 		/*
   1065 		 * if we don't already have the buf; get it
   1066 		 */
   1067 		bno = fsbtodb(fs, itod(fs, ino));
   1068 		if ((bp == 0) || (bp->b_blkno != bno)) {
   1069 			if (bp)
   1070 				brelse(bp);
   1071 			bp = UFS_BREAD(ufsvfsp,
   1072 			    ufsvfsp->vfs_dev, bno, fs->fs_bsize);
   1073 			bp->b_flags |= B_AGE;
   1074 		}
   1075 		if (bp->b_flags & B_ERROR) {
   1076 			err++;
   1077 			continue;
   1078 		}
   1079 		/*
   1080 		 * nlink <= 0 and mode != 0 means deleted
   1081 		 */
   1082 		dp = (struct dinode *)bp->b_un.b_addr + itoo(fs, ino);
   1083 		if ((dp->di_nlink <= 0) && (dp->di_mode != 0)) {
   1084 			/*
   1085 			 * can't hold the buf (deadlock)
   1086 			 */
   1087 			brelse(bp);
   1088 			bp = 0;
   1089 			rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
   1090 			/*
   1091 			 * iget/iput sequence will put inode on ifree
   1092 			 * thread queue if it is idle.  This is a nop
   1093 			 * for busy (open, deleted) inodes
   1094 			 */
   1095 			if (ufs_iget(vfsp, ino, &ip, CRED()))
   1096 				err++;
   1097 			else
   1098 				VN_RELE(ITOV(ip));
   1099 			rw_exit(&ufsvfsp->vfs_dqrwlock);
   1100 		}
   1101 	}
   1102 
   1103 	if (bp)
   1104 		brelse(bp);
   1105 	if (!err) {
   1106 		/*
   1107 		 * reset the reclaiming-bit
   1108 		 */
   1109 		mutex_enter(&ufsvfsp->vfs_lock);
   1110 		fs->fs_reclaim &= ~FS_RECLAIMING;
   1111 		mutex_exit(&ufsvfsp->vfs_lock);
   1112 		TRANS_SBWRITE(ufsvfsp, TOP_SBWRITE_RECLAIM);
   1113 	}
   1114 
   1115 	/*
   1116 	 * exit the reclaim thread
   1117 	 */
   1118 	mutex_enter(&uq->uq_mutex);
   1119 	uq->uq_threadp = NULL;
   1120 	uq->uq_flags &= ~UQ_WAIT;
   1121 	cv_broadcast(&uq->uq_cv);
   1122 	CALLB_CPR_EXIT(&cprinfo);
   1123 	thread_exit();
   1124 }
   1125 /*
   1126  * HLOCK FILE SYSTEM
   1127  *	hlock the file system's whose logs have device errors
   1128  */
   1129 struct ufs_q	ufs_hlock;
   1130 /*ARGSUSED*/
   1131 void
   1132 ufs_thread_hlock(void *ignore)
   1133 {
   1134 	int		retry;
   1135 	callb_cpr_t	cprinfo;
   1136 
   1137 	CALLB_CPR_INIT(&cprinfo, &ufs_hlock.uq_mutex, callb_generic_cpr,
   1138 	    "ufshlock");
   1139 
   1140 	for (;;) {
   1141 		/*
   1142 		 * sleep until there is work to do
   1143 		 */
   1144 		mutex_enter(&ufs_hlock.uq_mutex);
   1145 		(void) ufs_thread_run(&ufs_hlock, &cprinfo);
   1146 		ufs_hlock.uq_ne = 0;
   1147 		mutex_exit(&ufs_hlock.uq_mutex);
   1148 		/*
   1149 		 * hlock the error'ed fs's
   1150 		 *	retry after a bit if another app is doing lockfs stuff
   1151 		 */
   1152 		do {
   1153 			retry = ufs_trans_hlock();
   1154 			if (retry) {
   1155 				mutex_enter(&ufs_hlock.uq_mutex);
   1156 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
   1157 				(void) cv_reltimedwait(&ufs_hlock.uq_cv,
   1158 				    &ufs_hlock.uq_mutex, hz, TR_CLOCK_TICK);
   1159 				CALLB_CPR_SAFE_END(&cprinfo,
   1160 				    &ufs_hlock.uq_mutex);
   1161 				mutex_exit(&ufs_hlock.uq_mutex);
   1162 			}
   1163 		} while (retry);
   1164 	}
   1165 }
   1166 
   1167 static void
   1168 ufs_attr_purge(struct inode *dp)
   1169 {
   1170 	int	err;
   1171 	int	error;
   1172 	off_t 	dirsize;			/* size of the directory */
   1173 	off_t 	offset;	/* offset in the directory */
   1174 	int entryoffsetinblk;		/* offset of ep in fbp's buffer */
   1175 	struct inode *tp;
   1176 	struct fbuf *fbp;	/* pointer to directory block */
   1177 	struct direct *ep;	/* directory entry */
   1178 	int trans_size;
   1179 	int issync;
   1180 	struct ufsvfs	*ufsvfsp = dp->i_ufsvfs;
   1181 
   1182 	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
   1183 
   1184 	fbp = NULL;
   1185 	dirsize = roundup(dp->i_size, DIRBLKSIZ);
   1186 	offset = 0;
   1187 	entryoffsetinblk = 0;
   1188 
   1189 	/*
   1190 	 * Purge directory cache
   1191 	 */
   1192 
   1193 	dnlc_dir_purge(&dp->i_danchor);
   1194 
   1195 	while (offset < dirsize) {
   1196 		/*
   1197 		 * If offset is on a block boundary,
   1198 		 * read the next directory block.
   1199 		 * Release previous if it exists.
   1200 		 */
   1201 		if (blkoff(dp->i_fs, offset) == 0) {
   1202 			if (fbp != NULL) {
   1203 				fbrelse(fbp, S_OTHER);
   1204 			}
   1205 
   1206 			err = blkatoff(dp, offset, (char **)0, &fbp);
   1207 			if (err) {
   1208 				goto out;
   1209 			}
   1210 			entryoffsetinblk = 0;
   1211 		}
   1212 		ep = (struct direct *)(fbp->fb_addr + entryoffsetinblk);
   1213 		if (ep->d_ino == 0 || (ep->d_name[0] == '.' &&
   1214 		    ep->d_name[1] == '\0') ||
   1215 		    (ep->d_name[0] == '.' && ep->d_name[1] == '.' &&
   1216 		    ep->d_name[2] == '\0')) {
   1217 
   1218 			entryoffsetinblk += ep->d_reclen;
   1219 
   1220 		} else {
   1221 
   1222 			if ((err = ufs_iget(dp->i_vfs, ep->d_ino,
   1223 			    &tp, CRED())) != 0) {
   1224 				goto out;
   1225 			}
   1226 
   1227 			TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_REMOVE,
   1228 			    trans_size = (int)TOP_REMOVE_SIZE(tp));
   1229 
   1230 			/*
   1231 			 * Delete inode.
   1232 			 */
   1233 
   1234 			dnlc_remove(ITOV(dp), ep->d_name);
   1235 
   1236 			rw_enter(&tp->i_contents, RW_WRITER);
   1237 			tp->i_flag |= ICHG;
   1238 			tp->i_seq++;
   1239 			TRANS_INODE(tp->i_ufsvfs, tp);
   1240 			tp->i_nlink--;
   1241 			ufs_setreclaim(tp);
   1242 			ITIMES_NOLOCK(tp);
   1243 			rw_exit(&tp->i_contents);
   1244 
   1245 			VN_RELE(ITOV(tp));
   1246 			entryoffsetinblk += ep->d_reclen;
   1247 			TRANS_END_CSYNC(ufsvfsp, error,
   1248 			    issync, TOP_REMOVE, trans_size);
   1249 
   1250 		}
   1251 		offset += ep->d_reclen;
   1252 	}
   1253 
   1254 	if (fbp) {
   1255 		fbrelse(fbp, S_OTHER);
   1256 	}
   1257 
   1258 out:
   1259 	rw_exit(&ufsvfsp->vfs_dqrwlock);
   1260 }
   1261