Home | History | Annotate | Download | only in ufs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 #include <sys/systm.h>
     29 #include <sys/types.h>
     30 #include <sys/vnode.h>
     31 #include <sys/errno.h>
     32 #include <sys/sysmacros.h>
     33 #include <sys/debug.h>
     34 #include <sys/kmem.h>
     35 #include <sys/conf.h>
     36 #include <sys/proc.h>
     37 #include <sys/taskq.h>
     38 #include <sys/cmn_err.h>
     39 #include <sys/fs/ufs_inode.h>
     40 #include <sys/fs/ufs_filio.h>
     41 #include <sys/fs/ufs_log.h>
     42 #include <sys/fs/ufs_bio.h>
     43 
     44 /*
     45  * FILE SYSTEM INTERFACE TO TRANSACTION OPERATIONS (TOP; like VOP)
     46  */
     47 
     48 uint_t topkey; /* tsd transaction key */
     49 
     50 /*
     51  * declare a delta
     52  */
     53 void
     54 top_delta(
     55 	ufsvfs_t *ufsvfsp,
     56 	offset_t mof,
     57 	off_t nb,
     58 	delta_t dtyp,
     59 	int (*func)(),
     60 	ulong_t arg)
     61 {
     62 	ml_unit_t		*ul	= ufsvfsp->vfs_log;
     63 	threadtrans_t		*tp	= tsd_get(topkey);
     64 
     65 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
     66 	ASSERT(nb);
     67 	ASSERT(((ul->un_debug & (MT_TRANSACT|MT_MATAMAP)) == 0) ||
     68 	    top_delta_debug(ul, mof, nb, dtyp));
     69 
     70 	deltamap_add(ul->un_deltamap, mof, nb, dtyp, func, arg, tp);
     71 
     72 	ul->un_logmap->mtm_ref = 1; /* for roll thread's heuristic */
     73 	if (tp) {
     74 		tp->any_deltas = 1;
     75 	}
     76 }
     77 
     78 /*
     79  * cancel a delta
     80  */
     81 void
     82 top_cancel(ufsvfs_t *ufsvfsp, offset_t mof, off_t nb, int flags)
     83 {
     84 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
     85 	int		metadata = flags & (I_DIR|I_IBLK|I_SHAD|I_QUOTA);
     86 
     87 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
     88 	ASSERT(nb);
     89 	ASSERT(((ul->un_debug & (MT_TRANSACT|MT_MATAMAP)) == 0) ||
     90 	    (!(flags & metadata) ||
     91 	    top_delta_debug(ul, mof, nb, DT_CANCEL)));
     92 
     93 	if (metadata)
     94 		deltamap_del(ul->un_deltamap, mof, nb);
     95 
     96 	logmap_cancel(ul, mof, nb, metadata);
     97 
     98 	/*
     99 	 * needed for the roll thread's heuristic
    100 	 */
    101 	ul->un_logmap->mtm_ref = 1;
    102 }
    103 
    104 /*
    105  * check if this delta has been canceled (metadata -> userdata)
    106  */
    107 int
    108 top_iscancel(ufsvfs_t *ufsvfsp, offset_t mof, off_t nb)
    109 {
    110 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
    111 
    112 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
    113 	ASSERT(nb);
    114 	if (logmap_iscancel(ul->un_logmap, mof, nb))
    115 		return (1);
    116 	if (ul->un_flags & LDL_ERROR)
    117 		return (1);
    118 	return (0);
    119 }
    120 
    121 /*
    122  * put device into error state
    123  */
    124 void
    125 top_seterror(ufsvfs_t *ufsvfsp)
    126 {
    127 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
    128 
    129 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
    130 	ldl_seterror(ul, "ufs is forcing a ufs log error");
    131 }
    132 
    133 /*
    134  * issue a empty sync op to help empty the delta/log map or the log
    135  */
    136 static void
    137 top_issue_sync(void *arg)
    138 {
    139 	ufsvfs_t *ufsvfsp = (ufsvfs_t *)arg;
    140 	ml_unit_t *ul = (ml_unit_t *)ufsvfsp->vfs_log;
    141 	mt_map_t *mtm = ul->un_logmap;
    142 	int	error = 0;
    143 
    144 	if ((curthread->t_flag & T_DONTBLOCK) == 0)
    145 		curthread->t_flag |= T_DONTBLOCK;
    146 	top_begin_sync(ufsvfsp, TOP_COMMIT_ASYNC, 0, &error);
    147 	if (!error) {
    148 		top_end_sync(ufsvfsp, &error, TOP_COMMIT_ASYNC, 0);
    149 	}
    150 
    151 	/*
    152 	 * If we are a taskq thread, decrement mtm_taskq_sync_count and
    153 	 * wake up the thread waiting on the mtm_cv if the mtm_taskq_sync_count
    154 	 * hits zero.
    155 	 */
    156 
    157 	if (taskq_member(system_taskq, curthread)) {
    158 		mutex_enter(&mtm->mtm_lock);
    159 		mtm->mtm_taskq_sync_count--;
    160 		if (mtm->mtm_taskq_sync_count == 0) {
    161 			cv_signal(&mtm->mtm_cv);
    162 		}
    163 		mutex_exit(&mtm->mtm_lock);
    164 	}
    165 }
    166 
    167 /*
    168  * MOBY TRANSACTION ROUTINES
    169  * begin a moby transaction
    170  *	sync ops enter until first sync op finishes
    171  *	async ops enter until last sync op finishes
    172  * end a moby transaction
    173  *		outstanding deltas are pushed thru log
    174  *		log buffer is committed (incore only)
    175  *		next trans is open to async ops
    176  *		log buffer is committed on the log
    177  *		next trans is open to sync ops
    178  */
    179 
    180 /*ARGSUSED*/
    181 void
    182 top_begin_sync(ufsvfs_t *ufsvfsp, top_t topid, ulong_t size, int *error)
    183 {
    184 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
    185 	mt_map_t	*mtm = ul->un_logmap;
    186 	threadtrans_t	*tp;
    187 	ushort_t	seq;
    188 
    189 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
    190 	ASSERT(error != NULL);
    191 	ASSERT(*error == 0);
    192 
    193 	mutex_enter(&mtm->mtm_lock);
    194 	if (topid == TOP_FSYNC) {
    195 		/*
    196 		 * Error the fsync immediately if this is an nfs thread
    197 		 * and its last transaction has already been committed.
    198 		 * The only transactions outstanding are those
    199 		 * where no commit has even started
    200 		 * (last_async_tid == mtm->mtm_tid)
    201 		 * or those where a commit is in progress
    202 		 * (last_async_tid == mtm->mtm_committid)
    203 		 */
    204 		if (curthread->t_flag & T_DONTPEND) {
    205 			tp = tsd_get(topkey);
    206 			if (tp && (tp->last_async_tid != mtm->mtm_tid) &&
    207 			    (tp->last_async_tid != mtm->mtm_committid)) {
    208 				mutex_exit(&mtm->mtm_lock);
    209 				*error = 1;
    210 				return;
    211 			}
    212 		}
    213 
    214 		/*
    215 		 * If there's already other synchronous transactions
    216 		 * and we haven't allowed async ones to start yet
    217 		 * then just wait for the commit to complete.
    218 		 */
    219 		if (((mtm->mtm_closed & (TOP_SYNC | TOP_ASYNC)) ==
    220 		    (TOP_SYNC | TOP_ASYNC)) || mtm->mtm_activesync) {
    221 			seq = mtm->mtm_seq;
    222 			do {
    223 				cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
    224 			} while (seq == mtm->mtm_seq);
    225 			mutex_exit(&mtm->mtm_lock);
    226 			*error = 1;
    227 			return;
    228 		}
    229 		if (mtm->mtm_closed & TOP_SYNC) {
    230 			/*
    231 			 * We know we're in the window where a thread is
    232 			 * committing a transaction in top_end_sync() and
    233 			 * has allowed async threads to start but hasn't
    234 			 * got the completion on the commit write to
    235 			 * allow sync threads to start.
    236 			 * So wait for that commit completion then retest
    237 			 * for the quick nfs check and if that fails
    238 			 * go on to start a transaction
    239 			 */
    240 			seq = mtm->mtm_seq;
    241 			do {
    242 				cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
    243 			} while (seq == mtm->mtm_seq);
    244 
    245 			/* tp is set above if T_DONTPEND */
    246 			if ((curthread->t_flag & T_DONTPEND) && tp &&
    247 			    (tp->last_async_tid != mtm->mtm_tid) &&
    248 			    (tp->last_async_tid != mtm->mtm_committid)) {
    249 				mutex_exit(&mtm->mtm_lock);
    250 				*error = 1;
    251 				return;
    252 			}
    253 		}
    254 	}
    255 retry:
    256 	mtm->mtm_ref = 1;
    257 	/*
    258 	 * current transaction closed to sync ops; try for next transaction
    259 	 */
    260 	if ((mtm->mtm_closed & TOP_SYNC) && !panicstr) {
    261 		ulong_t		resv;
    262 
    263 		/*
    264 		 * We know a commit is in progress, if we are trying to
    265 		 * commit and we haven't allowed async ones to start yet,
    266 		 * then just wait for the commit completion
    267 		 */
    268 		if ((size == TOP_COMMIT_SIZE) &&
    269 		    (((mtm->mtm_closed & (TOP_SYNC | TOP_ASYNC)) ==
    270 		    (TOP_SYNC | TOP_ASYNC)) || (mtm->mtm_activesync))) {
    271 			seq = mtm->mtm_seq;
    272 			do {
    273 				cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
    274 			} while (seq == mtm->mtm_seq);
    275 			mutex_exit(&mtm->mtm_lock);
    276 			*error = 1;
    277 			return;
    278 		}
    279 
    280 		/*
    281 		 * next transaction is full; try for next transaction
    282 		 */
    283 		resv = size + ul->un_resv_wantin + ul->un_resv;
    284 		if (resv > ul->un_maxresv) {
    285 			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
    286 			goto retry;
    287 		}
    288 		/*
    289 		 * we are in the next transaction; wait for it to start
    290 		 */
    291 		mtm->mtm_wantin++;
    292 		ul->un_resv_wantin += size;
    293 		/*
    294 		 * The corresponding cv_broadcast wakes up
    295 		 * all threads that have been validated to go into
    296 		 * the next transaction. However, because spurious
    297 		 * cv_wait wakeups are possible we use a sequence
    298 		 * number to check that the commit and cv_broadcast
    299 		 * has really occurred. We couldn't use mtm_tid
    300 		 * because on error that doesn't get incremented.
    301 		 */
    302 		seq = mtm->mtm_seq;
    303 		do {
    304 			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
    305 		} while (seq == mtm->mtm_seq);
    306 	} else {
    307 		/*
    308 		 * if the current transaction is full; try the next one
    309 		 */
    310 		if (size && (ul->un_resv && ((size + ul->un_resv) >
    311 		    ul->un_maxresv)) && !panicstr) {
    312 			/*
    313 			 * log is over reserved and no one will unresv the space
    314 			 *	so generate empty sync op to unresv the space
    315 			 */
    316 			if (mtm->mtm_activesync == 0) {
    317 				mutex_exit(&mtm->mtm_lock);
    318 				top_issue_sync(ufsvfsp);
    319 				mutex_enter(&mtm->mtm_lock);
    320 				goto retry;
    321 			}
    322 			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
    323 			goto retry;
    324 		}
    325 		/*
    326 		 * we are in the current transaction
    327 		 */
    328 		mtm->mtm_active++;
    329 		mtm->mtm_activesync++;
    330 		ul->un_resv += size;
    331 	}
    332 
    333 	ASSERT(mtm->mtm_active > 0);
    334 	ASSERT(mtm->mtm_activesync > 0);
    335 	mutex_exit(&mtm->mtm_lock);
    336 
    337 	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
    338 	    top_begin_debug(ul, topid, size));
    339 }
    340 
    341 int tryfail_cnt;
    342 
    343 int
    344 top_begin_async(ufsvfs_t *ufsvfsp, top_t topid, ulong_t size, int tryasync)
    345 {
    346 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
    347 	mt_map_t	*mtm	= ul->un_logmap;
    348 	threadtrans_t   *tp;
    349 
    350 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
    351 
    352 	tp = tsd_get(topkey);
    353 	if (tp == NULL) {
    354 		tp = kmem_zalloc(sizeof (threadtrans_t), KM_SLEEP);
    355 		(void) tsd_set(topkey, tp);
    356 	}
    357 	tp->deltas_size = 0;
    358 	tp->any_deltas = 0;
    359 
    360 	mutex_enter(&mtm->mtm_lock);
    361 retry:
    362 	mtm->mtm_ref = 1;
    363 	/*
    364 	 * current transaction closed to async ops; try for next transaction
    365 	 */
    366 	if ((mtm->mtm_closed & TOP_ASYNC) && !panicstr) {
    367 		if (tryasync) {
    368 			mutex_exit(&mtm->mtm_lock);
    369 			tryfail_cnt++;
    370 			return (EWOULDBLOCK);
    371 		}
    372 		cv_wait(&mtm->mtm_cv_next, &mtm->mtm_lock);
    373 		goto retry;
    374 	}
    375 
    376 	/*
    377 	 * if the current transaction is full; try the next one
    378 	 */
    379 	if (((size + ul->un_resv + ul->un_resv_wantin) > ul->un_maxresv) &&
    380 	    !panicstr) {
    381 		/*
    382 		 * log is overreserved and no one will unresv the space
    383 		 *	so generate empty sync op to unresv the space
    384 		 * We need TOP_SYNC_FORCED because we want to know when
    385 		 * a top_end_sync is completed.
    386 		 * mtm_taskq_sync_count is needed because we want to keep track
    387 		 * of the pending top_issue_sync dispatches so that during
    388 		 * forced umount we can wait for these to complete.
    389 		 * mtm_taskq_sync_count is decremented in top_issue_sync and
    390 		 * can remain set even after top_end_sync completes.
    391 		 * We have a window between the clearing of TOP_SYNC_FORCED
    392 		 * flag and the decrementing of mtm_taskq_sync_count.
    393 		 * If in this window new async transactions start consuming
    394 		 * log space, the log can get overreserved.
    395 		 * Subsequently a new async transaction would fail to generate
    396 		 * an empty sync transaction via the taskq, since it finds
    397 		 * the mtm_taskq_sync_count set. This can cause a hang.
    398 		 * Hence we do not test for mtm_taskq_sync_count being zero.
    399 		 * Instead, the TOP_SYNC_FORCED flag is tested here.
    400 		 */
    401 		if ((mtm->mtm_activesync == 0) &&
    402 		    (!(mtm->mtm_closed & TOP_SYNC_FORCED))) {
    403 			/*
    404 			 * Set flag to stop multiple forced empty
    405 			 * sync transactions. Increment mtm_taskq_sync_count.
    406 			 */
    407 			mtm->mtm_closed |= TOP_SYNC_FORCED;
    408 			mtm->mtm_taskq_sync_count++;
    409 			mutex_exit(&mtm->mtm_lock);
    410 			(void) taskq_dispatch(system_taskq,
    411 			    top_issue_sync, ufsvfsp, TQ_SLEEP);
    412 			if (tryasync) {
    413 				tryfail_cnt++;
    414 				return (EWOULDBLOCK);
    415 			}
    416 			mutex_enter(&mtm->mtm_lock);
    417 			goto retry;
    418 		}
    419 		if (tryasync) {
    420 			mutex_exit(&mtm->mtm_lock);
    421 			tryfail_cnt++;
    422 			return (EWOULDBLOCK);
    423 		}
    424 		cv_wait(&mtm->mtm_cv_next, &mtm->mtm_lock);
    425 		goto retry;
    426 	}
    427 	/*
    428 	 * we are in the current transaction
    429 	 */
    430 	mtm->mtm_active++;
    431 	ul->un_resv += size;
    432 
    433 	ASSERT(mtm->mtm_active > 0);
    434 	mutex_exit(&mtm->mtm_lock);
    435 
    436 	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
    437 	    top_begin_debug(ul, topid, size));
    438 	return (0);
    439 }
    440 
    441 /*ARGSUSED*/
    442 void
    443 top_end_sync(ufsvfs_t *ufsvfsp, int *ep, top_t topid, ulong_t size)
    444 {
    445 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
    446 	mt_map_t	*mtm	= ul->un_logmap;
    447 	mapentry_t	*cancellist;
    448 	uint32_t	tid;
    449 
    450 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
    451 	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
    452 	    top_end_debug(ul, mtm, topid, size));
    453 
    454 	mutex_enter(&mtm->mtm_lock);
    455 	tid = mtm->mtm_tid;
    456 
    457 	mtm->mtm_activesync--;
    458 	mtm->mtm_active--;
    459 
    460 	mtm->mtm_ref = 1;
    461 
    462 	/*
    463 	 * wait for last syncop to complete
    464 	 */
    465 	if (mtm->mtm_activesync || panicstr) {
    466 		ushort_t seq = mtm->mtm_seq;
    467 
    468 		mtm->mtm_closed = TOP_SYNC;
    469 
    470 		do {
    471 			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
    472 		} while (seq == mtm->mtm_seq);
    473 		mutex_exit(&mtm->mtm_lock);
    474 		goto out;
    475 	}
    476 	/*
    477 	 * last syncop; close current transaction to all ops
    478 	 */
    479 	mtm->mtm_closed = TOP_SYNC|TOP_ASYNC;
    480 
    481 	/*
    482 	 * wait for last asyncop to finish
    483 	 */
    484 	while (mtm->mtm_active) {
    485 		cv_wait(&mtm->mtm_cv_eot, &mtm->mtm_lock);
    486 	}
    487 
    488 	/*
    489 	 * push dirty metadata thru the log
    490 	 */
    491 	deltamap_push(ul);
    492 
    493 	ASSERT(((ul->un_debug & MT_FORCEROLL) == 0) ||
    494 	    top_roll_debug(ul));
    495 
    496 	mtm->mtm_tid = tid + 1;	/* can overflow to 0 */
    497 
    498 	/*
    499 	 * Empty the cancellist, but save it for logmap_free_cancel
    500 	 */
    501 	mutex_enter(&mtm->mtm_mutex);
    502 	cancellist = mtm->mtm_cancel;
    503 	mtm->mtm_cancel = NULL;
    504 	mutex_exit(&mtm->mtm_mutex);
    505 
    506 	/*
    507 	 * allow async ops
    508 	 */
    509 	ASSERT(mtm->mtm_active == 0);
    510 	ul->un_resv = 0; /* unreserve the log space */
    511 	mtm->mtm_closed = TOP_SYNC;
    512 	/*
    513 	 * Hold the un_log_mutex here until we are done writing
    514 	 * the commit record to prevent any more deltas to be written
    515 	 * to the log after we allow async operations.
    516 	 */
    517 	mutex_enter(&ul->un_log_mutex);
    518 	mutex_exit(&mtm->mtm_lock);
    519 	cv_broadcast(&mtm->mtm_cv_next);
    520 
    521 	/*
    522 	 * asynchronously write the commit record,
    523 	 */
    524 	logmap_commit(ul, tid);
    525 
    526 	/*
    527 	 * wait for outstanding log writes (e.g., commits) to finish
    528 	 */
    529 	ldl_waito(ul);
    530 
    531 	/*
    532 	 * Now that we are sure the commit has been written to the log
    533 	 * we can free any canceled deltas.  If we free them before
    534 	 * guaranteeing that the commit was written, we could panic before
    535 	 * the commit, but after an async thread has allocated and written
    536 	 * to canceled freed block.
    537 	 */
    538 
    539 	logmap_free_cancel(mtm, &cancellist);
    540 	mutex_exit(&ul->un_log_mutex);
    541 
    542 	/*
    543 	 * now, allow all ops
    544 	 */
    545 	mutex_enter(&mtm->mtm_lock);
    546 	mtm->mtm_active += mtm->mtm_wantin;
    547 	ul->un_resv += ul->un_resv_wantin;
    548 	mtm->mtm_activesync = mtm->mtm_wantin;
    549 	mtm->mtm_wantin = 0;
    550 	mtm->mtm_closed = 0;
    551 	ul->un_resv_wantin = 0;
    552 	mtm->mtm_committid = mtm->mtm_tid;
    553 	mtm->mtm_seq++;
    554 	mutex_exit(&mtm->mtm_lock);
    555 
    556 	/*
    557 	 * Finish any other synchronous transactions and
    558 	 * start any waiting new synchronous transactions
    559 	 */
    560 	cv_broadcast(&mtm->mtm_cv_commit);
    561 
    562 	/*
    563 	 * if the logmap is getting full; roll something
    564 	 */
    565 	if (logmap_need_roll_sync(mtm)) {
    566 		logmap_forceroll_nowait(mtm);
    567 	}
    568 
    569 out:
    570 	if (ul->un_flags & LDL_ERROR)
    571 		*ep = EIO;
    572 }
    573 
    574 /*ARGSUSED*/
    575 void
    576 top_end_async(ufsvfs_t *ufsvfsp, top_t topid, ulong_t size)
    577 {
    578 	ml_unit_t	*ul	= ufsvfsp->vfs_log;
    579 	mt_map_t	*mtm	= ul->un_logmap;
    580 	threadtrans_t	*tp	= tsd_get(topkey);
    581 	int		wakeup_needed = 0;
    582 
    583 	ASSERT(tp);
    584 	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
    585 	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
    586 	    top_end_debug(ul, mtm, topid, size));
    587 
    588 	mutex_enter(&mtm->mtm_lock);
    589 
    590 	if (size > tp->deltas_size) {
    591 		ul->un_resv -= (size - tp->deltas_size);
    592 	}
    593 	if (tp->any_deltas) {
    594 		tp->last_async_tid = mtm->mtm_tid;
    595 	}
    596 	mtm->mtm_ref = 1;
    597 
    598 	mtm->mtm_active--;
    599 	if ((mtm->mtm_active == 0) &&
    600 	    (mtm->mtm_closed == (TOP_SYNC|TOP_ASYNC))) {
    601 		wakeup_needed = 1;
    602 	}
    603 	mutex_exit(&mtm->mtm_lock);
    604 	if (wakeup_needed)
    605 		cv_signal(&mtm->mtm_cv_eot);
    606 
    607 	/*
    608 	 * Generate a sync op if the log, logmap, or deltamap are heavily used.
    609 	 * Unless we are possibly holding any VM locks, since if we are holding
    610 	 * any VM locks and we issue a top_end_sync(), we could deadlock.
    611 	 */
    612 	if ((mtm->mtm_activesync == 0) &&
    613 	    !(mtm->mtm_closed & TOP_SYNC) &&
    614 	    (deltamap_need_commit(ul->un_deltamap) ||
    615 	    logmap_need_commit(mtm) ||
    616 	    ldl_need_commit(ul)) &&
    617 	    (topid != TOP_GETPAGE)) {
    618 		top_issue_sync(ufsvfsp);
    619 	}
    620 	/*
    621 	 * roll something from the log if the logmap is too full
    622 	 */
    623 	if (logmap_need_roll_async(mtm))
    624 		logmap_forceroll_nowait(mtm);
    625 }
    626 
    627 /*
    628  * Called from roll thread;
    629  *	buffer set for reading master
    630  * Returns
    631  *	0 - success, can continue with next buffer
    632  *	1 - failure due to logmap deltas being in use
    633  */
    634 int
    635 top_read_roll(rollbuf_t *rbp, ml_unit_t *ul)
    636 {
    637 	buf_t		*bp	= &rbp->rb_bh;
    638 	offset_t	mof	= ldbtob(bp->b_blkno);
    639 
    640 	/*
    641 	 * get a list of deltas
    642 	 */
    643 	if (logmap_list_get_roll(ul->un_logmap, mof, rbp)) {
    644 		/* logmap deltas are in use */
    645 		return (1);
    646 	}
    647 
    648 	/*
    649 	 * no deltas were found, nothing to roll
    650 	 */
    651 	if (rbp->rb_age == NULL) {
    652 		bp->b_flags |= B_INVAL;
    653 		return (0);
    654 	}
    655 
    656 	/*
    657 	 * If there is one cached roll buffer that cover all the deltas then
    658 	 * we can use that instead of copying to a separate roll buffer.
    659 	 */
    660 	if (rbp->rb_crb) {
    661 		rbp->rb_bh.b_blkno = lbtodb(rbp->rb_crb->c_mof);
    662 		return (0);
    663 	}
    664 
    665 	/*
    666 	 * Set up the read.
    667 	 * If no read is needed logmap_setup_read() returns 0.
    668 	 */
    669 	if (logmap_setup_read(rbp->rb_age, rbp)) {
    670 		/*
    671 		 * async read the data from master
    672 		 */
    673 		logstats.ls_rreads.value.ui64++;
    674 		bp->b_bcount = MAPBLOCKSIZE;
    675 		(void) bdev_strategy(bp);
    676 		lwp_stat_update(LWP_STAT_INBLK, 1);
    677 	} else {
    678 		sema_v(&bp->b_io); /* mark read as complete */
    679 	}
    680 	return (0);
    681 }
    682 
    683 int ufs_crb_enable = 1;
    684 
    685 /*
    686  * move deltas from deltamap into the log
    687  */
    688 void
    689 top_log(ufsvfs_t *ufsvfsp, char *va, offset_t vamof, off_t nb,
    690     caddr_t buf, uint32_t bufsz)
    691 {
    692 	ml_unit_t	*ul = ufsvfsp->vfs_log;
    693 	mapentry_t	*me;
    694 	offset_t	hmof;
    695 	uint32_t	hnb, nb1;
    696 
    697 	/*
    698 	 * needed for the roll thread's heuristic
    699 	 */
    700 	ul->un_logmap->mtm_ref = 1;
    701 
    702 	if (buf && ufs_crb_enable) {
    703 		ASSERT((bufsz & DEV_BMASK) == 0);
    704 		/*
    705 		 * Move any deltas to the logmap. Split requests that
    706 		 * straddle MAPBLOCKSIZE hash boundaries (i.e. summary info).
    707 		 */
    708 		for (hmof = vamof - (va - buf), nb1 = nb; bufsz;
    709 		    bufsz -= hnb, hmof += hnb, buf += hnb, nb1 -= hnb) {
    710 			hnb = MAPBLOCKSIZE - (hmof & MAPBLOCKOFF);
    711 			if (hnb > bufsz)
    712 				hnb = bufsz;
    713 			me = deltamap_remove(ul->un_deltamap,
    714 			    MAX(hmof, vamof), MIN(hnb, nb1));
    715 			if (me) {
    716 				logmap_add_buf(ul, va, hmof, me, buf, hnb);
    717 			}
    718 		}
    719 	} else {
    720 		/*
    721 		 * if there are deltas
    722 		 */
    723 		me = deltamap_remove(ul->un_deltamap, vamof, nb);
    724 		if (me) {
    725 			/*
    726 			 * move to logmap
    727 			 */
    728 			logmap_add(ul, va, vamof, me);
    729 		}
    730 	}
    731 
    732 	ASSERT((ul->un_matamap == NULL) ||
    733 	    matamap_within(ul->un_matamap, vamof, nb));
    734 }
    735 
    736 
    737 static void
    738 top_threadtrans_destroy(void *tp)
    739 {
    740 	kmem_free(tp, sizeof (threadtrans_t));
    741 }
    742 
    743 void
    744 _init_top(void)
    745 {
    746 	ASSERT(top_init_debug());
    747 
    748 	/*
    749 	 * set up the delta layer
    750 	 */
    751 	_init_map();
    752 
    753 	/*
    754 	 * Initialise the thread specific data transaction key
    755 	 */
    756 	tsd_create(&topkey, top_threadtrans_destroy);
    757 }
    758