Home | History | Annotate | Download | only in ufs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     22 
     23 /*
     24  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     25  * Use is subject to license terms.
     26  */
     27 
     28 #include <sys/systm.h>
     29 #include <sys/types.h>
     30 #include <sys/vnode.h>
     31 #include <sys/errno.h>
     32 #include <sys/sysmacros.h>
     33 #include <sys/debug.h>
     34 #include <sys/kmem.h>
     35 #include <sys/conf.h>
     36 #include <sys/proc.h>
     37 #include <sys/cmn_err.h>
     38 #include <sys/fs/ufs_inode.h>
     39 #include <sys/fs/ufs_filio.h>
     40 #include <sys/fs/ufs_log.h>
     41 #include <sys/inttypes.h>
     42 #include <sys/atomic.h>
     43 #include <sys/tuneable.h>
     44 
     45 /*
     46  * externs
     47  */
     48 extern pri_t minclsyspri;
     49 extern struct kmem_cache *lufs_bp;
     50 extern int ufs_trans_push_quota();
     51 
     52 /*
     53  * globals
     54  */
     55 kmem_cache_t *mapentry_cache;
     56 
     57 /*
     58  * logmap tuning constants
     59  */
     60 long	logmap_maxnme_commit	= 2048;
     61 long	logmap_maxnme_async	= 4096;
     62 long	logmap_maxnme_sync	= 6144;
     63 long	logmap_maxcfrag_commit	= 4;	/* Max canceled fragments per moby */
     64 
     65 
     66 uint64_t ufs_crb_size = 0;		/* current size of all crb buffers */
     67 uint64_t ufs_crb_max_size = 0;		/* highest crb buffer use so far */
     68 size_t ufs_crb_limit;			/* max allowable size for crbs */
     69 uint64_t ufs_crb_alloc_fails = 0;	/* crb allocation failures stat */
     70 #define	UFS_MAX_CRB_DEFAULT_DIVISOR 10	/* max 1/10 kmem_maxavail() */
     71 int ufs_max_crb_divisor = UFS_MAX_CRB_DEFAULT_DIVISOR; /* tunable */
     72 void handle_dquot(mapentry_t *);
     73 
     74 /*
     75  * GENERIC MAP ROUTINES
     76  */
     77 
     78 #define	CRB_FREE(crb, me) \
     79 	kmem_free(crb->c_buf, crb->c_nb); \
     80 	atomic_add_64(&ufs_crb_size, -(uint64_t)crb->c_nb); \
     81 	kmem_free(crb, sizeof (crb_t)); \
     82 	(me)->me_crb = NULL;
     83 
     84 #define	CRB_RELE(me) { \
     85 	crb_t *crb = (me)->me_crb; \
     86 	if (crb && (--crb->c_refcnt == 0)) { \
     87 		CRB_FREE(crb, me) \
     88 	} \
     89 }
     90 
     91 /*
     92  * Check that the old delta has an argument and a push function of
     93  * ufs_trans_push_quota(), then check that the old and new deltas differ.
     94  * If so we clean up with handle_dquot() before replacing the old delta.
     95  */
     96 #define	HANDLE_DQUOT(me, melist) { \
     97 	if ((me->me_arg) && \
     98 	    (me->me_func == ufs_trans_push_quota)) { \
     99 		if (!((me->me_dt == melist->me_dt) && \
    100 		    (me->me_arg == melist->me_arg) && \
    101 		    (me->me_func == melist->me_func))) { \
    102 			handle_dquot(me); \
    103 		} \
    104 	} \
    105 }
    106 
    107 /*
    108  * free up all the mapentries for a map
    109  */
    110 void
    111 map_free_entries(mt_map_t *mtm)
    112 {
    113 	int		i;
    114 	mapentry_t	*me;
    115 
    116 	while ((me = mtm->mtm_next) != (mapentry_t *)mtm) {
    117 		me->me_next->me_prev = me->me_prev;
    118 		me->me_prev->me_next = me->me_next;
    119 		CRB_RELE(me);
    120 		kmem_cache_free(mapentry_cache, me);
    121 	}
    122 	for (i = 0; i < mtm->mtm_nhash; i++)
    123 		mtm->mtm_hash[i] = NULL;
    124 	mtm->mtm_nme = 0;
    125 	mtm->mtm_nmet = 0;
    126 }
    127 
    128 /*
    129  * done with map; free if necessary
    130  */
    131 mt_map_t *
    132 map_put(mt_map_t *mtm)
    133 {
    134 	/*
    135 	 * free up the map's memory
    136 	 */
    137 	map_free_entries(mtm);
    138 	ASSERT(map_put_debug(mtm));
    139 	kmem_free(mtm->mtm_hash,
    140 	    (size_t) (sizeof (mapentry_t *) * mtm->mtm_nhash));
    141 	mutex_destroy(&mtm->mtm_mutex);
    142 	mutex_destroy(&mtm->mtm_scan_mutex);
    143 	cv_destroy(&mtm->mtm_to_roll_cv);
    144 	cv_destroy(&mtm->mtm_from_roll_cv);
    145 	rw_destroy(&mtm->mtm_rwlock);
    146 	mutex_destroy(&mtm->mtm_lock);
    147 	cv_destroy(&mtm->mtm_cv_commit);
    148 	cv_destroy(&mtm->mtm_cv_next);
    149 	cv_destroy(&mtm->mtm_cv_eot);
    150 	cv_destroy(&mtm->mtm_cv);
    151 	kmem_free(mtm, sizeof (mt_map_t));
    152 	return (NULL);
    153 }
    154 /*
    155  * Allocate a map;
    156  */
    157 mt_map_t *
    158 map_get(ml_unit_t *ul, enum maptypes maptype, int nh)
    159 {
    160 	mt_map_t	*mtm;
    161 
    162 	/*
    163 	 * assume the map is not here and allocate the necessary structs
    164 	 */
    165 	mtm = kmem_zalloc(sizeof (mt_map_t), KM_SLEEP);
    166 	mutex_init(&mtm->mtm_mutex, NULL, MUTEX_DEFAULT, NULL);
    167 	mutex_init(&mtm->mtm_scan_mutex, NULL, MUTEX_DEFAULT, NULL);
    168 	cv_init(&mtm->mtm_to_roll_cv, NULL, CV_DEFAULT, NULL);
    169 	cv_init(&mtm->mtm_from_roll_cv, NULL, CV_DEFAULT, NULL);
    170 	rw_init(&mtm->mtm_rwlock, NULL, RW_DEFAULT, NULL);
    171 	mtm->mtm_next = (mapentry_t *)mtm;
    172 	mtm->mtm_prev = (mapentry_t *)mtm;
    173 	mtm->mtm_hash = kmem_zalloc((size_t) (sizeof (mapentry_t *) * nh),
    174 	    KM_SLEEP);
    175 	mtm->mtm_nhash = nh;
    176 	mtm->mtm_debug = ul->un_debug;
    177 	mtm->mtm_type = maptype;
    178 
    179 	mtm->mtm_cfrags = 0;
    180 	mtm->mtm_cfragmax = logmap_maxcfrag_commit;
    181 
    182 	/*
    183 	 * for scan test
    184 	 */
    185 	mtm->mtm_ul = ul;
    186 
    187 	/*
    188 	 * Initialize locks
    189 	 */
    190 	mutex_init(&mtm->mtm_lock, NULL, MUTEX_DEFAULT, NULL);
    191 	cv_init(&mtm->mtm_cv_commit, NULL, CV_DEFAULT, NULL);
    192 	cv_init(&mtm->mtm_cv_next, NULL, CV_DEFAULT, NULL);
    193 	cv_init(&mtm->mtm_cv_eot, NULL, CV_DEFAULT, NULL);
    194 	cv_init(&mtm->mtm_cv, NULL, CV_DEFAULT, NULL);
    195 	ASSERT(map_get_debug(ul, mtm));
    196 
    197 	return (mtm);
    198 }
    199 
    200 /*
    201  * DELTAMAP ROUTINES
    202  */
    203 /*
    204  * deltamap tuning constants
    205  */
    206 long	deltamap_maxnme	= 1024;	/* global so it can be set */
    207 
    208 int
    209 deltamap_need_commit(mt_map_t *mtm)
    210 {
    211 	return (mtm->mtm_nme > deltamap_maxnme);
    212 }
    213 
    214 /*
    215  * put a delta into a deltamap; may sleep on memory
    216  */
    217 void
    218 deltamap_add(
    219 	mt_map_t *mtm,
    220 	offset_t mof,
    221 	off_t nb,
    222 	delta_t dtyp,
    223 	int (*func)(),
    224 	ulong_t arg,
    225 	threadtrans_t *tp)
    226 {
    227 	int32_t		hnb;
    228 	mapentry_t	*me;
    229 	mapentry_t	**mep;
    230 
    231 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
    232 	    map_check_linkage(mtm));
    233 
    234 	mutex_enter(&mtm->mtm_mutex);
    235 
    236 	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
    237 		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
    238 		if (hnb > nb)
    239 			hnb = nb;
    240 		/*
    241 		 * Search for dup entry. We need to ensure that we don't
    242 		 * replace a map entry which carries quota information
    243 		 * with a map entry which doesn't. In that case we lose
    244 		 * reference the the dquot structure which will not be
    245 		 * cleaned up by the push function me->me_func as this will
    246 		 * never be called.
    247 		 * The stray dquot would be found later by invalidatedq()
    248 		 * causing a panic when the filesystem is unmounted.
    249 		 */
    250 		mep = MAP_HASH(mof, mtm);
    251 		for (me = *mep; me; me = me->me_hash) {
    252 			if (DATAwithinME(mof, hnb, me)) {
    253 				/*
    254 				 * Don't remove quota entries which have
    255 				 * incremented the ref count (those with a
    256 				 * ufs_trans_push_quota push function).
    257 				 * Let logmap_add[_buf] clean them up.
    258 				 */
    259 				if (me->me_func == ufs_trans_push_quota) {
    260 					continue;
    261 				}
    262 				break;
    263 			}
    264 			ASSERT((dtyp == DT_CANCEL) ||
    265 			    (!DATAoverlapME(mof, hnb, me)) ||
    266 			    MEwithinDATA(me, mof, hnb));
    267 		}
    268 
    269 		if (me) {
    270 			/* already in map */
    271 			continue;
    272 		}
    273 
    274 		/*
    275 		 * Add up all the delta map deltas so we can compute
    276 		 * an upper bound on the log size used.
    277 		 * Note, some deltas get removed from the deltamap
    278 		 * before the deltamap_push by lufs_write_strategy
    279 		 * and so multiple deltas to the same mof offset
    280 		 * don't get cancelled here but in the logmap.
    281 		 * Thus we can't easily get a accurate count of
    282 		 * the log space used - only an upper bound.
    283 		 */
    284 		if (tp && (mtm->mtm_ul->un_deltamap == mtm)) {
    285 			ASSERT(dtyp != DT_CANCEL);
    286 			if (dtyp == DT_ABZERO) {
    287 				tp->deltas_size += sizeof (struct delta);
    288 			} else {
    289 				tp->deltas_size +=
    290 				    (hnb + sizeof (struct delta));
    291 			}
    292 		}
    293 
    294 		delta_stats[dtyp]++;
    295 
    296 		/*
    297 		 * get a mapentry
    298 		 * May need to drop & re-grab the mtm_mutex
    299 		 * and then recheck for a duplicate
    300 		 */
    301 		me = kmem_cache_alloc(mapentry_cache, KM_NOSLEEP);
    302 		if (me == NULL) {
    303 			mutex_exit(&mtm->mtm_mutex);
    304 			me = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
    305 			mutex_enter(&mtm->mtm_mutex);
    306 		}
    307 		bzero(me, sizeof (mapentry_t));
    308 
    309 		/*
    310 		 * initialize and put in deltamap
    311 		 */
    312 		me->me_mof = mof;
    313 		me->me_nb = hnb;
    314 		me->me_func = func;
    315 		me->me_arg = arg;
    316 		me->me_dt = dtyp;
    317 		me->me_flags = ME_HASH;
    318 		me->me_tid = mtm->mtm_tid;
    319 
    320 		me->me_hash = *mep;
    321 		*mep = me;
    322 		me->me_next = (mapentry_t *)mtm;
    323 		me->me_prev = mtm->mtm_prev;
    324 		mtm->mtm_prev->me_next = me;
    325 		mtm->mtm_prev = me;
    326 		mtm->mtm_nme++;
    327 	}
    328 	mutex_exit(&mtm->mtm_mutex);
    329 
    330 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
    331 	    map_check_linkage(mtm));
    332 }
    333 
    334 /*
    335  * remove deltas within (mof, nb) and return as linked list
    336  */
    337 mapentry_t *
    338 deltamap_remove(mt_map_t *mtm, offset_t mof, off_t nb)
    339 {
    340 	off_t		hnb;
    341 	mapentry_t	*me;
    342 	mapentry_t	**mep;
    343 	mapentry_t	*mer;
    344 
    345 	if (mtm == NULL)
    346 		return (NULL);
    347 
    348 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
    349 	    map_check_linkage(mtm));
    350 
    351 	mutex_enter(&mtm->mtm_mutex);
    352 	for (mer = NULL, hnb = 0; nb; nb -= hnb, mof += hnb) {
    353 		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
    354 		if (hnb > nb)
    355 			hnb = nb;
    356 		/*
    357 		 * remove entries from hash and return as a aged linked list
    358 		 */
    359 		mep = MAP_HASH(mof, mtm);
    360 		while ((me = *mep) != 0) {
    361 			if (MEwithinDATA(me, mof, hnb)) {
    362 				*mep = me->me_hash;
    363 				me->me_next->me_prev = me->me_prev;
    364 				me->me_prev->me_next = me->me_next;
    365 				me->me_hash = mer;
    366 				mer = me;
    367 				me->me_flags |= ME_LIST;
    368 				me->me_flags &= ~ME_HASH;
    369 				mtm->mtm_nme--;
    370 			} else
    371 				mep = &me->me_hash;
    372 		}
    373 	}
    374 	mutex_exit(&mtm->mtm_mutex);
    375 
    376 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
    377 	    map_check_linkage(mtm));
    378 
    379 	return (mer);
    380 }
    381 
    382 /*
    383  * delete entries within (mof, nb)
    384  */
    385 void
    386 deltamap_del(mt_map_t *mtm, offset_t mof, off_t nb)
    387 {
    388 	mapentry_t	*me;
    389 	mapentry_t	*menext;
    390 
    391 	menext = deltamap_remove(mtm, mof, nb);
    392 	while ((me = menext) != 0) {
    393 		menext = me->me_hash;
    394 		kmem_cache_free(mapentry_cache, me);
    395 	}
    396 }
    397 
    398 /*
    399  * Call the indicated function to cause deltas to move to the logmap.
    400  * top_end_sync() is the only caller of this function and
    401  * it has waited for the completion of all threads, so there can
    402  * be no other activity in the deltamap. Therefore we don't need to
    403  * hold the deltamap lock.
    404  */
    405 void
    406 deltamap_push(ml_unit_t *ul)
    407 {
    408 	delta_t		dtyp;
    409 	int		(*func)();
    410 	ulong_t		arg;
    411 	mapentry_t	*me;
    412 	offset_t	mof;
    413 	off_t		nb;
    414 	mt_map_t	*mtm	= ul->un_deltamap;
    415 
    416 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
    417 	    map_check_linkage(mtm));
    418 
    419 	/*
    420 	 * for every entry in the deltamap
    421 	 */
    422 	while ((me = mtm->mtm_next) != (mapentry_t *)mtm) {
    423 		ASSERT(me->me_func);
    424 		func = me->me_func;
    425 		dtyp = me->me_dt;
    426 		arg = me->me_arg;
    427 		mof = me->me_mof;
    428 		nb = me->me_nb;
    429 		if ((ul->un_flags & LDL_ERROR) ||
    430 		    (*func)(ul->un_ufsvfs, dtyp, arg))
    431 			deltamap_del(mtm, mof, nb);
    432 	}
    433 
    434 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
    435 	    map_check_linkage(mtm));
    436 }
    437 
    438 /*
    439  * LOGMAP ROUTINES
    440  */
    441 
    442 int
    443 logmap_need_commit(mt_map_t *mtm)
    444 {
    445 	return ((mtm->mtm_nmet > logmap_maxnme_commit) ||
    446 	    (mtm->mtm_cfrags >= mtm->mtm_cfragmax));
    447 }
    448 
    449 int
    450 logmap_need_roll_async(mt_map_t *mtm)
    451 {
    452 	return (mtm->mtm_nme > logmap_maxnme_async);
    453 }
    454 
    455 int
    456 logmap_need_roll_sync(mt_map_t *mtm)
    457 {
    458 	return (mtm->mtm_nme > logmap_maxnme_sync);
    459 }
    460 
    461 void
    462 logmap_start_roll(ml_unit_t *ul)
    463 {
    464 	mt_map_t	*logmap	= ul->un_logmap;
    465 
    466 	logmap_settail(logmap, ul);
    467 	ASSERT(!(ul->un_flags & LDL_NOROLL));
    468 	mutex_enter(&logmap->mtm_mutex);
    469 	if ((logmap->mtm_flags & MTM_ROLL_RUNNING) == 0) {
    470 		logmap->mtm_flags |= MTM_ROLL_RUNNING;
    471 		logmap->mtm_flags &= ~(MTM_FORCE_ROLL | MTM_ROLL_EXIT);
    472 		(void) thread_create(NULL, 0, trans_roll, ul, 0, &p0,
    473 		    TS_RUN, minclsyspri);
    474 	}
    475 	mutex_exit(&logmap->mtm_mutex);
    476 }
    477 
    478 void
    479 logmap_kill_roll(ml_unit_t *ul)
    480 {
    481 	mt_map_t	*mtm	= ul->un_logmap;
    482 
    483 	if (mtm == NULL)
    484 		return;
    485 
    486 	mutex_enter(&mtm->mtm_mutex);
    487 
    488 	while (mtm->mtm_flags & MTM_ROLL_RUNNING) {
    489 		mtm->mtm_flags |= MTM_ROLL_EXIT;
    490 		cv_signal(&mtm->mtm_to_roll_cv);
    491 		cv_wait(&mtm->mtm_from_roll_cv, &mtm->mtm_mutex);
    492 	}
    493 	mutex_exit(&mtm->mtm_mutex);
    494 }
    495 
    496 /*
    497  * kick the roll thread if it's not doing anything
    498  */
    499 void
    500 logmap_forceroll_nowait(mt_map_t *logmap)
    501 {
    502 	/*
    503 	 * Don't need to lock mtm_mutex to read mtm_flags here as we
    504 	 * don't care in the rare case when we get a transitional value
    505 	 * of mtm_flags. Just by signalling the thread it will wakeup
    506 	 * and notice it has too many logmap entries.
    507 	 */
    508 	ASSERT(!(logmap->mtm_ul->un_flags & LDL_NOROLL));
    509 	if ((logmap->mtm_flags & MTM_ROLLING) == 0) {
    510 		cv_signal(&logmap->mtm_to_roll_cv);
    511 	}
    512 }
    513 
    514 /*
    515  * kick the roll thread and wait for it to finish a cycle
    516  */
    517 void
    518 logmap_forceroll(mt_map_t *mtm)
    519 {
    520 	mutex_enter(&mtm->mtm_mutex);
    521 	if ((mtm->mtm_flags & MTM_FORCE_ROLL) == 0) {
    522 		mtm->mtm_flags |= MTM_FORCE_ROLL;
    523 		cv_signal(&mtm->mtm_to_roll_cv);
    524 	}
    525 	do {
    526 		if ((mtm->mtm_flags & MTM_ROLL_RUNNING) == 0) {
    527 			mtm->mtm_flags &= ~MTM_FORCE_ROLL;
    528 			goto out;
    529 		}
    530 		cv_wait(&mtm->mtm_from_roll_cv, &mtm->mtm_mutex);
    531 	} while (mtm->mtm_flags & MTM_FORCE_ROLL);
    532 out:
    533 	mutex_exit(&mtm->mtm_mutex);
    534 }
    535 
    536 /*
    537  * remove rolled deltas within (mof, nb) and free them
    538  */
    539 void
    540 logmap_remove_roll(mt_map_t *mtm, offset_t mof, off_t nb)
    541 {
    542 	int		dolock = 0;
    543 	off_t		hnb;
    544 	mapentry_t	*me;
    545 	mapentry_t	**mep;
    546 	offset_t	savmof	= mof;
    547 	off_t		savnb	= nb;
    548 
    549 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
    550 	    map_check_linkage(mtm));
    551 
    552 again:
    553 	if (dolock)
    554 		rw_enter(&mtm->mtm_rwlock, RW_WRITER);
    555 	mutex_enter(&mtm->mtm_mutex);
    556 	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
    557 		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
    558 		if (hnb > nb)
    559 			hnb = nb;
    560 		/*
    561 		 * remove and free the rolled entries
    562 		 */
    563 		mep = MAP_HASH(mof, mtm);
    564 		while ((me = *mep) != 0) {
    565 			if ((me->me_flags & ME_ROLL) &&
    566 			    (MEwithinDATA(me, mof, hnb))) {
    567 				if (me->me_flags & ME_AGE) {
    568 					ASSERT(dolock == 0);
    569 					dolock = 1;
    570 					mutex_exit(&mtm->mtm_mutex);
    571 					mof = savmof;
    572 					nb = savnb;
    573 					goto again;
    574 				}
    575 				*mep = me->me_hash;
    576 				me->me_next->me_prev = me->me_prev;
    577 				me->me_prev->me_next = me->me_next;
    578 				me->me_flags &= ~(ME_HASH|ME_ROLL);
    579 				ASSERT(!(me->me_flags & ME_USER));
    580 				mtm->mtm_nme--;
    581 				/*
    582 				 * cancelled entries are handled by someone else
    583 				 */
    584 				if ((me->me_flags & ME_CANCEL) == 0) {
    585 					roll_stats[me->me_dt]++;
    586 					CRB_RELE(me);
    587 					kmem_cache_free(mapentry_cache, me);
    588 				}
    589 			} else
    590 				mep = &me->me_hash;
    591 		}
    592 	}
    593 	mutex_exit(&mtm->mtm_mutex);
    594 
    595 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
    596 	    map_check_linkage(mtm));
    597 
    598 	if (dolock)
    599 		rw_exit(&mtm->mtm_rwlock);
    600 }
    601 
    602 /*
    603  * Find the disk offset of the next delta to roll.
    604  * Returns 0: no more deltas to roll or a transaction is being committed
    605  *	   1: a delta to roll has been found and *mofp points
    606  *	      to the master file disk offset
    607  */
    608 int
    609 logmap_next_roll(mt_map_t *logmap, offset_t *mofp)
    610 {
    611 	mapentry_t *me;
    612 
    613 	ASSERT(((logmap->mtm_debug & MT_CHECK_MAP) == 0) ||
    614 	    map_check_linkage(logmap));
    615 
    616 	mutex_enter(&logmap->mtm_mutex);
    617 	for (me = logmap->mtm_next; me != (mapentry_t *)logmap;
    618 	    me = me->me_next) {
    619 		/* already rolled */
    620 		if (me->me_flags & ME_ROLL) {
    621 			continue;
    622 		}
    623 
    624 		/* part of currently busy transaction; stop */
    625 		if (me->me_tid == logmap->mtm_tid) {
    626 			break;
    627 		}
    628 
    629 		/* part of commit-in-progress transaction; stop */
    630 		if (me->me_tid == logmap->mtm_committid) {
    631 			break;
    632 		}
    633 
    634 		/*
    635 		 * We shouldn't see a DT_CANCEL mapentry whose
    636 		 * tid != mtm_committid, or != mtm_tid since
    637 		 * these are removed at the end of each committed
    638 		 * transaction.
    639 		 */
    640 		ASSERT(!(me->me_dt == DT_CANCEL));
    641 
    642 		*mofp = me->me_mof;
    643 		mutex_exit(&logmap->mtm_mutex);
    644 		return (1);
    645 	}
    646 	mutex_exit(&logmap->mtm_mutex);
    647 	return (0);
    648 }
    649 
    650 /*
    651  * put mapentry on sorted age list
    652  */
    653 static void
    654 logmap_list_age(mapentry_t **age, mapentry_t *meadd)
    655 {
    656 	mapentry_t	*me;
    657 
    658 	ASSERT(!(meadd->me_flags & (ME_AGE|ME_LIST)));
    659 
    660 	for (me = *age; me; age = &me->me_agenext, me = *age) {
    661 		if (me->me_age > meadd->me_age)
    662 			break;
    663 	}
    664 	meadd->me_agenext = me;
    665 	meadd->me_flags |= ME_AGE;
    666 	*age = meadd;
    667 }
    668 
    669 /*
    670  * get a list of deltas within <mof, mof+nb>
    671  *	returns with mtm_rwlock held
    672  *	return value says whether the entire mof range is covered by deltas
    673  */
    674 int
    675 logmap_list_get(
    676 	mt_map_t *mtm,
    677 	offset_t mof,
    678 	off_t nb,
    679 	mapentry_t **age)
    680 {
    681 	off_t		hnb;
    682 	mapentry_t	*me;
    683 	mapentry_t	**mep;
    684 	int		rwtype	= RW_READER;
    685 	offset_t	savmof	= mof;
    686 	off_t		savnb	= nb;
    687 	int		entire	= 0;
    688 	crb_t		*crb;
    689 
    690 	mtm->mtm_ref = 1;
    691 again:
    692 
    693 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
    694 	    map_check_linkage(mtm));
    695 
    696 	rw_enter(&mtm->mtm_rwlock, rwtype);
    697 	*age = NULL;
    698 	mutex_enter(&mtm->mtm_mutex);
    699 	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
    700 		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
    701 		if (hnb > nb)
    702 			hnb = nb;
    703 		/*
    704 		 * find overlapping entries
    705 		 */
    706 		mep = MAP_HASH(mof, mtm);
    707 		for (me = *mep; me; me = me->me_hash) {
    708 			if (me->me_dt == DT_CANCEL)
    709 				continue;
    710 			if (!DATAoverlapME(mof, hnb, me))
    711 				continue;
    712 			/*
    713 			 * check if map entry is in use
    714 			 * (about to be rolled).
    715 			 */
    716 			if (me->me_flags & ME_AGE) {
    717 				/*
    718 				 * reset the age bit in the list,
    719 				 * upgrade the lock, and try again
    720 				 */
    721 				for (me = *age; me; me = *age) {
    722 					*age = me->me_agenext;
    723 					me->me_flags &= ~ME_AGE;
    724 				}
    725 				mutex_exit(&mtm->mtm_mutex);
    726 				rw_exit(&mtm->mtm_rwlock);
    727 				rwtype = RW_WRITER;
    728 				mof = savmof;
    729 				nb = savnb;
    730 				entire = 0;
    731 				goto again;
    732 			} else {
    733 				/* add mapentry to age ordered list */
    734 				logmap_list_age(age, me);
    735 				crb = me->me_crb;
    736 				if (crb) {
    737 					if (DATAwithinCRB(savmof, savnb, crb)) {
    738 						entire = 1;
    739 					}
    740 				} else {
    741 					if (DATAwithinME(savmof, savnb, me)) {
    742 						entire = 1;
    743 					}
    744 				}
    745 			}
    746 		}
    747 	}
    748 	mutex_exit(&mtm->mtm_mutex);
    749 
    750 	ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock));
    751 	return (entire);
    752 }
    753 
    754 /*
    755  * Get a list of deltas for rolling - returns sucess or failure.
    756  * Also return the cached roll buffer if all deltas point to it.
    757  */
    758 int
    759 logmap_list_get_roll(mt_map_t *logmap, offset_t mof, rollbuf_t *rbp)
    760 {
    761 	mapentry_t	*me, **mep, *age = NULL;
    762 	crb_t		*crb = NULL;
    763 
    764 	ASSERT(RW_LOCK_HELD(&logmap->mtm_rwlock));
    765 	ASSERT(((logmap->mtm_debug & MT_CHECK_MAP) == 0) ||
    766 	    map_check_linkage(logmap));
    767 	ASSERT((mof & MAPBLOCKOFF) == 0);
    768 
    769 	rbp->rb_crb = NULL;
    770 
    771 	/*
    772 	 * find overlapping entries
    773 	 */
    774 	mutex_enter(&logmap->mtm_mutex);
    775 	mep = MAP_HASH(mof, logmap);
    776 	for (me = *mep; me; me = me->me_hash) {
    777 		if (!DATAoverlapME(mof, MAPBLOCKSIZE, me))
    778 			continue;
    779 		if (me->me_tid == logmap->mtm_tid)
    780 			continue;
    781 		if (me->me_tid == logmap->mtm_committid)
    782 			continue;
    783 		if (me->me_dt == DT_CANCEL)
    784 			continue;
    785 
    786 		/*
    787 		 * Check if map entry is in use (by lufs_read_strategy())
    788 		 * and if so reset the age bit in the list,
    789 		 * upgrade the lock, and try again
    790 		 */
    791 		if (me->me_flags & ME_AGE) {
    792 			for (me = age; me; me = age) {
    793 				age = me->me_agenext;
    794 				me->me_flags &= ~ME_AGE;
    795 			}
    796 			mutex_exit(&logmap->mtm_mutex);
    797 			return (1); /* failure */
    798 		} else {
    799 			/* add mapentry to age ordered list */
    800 			logmap_list_age(&age, me);
    801 		}
    802 	}
    803 	if (!age) {
    804 		goto out;
    805 	}
    806 
    807 	/*
    808 	 * Mark the deltas as being rolled.
    809 	 */
    810 	for (me = age; me; me = me->me_agenext) {
    811 		me->me_flags |= ME_ROLL;
    812 	}
    813 
    814 	/*
    815 	 * Test if all deltas are covered by one valid roll buffer
    816 	 */
    817 	crb = age->me_crb;
    818 	if (crb && !(crb->c_invalid)) {
    819 		for (me = age; me; me = me->me_agenext) {
    820 			if (me->me_crb != crb) {
    821 				crb = NULL;
    822 				break;
    823 			}
    824 		}
    825 		rbp->rb_crb = crb;
    826 	}
    827 out:
    828 	rbp->rb_age = age;
    829 
    830 	mutex_exit(&logmap->mtm_mutex);
    831 
    832 	ASSERT(((logmap->mtm_debug & MT_SCAN) == 0) ||
    833 	    logmap_logscan_debug(logmap, age));
    834 	ASSERT(RW_LOCK_HELD(&logmap->mtm_rwlock));
    835 	return (0); /* success */
    836 }
    837 
    838 void
    839 logmap_list_put_roll(mt_map_t *mtm, mapentry_t *age)
    840 {
    841 	mapentry_t	*me;
    842 
    843 	ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock));
    844 	mutex_enter(&mtm->mtm_mutex);
    845 	for (me = age; me; me = age) {
    846 		age = me->me_agenext;
    847 		me->me_flags &= ~ME_AGE;
    848 	}
    849 	mutex_exit(&mtm->mtm_mutex);
    850 }
    851 
    852 void
    853 logmap_list_put(mt_map_t *mtm, mapentry_t *age)
    854 {
    855 	mapentry_t	*me;
    856 
    857 	ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock));
    858 	mutex_enter(&mtm->mtm_mutex);
    859 	for (me = age; me; me = age) {
    860 		age = me->me_agenext;
    861 		me->me_flags &= ~ME_AGE;
    862 	}
    863 	mutex_exit(&mtm->mtm_mutex);
    864 	rw_exit(&mtm->mtm_rwlock);
    865 }
    866 
    867 #define	UFS_RW_BALANCE 2
    868 int ufs_rw_balance = UFS_RW_BALANCE;
    869 
    870 /*
    871  * Check if we need to read the master.
    872  * The master does not need to be read if the log deltas to the
    873  * block are for one contiguous set of full disk sectors.
    874  * Both cylinder group bit maps DT_CG (8K); directory entries (512B);
    875  * and possibly others should not require master disk reads.
    876  * Calculate the sector map for writing later.
    877  */
    878 int
    879 logmap_setup_read(mapentry_t *age, rollbuf_t *rbp)
    880 {
    881 	offset_t mof;
    882 	crb_t *crb;
    883 	mapentry_t *me;
    884 	int32_t nb;
    885 	int i;
    886 	int start_sec, end_sec;
    887 	int read_needed = 0;
    888 	int all_inodes = 1;
    889 	int first_sec = INT_MAX;
    890 	int last_sec = -1;
    891 	rbsecmap_t secmap = 0;
    892 
    893 	/* LINTED: warning: logical expression always true: op "||" */
    894 	ASSERT((MAPBLOCKSIZE / DEV_BSIZE) == (sizeof (secmap) * NBBY));
    895 
    896 	for (me = age; me; me = me->me_agenext) {
    897 		crb = me->me_crb;
    898 		if (crb) {
    899 			nb = crb->c_nb;
    900 			mof = crb->c_mof;
    901 		} else {
    902 			nb = me->me_nb;
    903 			mof = me->me_mof;
    904 		}
    905 
    906 		/*
    907 		 * If the delta is not sector aligned then
    908 		 * read the whole block.
    909 		 */
    910 		if ((nb & DEV_BMASK) || (mof & DEV_BMASK)) {
    911 			read_needed = 1;
    912 		}
    913 
    914 		/* Set sector map used in the MAPBLOCKSIZE block.  */
    915 		start_sec = (mof & MAPBLOCKOFF) >> DEV_BSHIFT;
    916 		end_sec = start_sec + ((nb - 1) >> DEV_BSHIFT);
    917 		for (i = start_sec; i <= end_sec; i++) {
    918 			secmap |= UINT16_C(1) << i;
    919 		}
    920 
    921 		if (me->me_dt != DT_INODE) {
    922 			all_inodes = 0;
    923 		}
    924 		if (start_sec < first_sec) {
    925 			first_sec = start_sec;
    926 		}
    927 		if (end_sec > last_sec) {
    928 			last_sec = end_sec;
    929 		}
    930 	}
    931 
    932 	ASSERT(secmap);
    933 	ASSERT(first_sec != INT_MAX);
    934 	ASSERT(last_sec != -1);
    935 
    936 	if (all_inodes) {
    937 		/*
    938 		 * Here we have a tradeoff choice. It must be better to
    939 		 * do 2 writes * in the same MAPBLOCKSIZE chunk, than a
    940 		 * read and a write. But what about 3 or more writes, versus
    941 		 * a read+write? * Where is the cut over? It will depend on
    942 		 * the track caching, scsi driver and other activity.
    943 		 * A unpublished tunable is defined (ufs_rw_balance) that
    944 		 * currently defaults to 2.
    945 		 */
    946 		if (!read_needed) {
    947 			int count = 0, gap = 0;
    948 			int sector_set; /* write needed to this sector */
    949 
    950 			/* Count the gaps (every 1 to 0 transation) */
    951 			for (i = first_sec + 1; i < last_sec; i++) {
    952 				sector_set = secmap & (UINT16_C(1) << i);
    953 				if (!gap && !sector_set) {
    954 					gap = 1;
    955 					count++;
    956 					if (count > ufs_rw_balance) {
    957 						read_needed = 1;
    958 						break;
    959 					}
    960 				} else if (gap && sector_set) {
    961 					gap = 0;
    962 				}
    963 			}
    964 		}
    965 
    966 		/*
    967 		 * Inodes commonly make up the majority (~85%) of deltas.
    968 		 * They cannot contain embedded user data, so its safe to
    969 		 * read and write them all in one IO.
    970 		 * But for directory entries, shadow inode data, and
    971 		 * quota record data the user data fragments can be embedded
    972 		 * betwen those metadata, and so its not safe to read, modify
    973 		 * then write the entire range as user asynchronous user data
    974 		 * writes could get overwritten with old data.
    975 		 * Thus we have to create a segment map of meta data that
    976 		 * needs to get written.
    977 		 *
    978 		 * If user data was logged then this issue would go away.
    979 		 */
    980 		if (read_needed) {
    981 			for (i = first_sec + 1; i < last_sec; i++) {
    982 				secmap |= (UINT16_C(1) << i);
    983 			}
    984 		}
    985 	}
    986 	rbp->rb_secmap = secmap;
    987 	return (read_needed);
    988 }
    989 
    990 /*
    991  * Abort the load of a set of log map delta's.
    992  * ie,
    993  * Clear out all mapentries on this unit's log map
    994  * which have a tid (transaction id) equal to the
    995  * parameter tid.   Walk the cancel list, taking everything
    996  * off it, too.
    997  */
    998 static void
    999 logmap_abort(ml_unit_t *ul, uint32_t tid)
   1000 {
   1001 	struct mt_map	*mtm = ul->un_logmap;	/* Log map */
   1002 	mapentry_t	*me, **mep;
   1003 	int		i;
   1004 
   1005 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
   1006 	    map_check_linkage(mtm));
   1007 
   1008 	/*
   1009 	 * wait for any outstanding reads to finish; lock out future reads
   1010 	 */
   1011 	rw_enter(&mtm->mtm_rwlock, RW_WRITER);
   1012 
   1013 	mutex_enter(&mtm->mtm_mutex);
   1014 	/* Take everything off cancel list */
   1015 	while ((me = mtm->mtm_cancel) != NULL) {
   1016 		mtm->mtm_cancel = me->me_cancel;
   1017 		me->me_flags &= ~ME_CANCEL;
   1018 		me->me_cancel = NULL;
   1019 	}
   1020 
   1021 	/*
   1022 	 * Now take out all mapentries with current tid, and committid
   1023 	 * as this function is called from logmap_logscan and logmap_commit
   1024 	 * When it is called from logmap_logscan mtm_tid == mtm_committid
   1025 	 * But when logmap_abort is called from logmap_commit it is
   1026 	 * because the log errored when trying to write the commit record,
   1027 	 * after the async ops have been allowed to start in top_end_sync.
   1028 	 * So we also need to remove all mapentries from the transaction whose
   1029 	 * commit failed.
   1030 	 */
   1031 	for (i = 0; i < mtm->mtm_nhash; i++) {
   1032 		mep = &mtm->mtm_hash[i];
   1033 		while ((me = *mep) != NULL) {
   1034 			if (me->me_tid == tid ||
   1035 			    me->me_tid == mtm->mtm_committid) {
   1036 				*mep = me->me_hash;
   1037 				me->me_next->me_prev = me->me_prev;
   1038 				me->me_prev->me_next = me->me_next;
   1039 				if (!(me->me_flags & ME_USER)) {
   1040 					mtm->mtm_nme--;
   1041 				}
   1042 				CRB_RELE(me);
   1043 				kmem_cache_free(mapentry_cache, me);
   1044 				continue;
   1045 			}
   1046 			mep = &me->me_hash;
   1047 		}
   1048 	}
   1049 
   1050 	if (!(ul->un_flags & LDL_SCAN))
   1051 		mtm->mtm_flags |= MTM_CANCELED;
   1052 	mutex_exit(&mtm->mtm_mutex);
   1053 	mtm->mtm_dirty = 0;
   1054 	mtm->mtm_nmet = 0;
   1055 	rw_exit(&mtm->mtm_rwlock);
   1056 
   1057 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
   1058 	    map_check_linkage(mtm));
   1059 }
   1060 
   1061 static void
   1062 logmap_wait_space(mt_map_t *mtm, ml_unit_t *ul, mapentry_t *me)
   1063 {
   1064 	ASSERT(MUTEX_HELD(&ul->un_log_mutex));
   1065 
   1066 	while (!ldl_has_space(ul, me)) {
   1067 		ASSERT(!(ul->un_flags & LDL_NOROLL));
   1068 		mutex_exit(&ul->un_log_mutex);
   1069 		logmap_forceroll(mtm);
   1070 		mutex_enter(&ul->un_log_mutex);
   1071 		if (ul->un_flags & LDL_ERROR)
   1072 			break;
   1073 	}
   1074 
   1075 	ASSERT(MUTEX_HELD(&ul->un_log_mutex));
   1076 }
   1077 
   1078 /*
   1079  * put a list of deltas into a logmap
   1080  * If va == NULL, don't write to the log.
   1081  */
   1082 void
   1083 logmap_add(
   1084 	ml_unit_t *ul,
   1085 	char *va,			/* Ptr to buf w/deltas & data */
   1086 	offset_t vamof,			/* Offset on master of buf start */
   1087 	mapentry_t *melist)		/* Entries to add */
   1088 {
   1089 	offset_t	mof;
   1090 	off_t		nb;
   1091 	mapentry_t	*me;
   1092 	mapentry_t	**mep;
   1093 	mapentry_t	**savmep;
   1094 	uint32_t	tid;
   1095 	mt_map_t	*mtm	= ul->un_logmap;
   1096 
   1097 	mutex_enter(&ul->un_log_mutex);
   1098 	if (va)
   1099 		logmap_wait_space(mtm, ul, melist);
   1100 
   1101 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
   1102 	    map_check_linkage(mtm));
   1103 
   1104 	mtm->mtm_ref = 1;
   1105 	mtm->mtm_dirty++;
   1106 	tid = mtm->mtm_tid;
   1107 	while (melist) {
   1108 		mof = melist->me_mof;
   1109 		nb  = melist->me_nb;
   1110 
   1111 		/*
   1112 		 * search for overlaping entries
   1113 		 */
   1114 		savmep = mep = MAP_HASH(mof, mtm);
   1115 		mutex_enter(&mtm->mtm_mutex);
   1116 		while ((me = *mep) != 0) {
   1117 			/*
   1118 			 * Data consumes old map entry; cancel map entry.
   1119 			 * Take care when we replace an old map entry
   1120 			 * which carries quota information with a newer entry
   1121 			 * which does not. In that case the push function
   1122 			 * would not be called to clean up the dquot structure.
   1123 			 * This would be found later by invalidatedq() causing
   1124 			 * a panic when the filesystem in unmounted.
   1125 			 * We clean up the dquot manually and then replace
   1126 			 * the map entry.
   1127 			 */
   1128 			if (MEwithinDATA(me, mof, nb) &&
   1129 			    ((me->me_flags & (ME_ROLL|ME_CANCEL)) == 0)) {
   1130 				if (tid == me->me_tid &&
   1131 				    ((me->me_flags & ME_AGE) == 0)) {
   1132 					*mep = me->me_hash;
   1133 					me->me_next->me_prev = me->me_prev;
   1134 					me->me_prev->me_next = me->me_next;
   1135 					ASSERT(!(me->me_flags & ME_USER));
   1136 					mtm->mtm_nme--;
   1137 					/*
   1138 					 * Special case if the mapentry
   1139 					 * carries a dquot and a push function.
   1140 					 * We have to clean up the quota info
   1141 					 * before replacing the mapentry.
   1142 					 */
   1143 					if (me->me_dt == DT_QR)
   1144 						HANDLE_DQUOT(me, melist);
   1145 
   1146 					kmem_cache_free(mapentry_cache, me);
   1147 					continue;
   1148 				}
   1149 				me->me_cancel = mtm->mtm_cancel;
   1150 				mtm->mtm_cancel = me;
   1151 				me->me_flags |= ME_CANCEL;
   1152 			}
   1153 			mep = &(*mep)->me_hash;
   1154 		}
   1155 		mutex_exit(&mtm->mtm_mutex);
   1156 
   1157 		/*
   1158 		 * remove from list
   1159 		 */
   1160 		me = melist;
   1161 		melist = melist->me_hash;
   1162 		me->me_flags &= ~ME_LIST;
   1163 		/*
   1164 		 * If va != NULL, put in the log.
   1165 		 */
   1166 		if (va)
   1167 			ldl_write(ul, va, vamof, me);
   1168 		if (ul->un_flags & LDL_ERROR) {
   1169 			kmem_cache_free(mapentry_cache, me);
   1170 			continue;
   1171 		}
   1172 		ASSERT((va == NULL) ||
   1173 		    ((mtm->mtm_debug & MT_LOG_WRITE_CHECK) == 0) ||
   1174 		    map_check_ldl_write(ul, va, vamof, me));
   1175 
   1176 		/*
   1177 		 * put on hash
   1178 		 */
   1179 		mutex_enter(&mtm->mtm_mutex);
   1180 		me->me_hash = *savmep;
   1181 		*savmep = me;
   1182 		me->me_next = (mapentry_t *)mtm;
   1183 		me->me_prev = mtm->mtm_prev;
   1184 		mtm->mtm_prev->me_next = me;
   1185 		mtm->mtm_prev = me;
   1186 		me->me_flags |= ME_HASH;
   1187 		me->me_tid = tid;
   1188 		me->me_age = mtm->mtm_age++;
   1189 		mtm->mtm_nme++;
   1190 		mtm->mtm_nmet++;
   1191 		mutex_exit(&mtm->mtm_mutex);
   1192 	}
   1193 
   1194 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
   1195 	    map_check_linkage(mtm));
   1196 	mutex_exit(&ul->un_log_mutex);
   1197 }
   1198 
   1199 /*
   1200  * Add the delta(s) into the log.
   1201  * Create one cached roll buffer logmap entry, and reference count the
   1202  * number of mapentries refering to it.
   1203  * Cancel previous logmap entries.
   1204  * logmap_add is tolerant of failure to allocate a cached roll buffer.
   1205  */
   1206 void
   1207 logmap_add_buf(
   1208 	ml_unit_t *ul,
   1209 	char *va,			/* Ptr to buf w/deltas & data */
   1210 	offset_t bufmof,		/* Offset on master of buf start */
   1211 	mapentry_t *melist,		/* Entries to add */
   1212 	caddr_t	buf,			/* Buffer containing delta(s) */
   1213 	uint32_t bufsz)			/* Size of buf */
   1214 {
   1215 	offset_t	mof;
   1216 	offset_t	vamof = bufmof + (va - buf);
   1217 	off_t		nb;
   1218 	mapentry_t	*me;
   1219 	mapentry_t	**mep;
   1220 	mapentry_t	**savmep;
   1221 	uint32_t	tid;
   1222 	mt_map_t	*mtm	= ul->un_logmap;
   1223 	crb_t		*crb;
   1224 	crb_t		*crbsav = NULL;
   1225 
   1226 	ASSERT((bufsz & DEV_BMASK) == 0);
   1227 	mutex_enter(&ul->un_log_mutex);
   1228 	logmap_wait_space(mtm, ul, melist);
   1229 
   1230 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
   1231 	    map_check_linkage(mtm));
   1232 
   1233 	mtm->mtm_ref = 1;
   1234 	mtm->mtm_dirty++;
   1235 	tid = mtm->mtm_tid;
   1236 	while (melist) {
   1237 		mof = melist->me_mof;
   1238 		nb  = melist->me_nb;
   1239 
   1240 		/*
   1241 		 * search for overlapping entries
   1242 		 */
   1243 		savmep = mep = MAP_HASH(mof, mtm);
   1244 		mutex_enter(&mtm->mtm_mutex);
   1245 		while ((me = *mep) != 0) {
   1246 			/*
   1247 			 * Data consumes old map entry; cancel map entry.
   1248 			 * Take care when we replace an old map entry
   1249 			 * which carries quota information with a newer entry
   1250 			 * which does not. In that case the push function
   1251 			 * would not be called to clean up the dquot structure.
   1252 			 * This would be found later by invalidatedq() causing
   1253 			 * a panic when the filesystem in unmounted.
   1254 			 * We clean up the dquot manually and then replace
   1255 			 * the map entry.
   1256 			 */
   1257 			crb = me->me_crb;
   1258 			if (MEwithinDATA(me, mof, nb) &&
   1259 			    ((me->me_flags & (ME_ROLL|ME_CANCEL)) == 0)) {
   1260 				if (tid == me->me_tid &&
   1261 				    ((me->me_flags & ME_AGE) == 0)) {
   1262 					*mep = me->me_hash;
   1263 					me->me_next->me_prev = me->me_prev;
   1264 					me->me_prev->me_next = me->me_next;
   1265 					ASSERT(!(me->me_flags & ME_USER));
   1266 					mtm->mtm_nme--;
   1267 					/*
   1268 					 * Special case if the mapentry
   1269 					 * carries a dquot and a push function.
   1270 					 * We have to clean up the quota info
   1271 					 * before replacing the mapentry.
   1272 					 */
   1273 					if (me->me_dt == DT_QR)
   1274 						HANDLE_DQUOT(me, melist);
   1275 
   1276 					/*
   1277 					 * If this soon to be deleted mapentry
   1278 					 * has a suitable roll buffer then
   1279 					 * re-use it.
   1280 					 */
   1281 					if (crb && (--crb->c_refcnt == 0)) {
   1282 						if (crbsav ||
   1283 						    (crb->c_nb != bufsz)) {
   1284 							CRB_FREE(crb, me);
   1285 						} else {
   1286 							bcopy(buf, crb->c_buf,
   1287 							    bufsz);
   1288 							crb->c_invalid = 0;
   1289 							crb->c_mof = bufmof;
   1290 							crbsav = crb;
   1291 							me->me_crb = NULL;
   1292 						}
   1293 					}
   1294 					kmem_cache_free(mapentry_cache, me);
   1295 					continue;
   1296 				}
   1297 				me->me_cancel = mtm->mtm_cancel;
   1298 				mtm->mtm_cancel = me;
   1299 				me->me_flags |= ME_CANCEL;
   1300 			}
   1301 
   1302 			/*
   1303 			 * Inode deltas within the same fs block come
   1304 			 * in individually as separate calls to logmap_add().
   1305 			 * All others come in as one call. So check for an
   1306 			 * existing entry where we can re-use the crb.
   1307 			 */
   1308 			if ((me->me_dt == DT_INODE) && (tid == me->me_tid) &&
   1309 			    !crbsav && crb &&
   1310 			    WITHIN(mof, nb, crb->c_mof, crb->c_nb)) {
   1311 				ASSERT(crb->c_mof == bufmof);
   1312 				ASSERT(crb->c_nb == bufsz);
   1313 				bcopy(buf, crb->c_buf, bufsz);
   1314 				crbsav = crb;
   1315 			}
   1316 			mep = &(*mep)->me_hash;
   1317 		}
   1318 		mutex_exit(&mtm->mtm_mutex);
   1319 
   1320 		/*
   1321 		 * If we don't already have a crb then allocate one
   1322 		 * and copy the incoming buffer. Only do this once
   1323 		 * for all the incoming deltas.
   1324 		 */
   1325 		if ((crbsav == NULL) && (melist->me_dt != DT_ABZERO)) {
   1326 			/*
   1327 			 * Only use a cached roll buffer if we
   1328 			 * have enough memory, and check for failures.
   1329 			 */
   1330 			if (((ufs_crb_size + bufsz) < ufs_crb_limit) &&
   1331 			    (kmem_avail() > bufsz)) {
   1332 				crbsav = kmem_alloc(sizeof (crb_t), KM_NOSLEEP);
   1333 			} else {
   1334 				ufs_crb_alloc_fails++;
   1335 			}
   1336 			if (crbsav) {
   1337 				crbsav->c_buf = kmem_alloc(bufsz, KM_NOSLEEP);
   1338 				if (crbsav->c_buf) {
   1339 					atomic_add_64(&ufs_crb_size,
   1340 					    (uint64_t)bufsz);
   1341 					if (ufs_crb_size > ufs_crb_max_size) {
   1342 						ufs_crb_max_size = ufs_crb_size;
   1343 					}
   1344 					bcopy(buf, crbsav->c_buf, bufsz);
   1345 					crbsav->c_nb = bufsz;
   1346 					crbsav->c_refcnt = 0;
   1347 					crbsav->c_invalid = 0;
   1348 					ASSERT((bufmof & DEV_BMASK) == 0);
   1349 					crbsav->c_mof = bufmof;
   1350 				} else {
   1351 					kmem_free(crbsav, sizeof (crb_t));
   1352 					crbsav = NULL;
   1353 				}
   1354 			}
   1355 		}
   1356 
   1357 		/*
   1358 		 * remove from list
   1359 		 */
   1360 		me = melist;
   1361 		melist = melist->me_hash;
   1362 		me->me_flags &= ~ME_LIST;
   1363 		me->me_crb = crbsav;
   1364 		if (crbsav) {
   1365 			crbsav->c_refcnt++;
   1366 		}
   1367 		crbsav = NULL;
   1368 
   1369 		ASSERT(va);
   1370 		ldl_write(ul, va, vamof, me); /* add to on-disk log */
   1371 		if (ul->un_flags & LDL_ERROR) {
   1372 			CRB_RELE(me);
   1373 			kmem_cache_free(mapentry_cache, me);
   1374 			continue;
   1375 		}
   1376 		ASSERT(((mtm->mtm_debug & MT_LOG_WRITE_CHECK) == 0) ||
   1377 		    map_check_ldl_write(ul, va, vamof, me));
   1378 
   1379 		/*
   1380 		 * put on hash
   1381 		 */
   1382 		mutex_enter(&mtm->mtm_mutex);
   1383 		me->me_hash = *savmep;
   1384 		*savmep = me;
   1385 		me->me_next = (mapentry_t *)mtm;
   1386 		me->me_prev = mtm->mtm_prev;
   1387 		mtm->mtm_prev->me_next = me;
   1388 		mtm->mtm_prev = me;
   1389 		me->me_flags |= ME_HASH;
   1390 		me->me_tid = tid;
   1391 		me->me_age = mtm->mtm_age++;
   1392 		mtm->mtm_nme++;
   1393 		mtm->mtm_nmet++;
   1394 		mutex_exit(&mtm->mtm_mutex);
   1395 	}
   1396 
   1397 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
   1398 	    map_check_linkage(mtm));
   1399 	mutex_exit(&ul->un_log_mutex);
   1400 }
   1401 
   1402 /*
   1403  * free up any cancelled deltas
   1404  */
   1405 void
   1406 logmap_free_cancel(mt_map_t *mtm, mapentry_t **cancelhead)
   1407 {
   1408 	int		dolock	= 0;
   1409 	mapentry_t	*me;
   1410 	mapentry_t	**mep;
   1411 
   1412 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
   1413 	    map_check_linkage(mtm));
   1414 
   1415 again:
   1416 	if (dolock)
   1417 		rw_enter(&mtm->mtm_rwlock, RW_WRITER);
   1418 
   1419 	/*
   1420 	 * At EOT, cancel the indicated deltas
   1421 	 */
   1422 	mutex_enter(&mtm->mtm_mutex);
   1423 	if (mtm->mtm_flags & MTM_CANCELED) {
   1424 		mtm->mtm_flags &= ~MTM_CANCELED;
   1425 		ASSERT(dolock == 0);
   1426 		mutex_exit(&mtm->mtm_mutex);
   1427 		return;
   1428 	}
   1429 
   1430 	while ((me = *cancelhead) != NULL) {
   1431 		/*
   1432 		 * roll forward or read collision; wait and try again
   1433 		 */
   1434 		if (me->me_flags & ME_AGE) {
   1435 			ASSERT(dolock == 0);
   1436 			mutex_exit(&mtm->mtm_mutex);
   1437 			dolock = 1;
   1438 			goto again;
   1439 		}
   1440 		/*
   1441 		 * remove from cancel list
   1442 		 */
   1443 		*cancelhead = me->me_cancel;
   1444 		me->me_cancel = NULL;
   1445 		me->me_flags &= ~(ME_CANCEL);
   1446 
   1447 		/*
   1448 		 * logmap_remove_roll handles ME_ROLL entries later
   1449 		 *	we leave them around for logmap_iscancel
   1450 		 *	XXX is this necessary?
   1451 		 */
   1452 		if (me->me_flags & ME_ROLL)
   1453 			continue;
   1454 
   1455 		/*
   1456 		 * remove from hash (if necessary)
   1457 		 */
   1458 		if (me->me_flags & ME_HASH) {
   1459 			mep = MAP_HASH(me->me_mof, mtm);
   1460 			while (*mep) {
   1461 				if (*mep == me) {
   1462 					*mep = me->me_hash;
   1463 					me->me_next->me_prev = me->me_prev;
   1464 					me->me_prev->me_next = me->me_next;
   1465 					me->me_flags &= ~(ME_HASH);
   1466 					if (!(me->me_flags & ME_USER)) {
   1467 						mtm->mtm_nme--;
   1468 					}
   1469 					break;
   1470 				} else
   1471 					mep = &(*mep)->me_hash;
   1472 			}
   1473 		}
   1474 		/*
   1475 		 * put the entry on the free list
   1476 		 */
   1477 		CRB_RELE(me);
   1478 		kmem_cache_free(mapentry_cache, me);
   1479 	}
   1480 	mutex_exit(&mtm->mtm_mutex);
   1481 	if (dolock)
   1482 		rw_exit(&mtm->mtm_rwlock);
   1483 
   1484 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
   1485 	    map_check_linkage(mtm));
   1486 }
   1487 
   1488 
   1489 void
   1490 logmap_commit(ml_unit_t *ul, uint32_t tid)
   1491 {
   1492 	mapentry_t	me;
   1493 	mt_map_t	*mtm	= ul->un_logmap;
   1494 
   1495 
   1496 	ASSERT(MUTEX_HELD(&ul->un_log_mutex));
   1497 
   1498 	/*
   1499 	 * async'ly write a commit rec into the log
   1500 	 */
   1501 	if (mtm->mtm_dirty) {
   1502 		/*
   1503 		 * put commit record into log
   1504 		 */
   1505 		me.me_mof = mtm->mtm_tid;
   1506 		me.me_dt = DT_COMMIT;
   1507 		me.me_nb = 0;
   1508 		me.me_hash = NULL;
   1509 		logmap_wait_space(mtm, ul, &me);
   1510 		ldl_write(ul, NULL, (offset_t)0, &me);
   1511 		ldl_round_commit(ul);
   1512 
   1513 		/*
   1514 		 * abort on error; else reset dirty flag
   1515 		 */
   1516 		if (ul->un_flags & LDL_ERROR)
   1517 			logmap_abort(ul, tid);
   1518 		else {
   1519 			mtm->mtm_dirty = 0;
   1520 			mtm->mtm_nmet = 0;
   1521 			mtm->mtm_cfrags = 0;
   1522 		}
   1523 		/* push commit */
   1524 		ldl_push_commit(ul);
   1525 	}
   1526 }
   1527 
   1528 void
   1529 logmap_sethead(mt_map_t *mtm, ml_unit_t *ul)
   1530 {
   1531 	off_t		lof;
   1532 	uint32_t	tid;
   1533 	mapentry_t	*me;
   1534 
   1535 	/*
   1536 	 * move the head forward so the log knows how full it is
   1537 	 * Make sure to skip any mapentry whose me_lof is 0, these
   1538 	 * are just place holders for DT_CANCELED freed user blocks
   1539 	 * for the current moby.
   1540 	 */
   1541 	mutex_enter(&ul->un_log_mutex);
   1542 	mutex_enter(&mtm->mtm_mutex);
   1543 	me = mtm->mtm_next;
   1544 	while (me != (mapentry_t *)mtm && me->me_lof == 0) {
   1545 		me = me->me_next;
   1546 	}
   1547 
   1548 	if (me == (mapentry_t *)mtm)
   1549 		lof = -1;
   1550 	else {
   1551 		lof = me->me_lof;
   1552 		tid = me->me_tid;
   1553 	}
   1554 	mutex_exit(&mtm->mtm_mutex);
   1555 	ldl_sethead(ul, lof, tid);
   1556 	if (lof == -1)
   1557 		mtm->mtm_age = 0;
   1558 	mutex_exit(&ul->un_log_mutex);
   1559 }
   1560 
   1561 void
   1562 logmap_settail(mt_map_t *mtm, ml_unit_t *ul)
   1563 {
   1564 	off_t		lof;
   1565 	size_t		nb;
   1566 
   1567 	/*
   1568 	 * set the tail after the logmap_abort
   1569 	 */
   1570 	mutex_enter(&ul->un_log_mutex);
   1571 	mutex_enter(&mtm->mtm_mutex);
   1572 	if (mtm->mtm_prev == (mapentry_t *)mtm)
   1573 		lof = -1;
   1574 	else {
   1575 		/*
   1576 		 * set the tail to the end of the last commit
   1577 		 */
   1578 		lof = mtm->mtm_tail_lof;
   1579 		nb = mtm->mtm_tail_nb;
   1580 	}
   1581 	mutex_exit(&mtm->mtm_mutex);
   1582 	ldl_settail(ul, lof, nb);
   1583 	mutex_exit(&ul->un_log_mutex);
   1584 }
   1585 
   1586 /*
   1587  * when reseting a device; roll the log until every
   1588  * delta has been rolled forward
   1589  */
   1590 void
   1591 logmap_roll_dev(ml_unit_t *ul)
   1592 {
   1593 	mt_map_t	*mtm	= ul->un_logmap;
   1594 	mapentry_t	*me;
   1595 	ufsvfs_t	*ufsvfsp = ul->un_ufsvfs;
   1596 
   1597 again:
   1598 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
   1599 	    map_check_linkage(mtm));
   1600 	if (ul->un_flags & (LDL_ERROR|LDL_NOROLL))
   1601 		return;
   1602 
   1603 	/*
   1604 	 * look for deltas
   1605 	 */
   1606 	mutex_enter(&mtm->mtm_mutex);
   1607 	for (me = mtm->mtm_next; me != (mapentry_t *)mtm; me = me->me_next) {
   1608 		if (me->me_flags & ME_ROLL)
   1609 			break;
   1610 		if (me->me_tid == mtm->mtm_tid)
   1611 			continue;
   1612 		if (me->me_tid == mtm->mtm_committid)
   1613 			continue;
   1614 		break;
   1615 	}
   1616 
   1617 	/*
   1618 	 * found a delta; kick the roll thread
   1619 	 * but only if the thread is running... (jmh)
   1620 	 */
   1621 	if (me != (mapentry_t *)mtm) {
   1622 		mutex_exit(&mtm->mtm_mutex);
   1623 		logmap_forceroll(mtm);
   1624 		goto again;
   1625 	}
   1626 
   1627 	/*
   1628 	 * no more deltas, return
   1629 	 */
   1630 	mutex_exit(&mtm->mtm_mutex);
   1631 	(void) ufs_putsummaryinfo(ul->un_dev, ufsvfsp, ufsvfsp->vfs_fs);
   1632 
   1633 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
   1634 	    map_check_linkage(mtm));
   1635 }
   1636 
   1637 static void
   1638 logmap_cancel_delta(ml_unit_t *ul, offset_t mof, int32_t nb, int metadata)
   1639 {
   1640 	mapentry_t	*me;
   1641 	mapentry_t	**mep;
   1642 	mt_map_t	*mtm	= ul->un_logmap;
   1643 	int		frags;
   1644 
   1645 	/*
   1646 	 * map has been referenced and is dirty
   1647 	 */
   1648 	mtm->mtm_ref = 1;
   1649 	mtm->mtm_dirty++;
   1650 
   1651 	/*
   1652 	 * get a mapentry
   1653 	 */
   1654 	me = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
   1655 	bzero(me, sizeof (mapentry_t));
   1656 
   1657 	/*
   1658 	 * initialize cancel record and put in logmap
   1659 	 */
   1660 	me->me_mof = mof;
   1661 	me->me_nb = nb;
   1662 	me->me_dt = DT_CANCEL;
   1663 	me->me_tid = mtm->mtm_tid;
   1664 	me->me_hash = NULL;
   1665 
   1666 	/*
   1667 	 * Write delta to log if this delta is for metadata.  If this is not
   1668 	 * metadata it is user data and we are just putting a cancel
   1669 	 * mapentry into the hash to cancel a user block deletion
   1670 	 * in which we do not want the block to be allocated
   1671 	 * within this moby.  This cancel entry will prevent the block from
   1672 	 * being allocated within the moby and prevent user data corruption
   1673 	 * if we happen to crash before this moby is committed.
   1674 	 */
   1675 	mutex_enter(&ul->un_log_mutex);
   1676 	if (metadata) {
   1677 		logmap_wait_space(mtm, ul, me);
   1678 		ldl_write(ul, NULL, (offset_t)0, me);
   1679 		if (ul->un_flags & LDL_ERROR) {
   1680 			kmem_cache_free(mapentry_cache, me);
   1681 			mutex_exit(&ul->un_log_mutex);
   1682 			return;
   1683 		}
   1684 	}
   1685 
   1686 	/*
   1687 	 * put in hash and on cancel list
   1688 	 */
   1689 	mep = MAP_HASH(mof, mtm);
   1690 	mutex_enter(&mtm->mtm_mutex);
   1691 	me->me_age = mtm->mtm_age++;
   1692 	me->me_hash = *mep;
   1693 	*mep = me;
   1694 	me->me_next = (mapentry_t *)mtm;
   1695 	me->me_prev = mtm->mtm_prev;
   1696 	mtm->mtm_prev->me_next = me;
   1697 	mtm->mtm_prev = me;
   1698 	me->me_cancel = mtm->mtm_cancel;
   1699 	mtm->mtm_cancel = me;
   1700 	if (metadata) {
   1701 		mtm->mtm_nme++;
   1702 		mtm->mtm_nmet++;
   1703 	} else {
   1704 		me->me_flags = ME_USER;
   1705 	}
   1706 	me->me_flags |= (ME_HASH|ME_CANCEL);
   1707 	if (!(metadata)) {
   1708 		frags = blkoff(ul->un_ufsvfs->vfs_fs, nb);
   1709 		if (frags)
   1710 			mtm->mtm_cfrags +=
   1711 			    numfrags(ul->un_ufsvfs->vfs_fs, frags);
   1712 	}
   1713 	mutex_exit(&mtm->mtm_mutex);
   1714 
   1715 	mutex_exit(&ul->un_log_mutex);
   1716 }
   1717 
   1718 /*
   1719  * cancel entries in a logmap (entries are freed at EOT)
   1720  */
   1721 void
   1722 logmap_cancel(ml_unit_t *ul, offset_t mof, off_t nb, int metadata)
   1723 {
   1724 	int32_t		hnb;
   1725 	mapentry_t	*me;
   1726 	mapentry_t	**mep;
   1727 	mt_map_t	*mtm	= ul->un_logmap;
   1728 	crb_t		*crb;
   1729 
   1730 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
   1731 	    map_check_linkage(mtm));
   1732 
   1733 	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
   1734 		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
   1735 		if (hnb > nb)
   1736 			hnb = nb;
   1737 		/*
   1738 		 * Find overlapping metadata entries.  Don't search through
   1739 		 * the hash chains if this is user data because it is only
   1740 		 * possible to have overlapping map entries for metadata,
   1741 		 * and the search can become expensive for large files.
   1742 		 */
   1743 		if (metadata) {
   1744 			mep = MAP_HASH(mof, mtm);
   1745 			mutex_enter(&mtm->mtm_mutex);
   1746 			for (me = *mep; me; me = me->me_hash) {
   1747 				if (!DATAoverlapME(mof, hnb, me))
   1748 					continue;
   1749 
   1750 				ASSERT(MEwithinDATA(me, mof, hnb));
   1751 
   1752 				if ((me->me_flags & ME_CANCEL) == 0) {
   1753 					me->me_cancel = mtm->mtm_cancel;
   1754 					mtm->mtm_cancel = me;
   1755 					me->me_flags |= ME_CANCEL;
   1756 					crb = me->me_crb;
   1757 					if (crb) {
   1758 						crb->c_invalid = 1;
   1759 					}
   1760 				}
   1761 			}
   1762 			mutex_exit(&mtm->mtm_mutex);
   1763 		}
   1764 
   1765 		/*
   1766 		 * put a cancel record into the log
   1767 		 */
   1768 		logmap_cancel_delta(ul, mof, hnb, metadata);
   1769 	}
   1770 
   1771 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
   1772 	    map_check_linkage(mtm));
   1773 }
   1774 
   1775 /*
   1776  * check for overlap w/cancel delta
   1777  */
   1778 int
   1779 logmap_iscancel(mt_map_t *mtm, offset_t mof, off_t nb)
   1780 {
   1781 	off_t		hnb;
   1782 	mapentry_t	*me;
   1783 	mapentry_t	**mep;
   1784 
   1785 	mutex_enter(&mtm->mtm_mutex);
   1786 	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
   1787 		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
   1788 		if (hnb > nb)
   1789 			hnb = nb;
   1790 		/*
   1791 		 * search for dup entry
   1792 		 */
   1793 		mep = MAP_HASH(mof, mtm);
   1794 		for (me = *mep; me; me = me->me_hash) {
   1795 			if (((me->me_flags & ME_ROLL) == 0) &&
   1796 			    (me->me_dt != DT_CANCEL))
   1797 				continue;
   1798 			if (DATAoverlapME(mof, hnb, me))
   1799 				break;
   1800 		}
   1801 
   1802 		/*
   1803 		 * overlap detected
   1804 		 */
   1805 		if (me) {
   1806 			mutex_exit(&mtm->mtm_mutex);
   1807 			return (1);
   1808 		}
   1809 	}
   1810 	mutex_exit(&mtm->mtm_mutex);
   1811 	return (0);
   1812 }
   1813 
   1814 static int
   1815 logmap_logscan_add(ml_unit_t *ul, struct delta *dp, off_t lof, size_t *nbp)
   1816 {
   1817 	mapentry_t	*me;
   1818 	int		error;
   1819 	mt_map_t	*mtm	= ul->un_logmap;
   1820 
   1821 	/*
   1822 	 * verify delta header; failure == mediafail
   1823 	 */
   1824 	error = 0;
   1825 	/* delta type */
   1826 	if ((dp->d_typ <= DT_NONE) || (dp->d_typ >= DT_MAX))
   1827 		error = EINVAL;
   1828 	if (dp->d_typ == DT_COMMIT) {
   1829 		if (dp->d_nb != INT32_C(0) && dp->d_nb != INT32_C(-1))
   1830 			error = EINVAL;
   1831 	} else {
   1832 		/* length of delta */
   1833 		if ((dp->d_nb < INT32_C(0)) ||
   1834 		    (dp->d_nb > INT32_C(MAPBLOCKSIZE)))
   1835 			error = EINVAL;
   1836 
   1837 		/* offset on master device */
   1838 		if (dp->d_mof < INT64_C(0))
   1839 			error = EINVAL;
   1840 	}
   1841 
   1842 	if (error) {
   1843 		ldl_seterror(ul, "Error processing ufs log data during scan");
   1844 		return (error);
   1845 	}
   1846 
   1847 	/*
   1848 	 * process commit record
   1849 	 */
   1850 	if (dp->d_typ == DT_COMMIT) {
   1851 		if (mtm->mtm_dirty) {
   1852 			ASSERT(dp->d_nb == INT32_C(0));
   1853 			logmap_free_cancel(mtm, &mtm->mtm_cancel);
   1854 			mtm->mtm_dirty = 0;
   1855 			mtm->mtm_nmet = 0;
   1856 			mtm->mtm_tid++;
   1857 			mtm->mtm_committid = mtm->mtm_tid;
   1858 			ASSERT(((mtm->mtm_debug & MT_SCAN) == 0) ||
   1859 			    logmap_logscan_commit_debug(lof, mtm));
   1860 		}
   1861 		/*
   1862 		 * return #bytes to next sector (next delta header)
   1863 		 */
   1864 		*nbp = ldl_logscan_nbcommit(lof);
   1865 		mtm->mtm_tail_lof = lof;
   1866 		mtm->mtm_tail_nb = *nbp;
   1867 		return (0);
   1868 	}
   1869 
   1870 	/*
   1871 	 * add delta to logmap
   1872 	 */
   1873 	me = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
   1874 	bzero(me, sizeof (mapentry_t));
   1875 	me->me_lof = lof;
   1876 	me->me_mof = dp->d_mof;
   1877 	me->me_nb = dp->d_nb;
   1878 	me->me_tid = mtm->mtm_tid;
   1879 	me->me_dt = dp->d_typ;
   1880 	me->me_hash = NULL;
   1881 	me->me_flags = (ME_LIST | ME_SCAN);
   1882 	logmap_add(ul, NULL, 0, me);
   1883 	switch (dp->d_typ) {
   1884 	case DT_CANCEL:
   1885 		me->me_flags |= ME_CANCEL;
   1886 		me->me_cancel = mtm->mtm_cancel;
   1887 		mtm->mtm_cancel = me;
   1888 		break;
   1889 	default:
   1890 		ASSERT(((mtm->mtm_debug & MT_SCAN) == 0) ||
   1891 		    logmap_logscan_add_debug(dp, mtm));
   1892 		break;
   1893 	}
   1894 
   1895 sizeofdelta:
   1896 	/*
   1897 	 * return #bytes till next delta header
   1898 	 */
   1899 	if ((dp->d_typ == DT_CANCEL) || (dp->d_typ == DT_ABZERO))
   1900 		*nbp = 0;
   1901 	else
   1902 		*nbp = dp->d_nb;
   1903 	return (0);
   1904 }
   1905 
   1906 void
   1907 logmap_logscan(ml_unit_t *ul)
   1908 {
   1909 	size_t		nb, nbd;
   1910 	off_t		lof;
   1911 	struct delta	delta;
   1912 	mt_map_t	*logmap	= ul->un_logmap;
   1913 
   1914 	ASSERT(ul->un_deltamap->mtm_next == (mapentry_t *)ul->un_deltamap);
   1915 
   1916 	/*
   1917 	 * prepare the log for a logscan
   1918 	 */
   1919 	ldl_logscan_begin(ul);
   1920 
   1921 	/*
   1922 	 * prepare the logmap for a logscan
   1923 	 */
   1924 	(void) map_free_entries(logmap);
   1925 	logmap->mtm_tid = 0;
   1926 	logmap->mtm_committid = UINT32_C(0);
   1927 	logmap->mtm_age = 0;
   1928 	logmap->mtm_dirty = 0;
   1929 	logmap->mtm_ref = 0;
   1930 
   1931 	/*
   1932 	 * while not at end of log
   1933 	 *	read delta header
   1934 	 *	add to logmap
   1935 	 *	seek to beginning of next delta
   1936 	 */
   1937 	lof = ul->un_head_lof;
   1938 	nbd = sizeof (delta);
   1939 	while (lof != ul->un_tail_lof) {
   1940 
   1941 		/* read delta header */
   1942 		if (ldl_logscan_read(ul, &lof, nbd, (caddr_t)&delta))
   1943 			break;
   1944 
   1945 		/* add to logmap */
   1946 		if (logmap_logscan_add(ul, &delta, lof, &nb))
   1947 			break;
   1948 
   1949 		/* seek to next header (skip data) */
   1950 		if (ldl_logscan_read(ul, &lof, nb, NULL))
   1951 			break;
   1952 	}
   1953 
   1954 	/*
   1955 	 * remove the last partial transaction from the logmap
   1956 	 */
   1957 	logmap_abort(ul, logmap->mtm_tid);
   1958 
   1959 	ldl_logscan_end(ul);
   1960 }
   1961 
   1962 void
   1963 _init_map(void)
   1964 {
   1965 	/*
   1966 	 * Initialise the mapentry cache. No constructor or deconstructor
   1967 	 * is needed. Also no reclaim function is supplied as reclaiming
   1968 	 * current entries is not possible.
   1969 	 */
   1970 	mapentry_cache = kmem_cache_create("lufs_mapentry_cache",
   1971 	    sizeof (mapentry_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
   1972 }
   1973 
   1974 /*
   1975  * Special case when we replace an old map entry which carries quota
   1976  * information with a newer entry which does not.
   1977  * In that case the push function would not be called to clean up the
   1978  * dquot structure. This would be found later by invalidatedq() causing
   1979  * a panic when the filesystem in unmounted.
   1980  * We clean up the dquot manually before replacing the map entry.
   1981  */
   1982 void
   1983 handle_dquot(mapentry_t *me)
   1984 {
   1985 	int dolock = 0;
   1986 	int domutex = 0;
   1987 	struct dquot *dqp;
   1988 
   1989 	dqp = (struct dquot *)me->me_arg;
   1990 
   1991 	/*
   1992 	 * We need vfs_dqrwlock to call dqput()
   1993 	 */
   1994 	dolock = (!RW_LOCK_HELD(&dqp->dq_ufsvfsp->vfs_dqrwlock));
   1995 	if (dolock)
   1996 		rw_enter(&dqp->dq_ufsvfsp->vfs_dqrwlock, RW_READER);
   1997 
   1998 	domutex = (!MUTEX_HELD(&dqp->dq_lock));
   1999 	if (domutex)
   2000 		mutex_enter(&dqp->dq_lock);
   2001 
   2002 	/*
   2003 	 * Only clean up if the dquot is referenced
   2004 	 */
   2005 	if (dqp->dq_cnt == 0) {
   2006 		if (domutex)
   2007 			mutex_exit(&dqp->dq_lock);
   2008 		if (dolock)
   2009 			rw_exit(&dqp->dq_ufsvfsp->vfs_dqrwlock);
   2010 		return;
   2011 	}
   2012 
   2013 	dqp->dq_flags &= ~(DQ_MOD|DQ_TRANS);
   2014 	dqput(dqp);
   2015 
   2016 	if (domutex)
   2017 		mutex_exit(&dqp->dq_lock);
   2018 
   2019 	if (dolock)
   2020 		rw_exit(&dqp->dq_ufsvfsp->vfs_dqrwlock);
   2021 
   2022 }
   2023