Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * Big Theory Statement for mutual exclusion locking primitives.
     28  *
     29  * A mutex serializes multiple threads so that only one thread
     30  * (the "owner" of the mutex) is active at a time.  See mutex(9F)
     31  * for a full description of the interfaces and programming model.
     32  * The rest of this comment describes the implementation.
     33  *
     34  * Mutexes come in two flavors: adaptive and spin.  mutex_init(9F)
     35  * determines the type based solely on the iblock cookie (PIL) argument.
     36  * PIL > LOCK_LEVEL implies a spin lock; everything else is adaptive.
     37  *
     38  * Spin mutexes block interrupts and spin until the lock becomes available.
     39  * A thread may not sleep, or call any function that might sleep, while
     40  * holding a spin mutex.  With few exceptions, spin mutexes should only
     41  * be used to synchronize with interrupt handlers.
     42  *
     43  * Adaptive mutexes (the default type) spin if the owner is running on
     44  * another CPU and block otherwise.  This policy is based on the assumption
     45  * that mutex hold times are typically short enough that the time spent
     46  * spinning is less than the time it takes to block.  If you need mutual
     47  * exclusion semantics with long hold times, consider an rwlock(9F) as
     48  * RW_WRITER.  Better still, reconsider the algorithm: if it requires
     49  * mutual exclusion for long periods of time, it's probably not scalable.
     50  *
     51  * Adaptive mutexes are overwhelmingly more common than spin mutexes,
     52  * so mutex_enter() assumes that the lock is adaptive.  We get away
     53  * with this by structuring mutexes so that an attempt to acquire a
     54  * spin mutex as adaptive always fails.  When mutex_enter() fails
     55  * it punts to mutex_vector_enter(), which does all the hard stuff.
     56  *
     57  * mutex_vector_enter() first checks the type.  If it's a spin mutex,
     58  * we just call lock_set_spl() and return.  If it's an adaptive mutex,
     59  * we check to see what the owner is doing.  If the owner is running,
     60  * we spin until the lock becomes available; if not, we mark the lock
     61  * as having waiters and block.
     62  *
     63  * Blocking on a mutex is a surprisingly delicate dance because, for speed,
     64  * mutex_exit() doesn't use an atomic instruction.  Thus we have to work
     65  * a little harder in the (rarely-executed) blocking path to make sure
     66  * we don't block on a mutex that's just been released -- otherwise we
     67  * might never be woken up.
     68  *
     69  * The logic for synchronizing mutex_vector_enter() with mutex_exit()
     70  * in the face of preemption and relaxed memory ordering is as follows:
     71  *
     72  * (1) Preemption in the middle of mutex_exit() must cause mutex_exit()
     73  *     to restart.  Each platform must enforce this by checking the
     74  *     interrupted PC in the interrupt handler (or on return from trap --
     75  *     whichever is more convenient for the platform).  If the PC
     76  *     lies within the critical region of mutex_exit(), the interrupt
     77  *     handler must reset the PC back to the beginning of mutex_exit().
     78  *     The critical region consists of all instructions up to, but not
     79  *     including, the store that clears the lock (which, of course,
     80  *     must never be executed twice.)
     81  *
     82  *     This ensures that the owner will always check for waiters after
     83  *     resuming from a previous preemption.
     84  *
     85  * (2) A thread resuming in mutex_exit() does (at least) the following:
     86  *
     87  *	when resuming:	set CPU_THREAD = owner
     88  *			membar #StoreLoad
     89  *
     90  *	in mutex_exit:	check waiters bit; do wakeup if set
     91  *			membar #LoadStore|#StoreStore
     92  *			clear owner
     93  *			(at this point, other threads may or may not grab
     94  *			the lock, and we may or may not reacquire it)
     95  *
     96  *	when blocking:	membar #StoreStore (due to disp_lock_enter())
     97  *			set CPU_THREAD = (possibly) someone else
     98  *
     99  * (3) A thread blocking in mutex_vector_enter() does the following:
    100  *
    101  *			set waiters bit
    102  *			membar #StoreLoad (via membar_enter())
    103  *			check CPU_THREAD for owner's t_cpu
    104  *				continue if owner running
    105  *			membar #LoadLoad (via membar_consumer())
    106  *			check owner and waiters bit; abort if either changed
    107  *			block
    108  *
    109  * Thus the global memory orderings for (2) and (3) are as follows:
    110  *
    111  * (2M) mutex_exit() memory order:
    112  *
    113  *			STORE	CPU_THREAD = owner
    114  *			LOAD	waiters bit
    115  *			STORE	owner = NULL
    116  *			STORE	CPU_THREAD = (possibly) someone else
    117  *
    118  * (3M) mutex_vector_enter() memory order:
    119  *
    120  *			STORE	waiters bit = 1
    121  *			LOAD	CPU_THREAD for each CPU
    122  *			LOAD	owner and waiters bit
    123  *
    124  * It has been verified by exhaustive simulation that all possible global
    125  * memory orderings of (2M) interleaved with (3M) result in correct
    126  * behavior.  Moreover, these ordering constraints are minimal: changing
    127  * the ordering of anything in (2M) or (3M) breaks the algorithm, creating
    128  * windows for missed wakeups.  Note: the possibility that other threads
    129  * may grab the lock after the owner drops it can be factored out of the
    130  * memory ordering analysis because mutex_vector_enter() won't block
    131  * if the lock isn't still owned by the same thread.
    132  *
    133  * The only requirements of code outside the mutex implementation are
    134  * (1) mutex_exit() preemption fixup in interrupt handlers or trap return,
    135  * (2) a membar #StoreLoad after setting CPU_THREAD in resume(),
    136  * (3) mutex_owner_running() preemption fixup in interrupt handlers
    137  * or trap returns.
    138  * Note: idle threads cannot grab adaptive locks (since they cannot block),
    139  * so the membar may be safely omitted when resuming an idle thread.
    140  *
    141  * When a mutex has waiters, mutex_vector_exit() has several options:
    142  *
    143  * (1) Choose a waiter and make that thread the owner before waking it;
    144  *     this is known as "direct handoff" of ownership.
    145  *
    146  * (2) Drop the lock and wake one waiter.
    147  *
    148  * (3) Drop the lock, clear the waiters bit, and wake all waiters.
    149  *
    150  * In many ways (1) is the cleanest solution, but if a lock is moderately
    151  * contended it defeats the adaptive spin logic.  If we make some other
    152  * thread the owner, but he's not ONPROC yet, then all other threads on
    153  * other cpus that try to get the lock will conclude that the owner is
    154  * blocked, so they'll block too.  And so on -- it escalates quickly,
    155  * with every thread taking the blocking path rather than the spin path.
    156  * Thus, direct handoff is *not* a good idea for adaptive mutexes.
    157  *
    158  * Option (2) is the next most natural-seeming option, but it has several
    159  * annoying properties.  If there's more than one waiter, we must preserve
    160  * the waiters bit on an unheld lock.  On cas-capable platforms, where
    161  * the waiters bit is part of the lock word, this means that both 0x0
    162  * and 0x1 represent unheld locks, so we have to cas against *both*.
    163  * Priority inheritance also gets more complicated, because a lock can
    164  * have waiters but no owner to whom priority can be willed.  So while
    165  * it is possible to make option (2) work, it's surprisingly vile.
    166  *
    167  * Option (3), the least-intuitive at first glance, is what we actually do.
    168  * It has the advantage that because you always wake all waiters, you
    169  * never have to preserve the waiters bit.  Waking all waiters seems like
    170  * begging for a thundering herd problem, but consider: under option (2),
    171  * every thread that grabs and drops the lock will wake one waiter -- so
    172  * if the lock is fairly active, all waiters will be awakened very quickly
    173  * anyway.  Moreover, this is how adaptive locks are *supposed* to work.
    174  * The blocking case is rare; the more common case (by 3-4 orders of
    175  * magnitude) is that one or more threads spin waiting to get the lock.
    176  * Only direct handoff can prevent the thundering herd problem, but as
    177  * mentioned earlier, that would tend to defeat the adaptive spin logic.
    178  * In practice, option (3) works well because the blocking case is rare.
    179  */
    180 
    181 /*
    182  * delayed lock retry with exponential delay for spin locks
    183  *
    184  * It is noted above that for both the spin locks and the adaptive locks,
    185  * spinning is the dominant mode of operation.  So long as there is only
    186  * one thread waiting on a lock, the naive spin loop works very well in
    187  * cache based architectures.  The lock data structure is pulled into the
    188  * cache of the processor with the waiting/spinning thread and no further
    189  * memory traffic is generated until the lock is released.  Unfortunately,
    190  * once two or more threads are waiting on a lock, the naive spin has
    191  * the property of generating maximum memory traffic from each spinning
    192  * thread as the spinning threads contend for the lock data structure.
    193  *
    194  * By executing a delay loop before retrying a lock, a waiting thread
    195  * can reduce its memory traffic by a large factor, depending on the
    196  * size of the delay loop.  A large delay loop greatly reduces the memory
    197  * traffic, but has the drawback of having a period of time when
    198  * no thread is attempting to gain the lock even though several threads
    199  * might be waiting.  A small delay loop has the drawback of not
    200  * much reduction in memory traffic, but reduces the potential idle time.
    201  * The theory of the exponential delay code is to start with a short
    202  * delay loop and double the waiting time on each iteration, up to
    203  * a preselected maximum.
    204  */
    205 
    206 #include <sys/param.h>
    207 #include <sys/time.h>
    208 #include <sys/cpuvar.h>
    209 #include <sys/thread.h>
    210 #include <sys/debug.h>
    211 #include <sys/cmn_err.h>
    212 #include <sys/sobject.h>
    213 #include <sys/turnstile.h>
    214 #include <sys/systm.h>
    215 #include <sys/mutex_impl.h>
    216 #include <sys/spl.h>
    217 #include <sys/lockstat.h>
    218 #include <sys/atomic.h>
    219 #include <sys/cpu.h>
    220 #include <sys/stack.h>
    221 #include <sys/archsystm.h>
    222 #include <sys/machsystm.h>
    223 #include <sys/x_call.h>
    224 
/*
 * The sobj_ops vector exports a set of functions needed when a thread
 * is asleep on a synchronization object of this type.
 *
 * The initializer lists, in order: the object type tag SOBJ_MUTEX,
 * mutex_owner() (defined in this file), and the two turnstile routines
 * turnstile_stay_asleep and turnstile_change_pri.  The vector is passed
 * to turnstile_block() by mutex_vector_enter() when a thread blocks.
 */
static sobj_ops_t mutex_sobj_ops = {
	SOBJ_MUTEX, mutex_owner, turnstile_stay_asleep, turnstile_change_pri
};
    232 
/*
 * If the system panics on a mutex, save the address of the offending
 * mutex in panic_mutex_addr, and save the contents in panic_mutex.
 *
 * Both are filled in by mutex_panic().  panic_mutex_addr is claimed with
 * a compare-and-swap from NULL, so only the first caller to get there
 * copies its mutex into panic_mutex.
 */
static mutex_impl_t panic_mutex;
static mutex_impl_t *panic_mutex_addr;
    239 
    240 static void
    241 mutex_panic(char *msg, mutex_impl_t *lp)
    242 {
    243 	if (panicstr)
    244 		return;
    245 
    246 	if (casptr(&panic_mutex_addr, NULL, lp) == NULL)
    247 		panic_mutex = *lp;
    248 
    249 	panic("%s, lp=%p owner=%p thread=%p",
    250 	    msg, (void *)lp, (void *)MUTEX_OWNER(&panic_mutex),
    251 	    (void *)curthread);
    252 }
    253 
/*
 * "tunables" for per-platform backoff constants.
 *
 * mutex_backoff_cap	upper bound on the backoff interval; 0 makes
 *			default_lock_backoff() derive the bound from
 *			ncpus_online * mutex_cap_factor instead
 * mutex_backoff_base	initial backoff interval, and the smallest delay
 *			default_lock_delay() will pick
 * mutex_cap_factor	per-CPU multiplier used for the derived bound
 * mutex_backoff_shift	left shift applied each time the backoff grows
 */
uint_t mutex_backoff_cap = 0;
ushort_t mutex_backoff_base = MUTEX_BACKOFF_BASE;
ushort_t mutex_cap_factor = MUTEX_CAP_FACTOR;
uchar_t mutex_backoff_shift = MUTEX_BACKOFF_SHIFT;
    259 
/*
 * Callable wrapper around the MUTEX_SYNC() macro.
 * NOTE(review): the macro is defined outside this file; presumably it
 * is a platform-specific synchronization step -- confirm against the
 * platform's mutex_impl.h.
 */
void
mutex_sync(void)
{
	MUTEX_SYNC();
}
    265 
    266 /* calculate the backoff interval */
    267 uint_t
    268 default_lock_backoff(uint_t backoff)
    269 {
    270 	uint_t cap;		/* backoff cap calculated */
    271 
    272 	if (backoff == 0) {
    273 		backoff = mutex_backoff_base;
    274 		/* first call just sets the base */
    275 		return (backoff);
    276 	}
    277 
    278 	/* set cap */
    279 	if (mutex_backoff_cap == 0) {
    280 		/*
    281 		 * For a contended lock, in the worst case a load + cas may
    282 		 * be queued  at the controller for each contending CPU.
    283 		 * Therefore, to avoid queueing, the accesses for all CPUS must
    284 		 * be spread out in time over an interval of (ncpu *
    285 		 * cap-factor).  Maximum backoff is set to this value, and
    286 		 * actual backoff is a random number from 0 to the current max.
    287 		 */
    288 		cap = ncpus_online * mutex_cap_factor;
    289 	} else {
    290 		cap = mutex_backoff_cap;
    291 	}
    292 
    293 	/* calculate new backoff value */
    294 	backoff <<= mutex_backoff_shift;	/* increase backoff */
    295 	if (backoff > cap) {
    296 		if (cap < mutex_backoff_base)
    297 			backoff = mutex_backoff_base;
    298 		else
    299 			backoff = cap;
    300 	}
    301 
    302 	return (backoff);
    303 }
    304 
    305 /*
    306  * default delay function for mutexes.
    307  */
    308 void
    309 default_lock_delay(uint_t backoff)
    310 {
    311 	ulong_t rnd;		/* random factor */
    312 	uint_t cur_backoff;	/* calculated backoff */
    313 	uint_t backctr;
    314 
    315 	/*
    316 	 * Modify backoff by a random amount to avoid lockstep, and to
    317 	 * make it probable that some thread gets a small backoff, and
    318 	 * re-checks quickly
    319 	 */
    320 	rnd = (((long)curthread >> PTR24_LSB) ^ (long)MUTEX_GETTICK());
    321 	cur_backoff = (uint_t)(rnd % (backoff - mutex_backoff_base + 1)) +
    322 	    mutex_backoff_base;
    323 
    324 	/*
    325 	 * Delay before trying
    326 	 * to touch the mutex data structure.
    327 	 */
    328 	for (backctr = cur_backoff; backctr; backctr--) {
    329 		MUTEX_DELAY();
    330 	};
    331 }
    332 
/*
 * Hooks through which the spin loops in this file obtain their backoff
 * interval (mutex_lock_backoff) and perform the delay (mutex_lock_delay).
 * They start out pointing at the default implementations above.
 * mutex_delay is initialized to mutex_delay_default, which is defined
 * outside this file and is not called from here.
 */
uint_t (*mutex_lock_backoff)(uint_t) = default_lock_backoff;
void (*mutex_lock_delay)(uint_t) = default_lock_delay;
void (*mutex_delay)(void) = mutex_delay_default;
    336 
/*
 * mutex_vector_enter() is called from the assembly mutex_enter() routine
 * if the lock is held or is not of type MUTEX_ADAPTIVE.
 *
 * lp is the mutex to acquire.  A spin mutex is taken with lock_set_spl()
 * and the function returns.  For an adaptive mutex the function loops --
 * delaying, retrying the acquire, and blocking on the turnstile when the
 * owner is not running -- until mutex_adaptive_tryenter() succeeds.
 *
 * The function returns WITHOUT holding the lock in two cases: when
 * panicstr is found set at the top of a loop iteration, and when
 * mutex_panic() returns after a lock of unrecognized type was passed in.
 */
void
mutex_vector_enter(mutex_impl_t *lp)
{
	kthread_id_t	owner;
	kthread_id_t	lastowner = MUTEX_NO_OWNER; /* track owner changes */
	hrtime_t	sleep_time = 0;	/* how long we slept */
	hrtime_t	spin_time = 0;	/* how long we spun */
	cpu_t 		*cpup;
	turnstile_t	*ts;
	/* volatile view of the lock, used where the owner must be re-read */
	volatile mutex_impl_t *vlp = (volatile mutex_impl_t *)lp;
	uint_t		backoff = 0;	/* current backoff */
	int		changecnt = 0;	/* count of owner changes */

	ASSERT_STACK_ALIGNED();

	/* Spin mutex: raise to m_minspl, take the lock, save the old spl. */
	if (MUTEX_TYPE_SPIN(lp)) {
		lock_set_spl(&lp->m_spin.m_spinlock, lp->m_spin.m_minspl,
		    &lp->m_spin.m_oldspl);
		return;
	}

	/* Neither spin nor adaptive: the lock word is not a valid mutex. */
	if (!MUTEX_TYPE_ADAPTIVE(lp)) {
		mutex_panic("mutex_enter: bad mutex", lp);
		return;
	}

	/*
	 * Adaptive mutexes must not be acquired from above LOCK_LEVEL.
	 * We can migrate after loading CPU but before checking CPU_ON_INTR,
	 * so we must verify by disabling preemption and loading CPU again.
	 */
	cpup = CPU;
	if (CPU_ON_INTR(cpup) && !panicstr) {
		kpreempt_disable();
		if (CPU_ON_INTR(CPU))
			mutex_panic("mutex_enter: adaptive at high PIL", lp);
		kpreempt_enable();
	}

	CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1);

	spin_time = LOCKSTAT_START_TIME(LS_MUTEX_ENTER_SPIN);

	backoff = mutex_lock_backoff(0);	/* set base backoff */
	for (;;) {
		mutex_lock_delay(backoff); /* backoff delay */

		/* Once the system is panicking, give up without the lock. */
		if (panicstr)
			return;

		if ((owner = MUTEX_OWNER(vlp)) == NULL) {
			/* Lock looks free: try to take it. */
			if (mutex_adaptive_tryenter(lp)) {
				break;
			}
			/* increase backoff only on failed attempt. */
			backoff = mutex_lock_backoff(backoff);
			changecnt++;
			continue;
		} else if (lastowner != owner) {
			/*
			 * A different owner than last time we looked means
			 * the lock changed hands while we waited; grow the
			 * backoff and count the change.
			 */
			lastowner = owner;
			backoff = mutex_lock_backoff(backoff);
			changecnt++;
		}

		/*
		 * After as many owner changes / failed attempts as there
		 * are online CPUs, start over from the base backoff.
		 */
		if (changecnt >= ncpus_online) {
			backoff = mutex_lock_backoff(0);
			changecnt = 0;
		}

		if (owner == curthread)
			mutex_panic("recursive mutex_enter", lp);

		/*
		 * If lock is held but owner is not yet set, spin.
		 * (Only relevant for platforms that don't have cas.)
		 */
		if (owner == MUTEX_NO_OWNER)
			continue;

		/* Owner is on a CPU: keep spinning rather than block. */
		if (mutex_owner_running(lp) != NULL)  {
			continue;
		}

		/*
		 * The owner appears not to be running, so block.
		 * See the Big Theory Statement for memory ordering issues.
		 */
		ts = turnstile_lookup(lp);
		MUTEX_SET_WAITERS(lp);
		membar_enter();

		/*
		 * Recheck whether owner is running after waiters bit hits
		 * global visibility (above).  If owner is running, spin.
		 */
		if (mutex_owner_running(lp) != NULL) {
			turnstile_exit(lp);
			continue;
		}
		membar_consumer();

		/*
		 * If owner and waiters bit are unchanged, block.
		 */
		if (MUTEX_OWNER(vlp) == owner && MUTEX_HAS_WAITERS(vlp)) {
			/*
			 * Subtracting the time before and adding it after
			 * accumulates the elapsed time of every block.
			 */
			sleep_time -= gethrtime();
			(void) turnstile_block(ts, TS_WRITER_Q, lp,
			    &mutex_sobj_ops, NULL, NULL);
			sleep_time += gethrtime();
			/* reset backoff after turnstile */
			backoff = mutex_lock_backoff(0);
		} else {
			/* Lock state changed under us: drop out and retry. */
			turnstile_exit(lp);
		}
	}

	ASSERT(MUTEX_OWNER(lp) == curthread);

	if (sleep_time != 0) {
		/*
		 * Note, sleep time is the sum of all the sleeping we
		 * did.
		 */
		LOCKSTAT_RECORD(LS_MUTEX_ENTER_BLOCK, lp, sleep_time);
	}

	/*
	 * record spin time, don't count sleep time
	 * NOTE(review): this presumably relies on spin_time being a start
	 * timestamp, so that adding sleep_time moves the start forward and
	 * removes the time spent asleep -- confirm against the
	 * LOCKSTAT_START_TIME / LOCKSTAT_RECORD_TIME definitions.
	 */
	if (spin_time != 0) {
		LOCKSTAT_RECORD_TIME(LS_MUTEX_ENTER_SPIN, lp,
		    spin_time + sleep_time);
	}

	LOCKSTAT_RECORD0(LS_MUTEX_ENTER_ACQUIRE, lp);
}
    475 
    476 /*
    477  * mutex_vector_tryenter() is called from the assembly mutex_tryenter()
    478  * routine if the lock is held or is not of type MUTEX_ADAPTIVE.
    479  */
    480 int
    481 mutex_vector_tryenter(mutex_impl_t *lp)
    482 {
    483 	int s;
    484 
    485 	if (MUTEX_TYPE_ADAPTIVE(lp))
    486 		return (0);		/* we already tried in assembly */
    487 
    488 	if (!MUTEX_TYPE_SPIN(lp)) {
    489 		mutex_panic("mutex_tryenter: bad mutex", lp);
    490 		return (0);
    491 	}
    492 
    493 	s = splr(lp->m_spin.m_minspl);
    494 	if (lock_try(&lp->m_spin.m_spinlock)) {
    495 		lp->m_spin.m_oldspl = (ushort_t)s;
    496 		return (1);
    497 	}
    498 	splx(s);
    499 	return (0);
    500 }
    501 
    502 /*
    503  * mutex_vector_exit() is called from mutex_exit() if the lock is not
    504  * adaptive, has waiters, or is not owned by the current thread (panic).
    505  */
    506 void
    507 mutex_vector_exit(mutex_impl_t *lp)
    508 {
    509 	turnstile_t *ts;
    510 
    511 	if (MUTEX_TYPE_SPIN(lp)) {
    512 		lock_clear_splx(&lp->m_spin.m_spinlock, lp->m_spin.m_oldspl);
    513 		return;
    514 	}
    515 
    516 	if (MUTEX_OWNER(lp) != curthread) {
    517 		mutex_panic("mutex_exit: not owner", lp);
    518 		return;
    519 	}
    520 
    521 	ts = turnstile_lookup(lp);
    522 	MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
    523 	if (ts == NULL)
    524 		turnstile_exit(lp);
    525 	else
    526 		turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
    527 	LOCKSTAT_RECORD0(LS_MUTEX_EXIT_RELEASE, lp);
    528 }
    529 
    530 int
    531 mutex_owned(const kmutex_t *mp)
    532 {
    533 	const mutex_impl_t *lp = (const mutex_impl_t *)mp;
    534 
    535 	if (panicstr || quiesce_active)
    536 		return (1);
    537 
    538 	if (MUTEX_TYPE_ADAPTIVE(lp))
    539 		return (MUTEX_OWNER(lp) == curthread);
    540 	return (LOCK_HELD(&lp->m_spin.m_spinlock));
    541 }
    542 
    543 kthread_t *
    544 mutex_owner(const kmutex_t *mp)
    545 {
    546 	const mutex_impl_t *lp = (const mutex_impl_t *)mp;
    547 	kthread_id_t t;
    548 
    549 	if (MUTEX_TYPE_ADAPTIVE(lp) && (t = MUTEX_OWNER(lp)) != MUTEX_NO_OWNER)
    550 		return (t);
    551 	return (NULL);
    552 }
    553 
    554 /*
    555  * The iblock cookie 'ibc' is the spl level associated with the lock;
    556  * this alone determines whether the lock will be ADAPTIVE or SPIN.
    557  *
    558  * Adaptive mutexes created in zeroed memory do not need to call
    559  * mutex_init() as their allocation in this fashion guarantees
    560  * their initialization.
    561  *   eg adaptive mutexes created as static within the BSS or allocated
    562  *      by kmem_zalloc().
    563  */
    564 /* ARGSUSED */
    565 void
    566 mutex_init(kmutex_t *mp, char *name, kmutex_type_t type, void *ibc)
    567 {
    568 	mutex_impl_t *lp = (mutex_impl_t *)mp;
    569 
    570 	ASSERT(ibc < (void *)KERNELBASE);	/* see 1215173 */
    571 
    572 	if ((intptr_t)ibc > ipltospl(LOCK_LEVEL) && ibc < (void *)KERNELBASE) {
    573 		ASSERT(type != MUTEX_ADAPTIVE && type != MUTEX_DEFAULT);
    574 		MUTEX_SET_TYPE(lp, MUTEX_SPIN);
    575 		LOCK_INIT_CLEAR(&lp->m_spin.m_spinlock);
    576 		LOCK_INIT_HELD(&lp->m_spin.m_dummylock);
    577 		lp->m_spin.m_minspl = (int)(intptr_t)ibc;
    578 	} else {
    579 #ifdef MUTEX_ALIGN
    580 		static int misalign_cnt = 0;
    581 
    582 		if (((uintptr_t)lp & (uintptr_t)(MUTEX_ALIGN - 1)) &&
    583 		    (misalign_cnt < MUTEX_ALIGN_WARNINGS)) {
    584 			/*
    585 			 * The mutex is not aligned and may cross a cache line.
    586 			 * This is not supported and may cause a panic.
    587 			 * Show a warning that the mutex is not aligned
    588 			 * and attempt to identify the origin.
    589 			 * Unaligned mutexes are not (supposed to be)
    590 			 * possible on SPARC.
    591 			 */
    592 			char *funcname;
    593 			ulong_t offset = 0;
    594 
    595 			funcname = modgetsymname((uintptr_t)caller(), &offset);
    596 			cmn_err(CE_WARN, "mutex_init: %p is not %d byte "
    597 			    "aligned; caller %s+%lx in module %s. "
    598 			    "This is unsupported and may cause a panic. "
    599 			    "Please report this to the kernel module supplier.",
    600 			    (void *)lp, MUTEX_ALIGN,
    601 			    funcname ? funcname : "unknown", offset,
    602 			    mod_containing_pc(caller()));
    603 			misalign_cnt++;
    604 			if (misalign_cnt >= MUTEX_ALIGN_WARNINGS) {
    605 				cmn_err(CE_WARN, "mutex_init: further unaligned"
    606 				    " mutex warnings will be suppressed.");
    607 			}
    608 		}
    609 #endif	/* MUTEX_ALIGN */
    610 		ASSERT(type != MUTEX_SPIN);
    611 
    612 		MUTEX_SET_TYPE(lp, MUTEX_ADAPTIVE);
    613 		MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
    614 	}
    615 }
    616 
    617 void
    618 mutex_destroy(kmutex_t *mp)
    619 {
    620 	mutex_impl_t *lp = (mutex_impl_t *)mp;
    621 
    622 	if (lp->m_owner == 0 && !MUTEX_HAS_WAITERS(lp)) {
    623 		MUTEX_DESTROY(lp);
    624 	} else if (MUTEX_TYPE_SPIN(lp)) {
    625 		LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
    626 		MUTEX_DESTROY(lp);
    627 	} else if (MUTEX_TYPE_ADAPTIVE(lp)) {
    628 		LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
    629 		if (MUTEX_OWNER(lp) != curthread)
    630 			mutex_panic("mutex_destroy: not owner", lp);
    631 		if (MUTEX_HAS_WAITERS(lp)) {
    632 			turnstile_t *ts = turnstile_lookup(lp);
    633 			turnstile_exit(lp);
    634 			if (ts != NULL)
    635 				mutex_panic("mutex_destroy: has waiters", lp);
    636 		}
    637 		MUTEX_DESTROY(lp);
    638 	} else {
    639 		mutex_panic("mutex_destroy: bad mutex", lp);
    640 	}
    641 }
    642 
    643 /*
    644  * Simple C support for the cases where spin locks miss on the first try.
    645  */
    646 void
    647 lock_set_spin(lock_t *lp)
    648 {
    649 	int loop_count = 0;
    650 	uint_t backoff = 0;	/* current backoff */
    651 	hrtime_t spin_time = 0;	/* how long we spun */
    652 
    653 	if (panicstr)
    654 		return;
    655 
    656 	if (ncpus == 1)
    657 		panic("lock_set: %p lock held and only one CPU", (void *)lp);
    658 
    659 	spin_time = LOCKSTAT_START_TIME(LS_LOCK_SET_SPIN);
    660 
    661 	while (LOCK_HELD(lp) || !lock_spin_try(lp)) {
    662 		if (panicstr)
    663 			return;
    664 		loop_count++;
    665 
    666 		if (ncpus_online == loop_count) {
    667 			backoff = mutex_lock_backoff(0);
    668 			loop_count = 0;
    669 		} else {
    670 			backoff = mutex_lock_backoff(backoff);
    671 		}
    672 		mutex_lock_delay(backoff);
    673 	}
    674 
    675 	LOCKSTAT_RECORD_TIME(LS_LOCK_SET_SPIN, lp, spin_time);
    676 
    677 	LOCKSTAT_RECORD0(LS_LOCK_SET_ACQUIRE, lp);
    678 }
    679 
    680 void
    681 lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil)
    682 {
    683 	int loop_count = 0;
    684 	uint_t backoff = 0;	/* current backoff */
    685 	hrtime_t spin_time = 0;	/* how long we spun */
    686 
    687 	if (panicstr)
    688 		return;
    689 
    690 	if (ncpus == 1)
    691 		panic("lock_set_spl: %p lock held and only one CPU",
    692 		    (void *)lp);
    693 
    694 	ASSERT(new_pil > LOCK_LEVEL);
    695 
    696 	spin_time = LOCKSTAT_START_TIME(LS_LOCK_SET_SPL_SPIN);
    697 
    698 	do {
    699 		splx(old_pil);
    700 		while (LOCK_HELD(lp)) {
    701 			loop_count++;
    702 
    703 			if (panicstr) {
    704 				*old_pil_addr = (ushort_t)splr(new_pil);
    705 				return;
    706 			}
    707 			if (ncpus_online == loop_count) {
    708 				backoff = mutex_lock_backoff(0);
    709 				loop_count = 0;
    710 			} else {
    711 				backoff = mutex_lock_backoff(backoff);
    712 			}
    713 			mutex_lock_delay(backoff);
    714 		}
    715 		old_pil = splr(new_pil);
    716 	} while (!lock_spin_try(lp));
    717 
    718 	*old_pil_addr = (ushort_t)old_pil;
    719 
    720 	LOCKSTAT_RECORD_TIME(LS_LOCK_SET_SPL_SPIN, lp, spin_time);
    721 
    722 	LOCKSTAT_RECORD0(LS_LOCK_SET_SPL_ACQUIRE, lp);
    723 }
    724