Home | History | Annotate | Download | only in os
      1     0    stevel /*
      2     0    stevel  * CDDL HEADER START
      3     0    stevel  *
      4     0    stevel  * The contents of this file are subject to the terms of the
      5  2205  dv142724  * Common Development and Distribution License (the "License").
      6  2205  dv142724  * You may not use this file except in compliance with the License.
      7     0    stevel  *
      8     0    stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9     0    stevel  * or http://www.opensolaris.org/os/licensing.
     10     0    stevel  * See the License for the specific language governing permissions
     11     0    stevel  * and limitations under the License.
     12     0    stevel  *
     13     0    stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14     0    stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15     0    stevel  * If applicable, add the following below this CDDL HEADER, with the
     16     0    stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17     0    stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18     0    stevel  *
     19     0    stevel  * CDDL HEADER END
     20     0    stevel  */
     21     0    stevel /*
     22  9160    Sherry  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23     0    stevel  * Use is subject to license terms.
     24     0    stevel  */
     25     0    stevel 
     26     0    stevel /*
     27     0    stevel  * Big Theory Statement for mutual exclusion locking primitives.
     28     0    stevel  *
     29     0    stevel  * A mutex serializes multiple threads so that only one thread
     30     0    stevel  * (the "owner" of the mutex) is active at a time.  See mutex(9F)
     31     0    stevel  * for a full description of the interfaces and programming model.
     32     0    stevel  * The rest of this comment describes the implementation.
     33     0    stevel  *
     34     0    stevel  * Mutexes come in two flavors: adaptive and spin.  mutex_init(9F)
     35     0    stevel  * determines the type based solely on the iblock cookie (PIL) argument.
     36     0    stevel  * PIL > LOCK_LEVEL implies a spin lock; everything else is adaptive.
     37     0    stevel  *
     38     0    stevel  * Spin mutexes block interrupts and spin until the lock becomes available.
     39     0    stevel  * A thread may not sleep, or call any function that might sleep, while
     40     0    stevel  * holding a spin mutex.  With few exceptions, spin mutexes should only
     41     0    stevel  * be used to synchronize with interrupt handlers.
     42     0    stevel  *
     43     0    stevel  * Adaptive mutexes (the default type) spin if the owner is running on
     44     0    stevel  * another CPU and block otherwise.  This policy is based on the assumption
     45     0    stevel  * that mutex hold times are typically short enough that the time spent
     46     0    stevel  * spinning is less than the time it takes to block.  If you need mutual
     47     0    stevel  * exclusion semantics with long hold times, consider an rwlock(9F) as
     48     0    stevel  * RW_WRITER.  Better still, reconsider the algorithm: if it requires
     49     0    stevel  * mutual exclusion for long periods of time, it's probably not scalable.
     50     0    stevel  *
     51     0    stevel  * Adaptive mutexes are overwhelmingly more common than spin mutexes,
     52     0    stevel  * so mutex_enter() assumes that the lock is adaptive.  We get away
     53     0    stevel  * with this by structuring mutexes so that an attempt to acquire a
     54     0    stevel  * spin mutex as adaptive always fails.  When mutex_enter() fails
     55     0    stevel  * it punts to mutex_vector_enter(), which does all the hard stuff.
     56     0    stevel  *
     57     0    stevel  * mutex_vector_enter() first checks the type.  If it's a spin mutex,
     58     0    stevel  * we just call lock_set_spl() and return.  If it's an adaptive mutex,
     59     0    stevel  * we check to see what the owner is doing.  If the owner is running,
     60     0    stevel  * we spin until the lock becomes available; if not, we mark the lock
     61     0    stevel  * as having waiters and block.
     62     0    stevel  *
     63     0    stevel  * Blocking on a mutex is a surprisingly delicate dance because, for speed,
     64     0    stevel  * mutex_exit() doesn't use an atomic instruction.  Thus we have to work
     65     0    stevel  * a little harder in the (rarely-executed) blocking path to make sure
     66     0    stevel  * we don't block on a mutex that's just been released -- otherwise we
     67     0    stevel  * might never be woken up.
     68     0    stevel  *
     69     0    stevel  * The logic for synchronizing mutex_vector_enter() with mutex_exit()
     70     0    stevel  * in the face of preemption and relaxed memory ordering is as follows:
     71     0    stevel  *
     72     0    stevel  * (1) Preemption in the middle of mutex_exit() must cause mutex_exit()
     73     0    stevel  *     to restart.  Each platform must enforce this by checking the
     74     0    stevel  *     interrupted PC in the interrupt handler (or on return from trap --
     75     0    stevel  *     whichever is more convenient for the platform).  If the PC
     76     0    stevel  *     lies within the critical region of mutex_exit(), the interrupt
     77     0    stevel  *     handler must reset the PC back to the beginning of mutex_exit().
     78     0    stevel  *     The critical region consists of all instructions up to, but not
     79     0    stevel  *     including, the store that clears the lock (which, of course,
     80     0    stevel  *     must never be executed twice.)
     81     0    stevel  *
     82     0    stevel  *     This ensures that the owner will always check for waiters after
     83     0    stevel  *     resuming from a previous preemption.
     84     0    stevel  *
     85     0    stevel  * (2) A thread resuming in mutex_exit() does (at least) the following:
     86     0    stevel  *
     87     0    stevel  *	when resuming:	set CPU_THREAD = owner
     88     0    stevel  *			membar #StoreLoad
     89     0    stevel  *
     90     0    stevel  *	in mutex_exit:	check waiters bit; do wakeup if set
     91     0    stevel  *			membar #LoadStore|#StoreStore
     92     0    stevel  *			clear owner
     93     0    stevel  *			(at this point, other threads may or may not grab
     94     0    stevel  *			the lock, and we may or may not reacquire it)
     95     0    stevel  *
     96     0    stevel  *	when blocking:	membar #StoreStore (due to disp_lock_enter())
     97     0    stevel  *			set CPU_THREAD = (possibly) someone else
     98     0    stevel  *
     99     0    stevel  * (3) A thread blocking in mutex_vector_enter() does the following:
    100     0    stevel  *
    101     0    stevel  *			set waiters bit
    102     0    stevel  *			membar #StoreLoad (via membar_enter())
    103  5834  pt157919  *			check CPU_THREAD for owner's t_cpu
    104  5834  pt157919  *				continue if owner running
    105     0    stevel  *			membar #LoadLoad (via membar_consumer())
    106     0    stevel  *			check owner and waiters bit; abort if either changed
    107     0    stevel  *			block
    108     0    stevel  *
    109     0    stevel  * Thus the global memory orderings for (2) and (3) are as follows:
    110     0    stevel  *
    111     0    stevel  * (2M) mutex_exit() memory order:
    112     0    stevel  *
    113     0    stevel  *			STORE	CPU_THREAD = owner
    114     0    stevel  *			LOAD	waiters bit
    115     0    stevel  *			STORE	owner = NULL
    116     0    stevel  *			STORE	CPU_THREAD = (possibly) someone else
    117     0    stevel  *
    118     0    stevel  * (3M) mutex_vector_enter() memory order:
    119     0    stevel  *
    120     0    stevel  *			STORE	waiters bit = 1
    121     0    stevel  *			LOAD	CPU_THREAD for each CPU
    122     0    stevel  *			LOAD	owner and waiters bit
    123     0    stevel  *
    124     0    stevel  * It has been verified by exhaustive simulation that all possible global
    125     0    stevel  * memory orderings of (2M) interleaved with (3M) result in correct
    126     0    stevel  * behavior.  Moreover, these ordering constraints are minimal: changing
    127     0    stevel  * the ordering of anything in (2M) or (3M) breaks the algorithm, creating
    128     0    stevel  * windows for missed wakeups.  Note: the possibility that other threads
    129     0    stevel  * may grab the lock after the owner drops it can be factored out of the
    130     0    stevel  * memory ordering analysis because mutex_vector_enter() won't block
    131     0    stevel  * if the lock isn't still owned by the same thread.
    132     0    stevel  *
    133     0    stevel  * The only requirements of code outside the mutex implementation are
    134     0    stevel  * (1) mutex_exit() preemption fixup in interrupt handlers or trap return,
    135  5834  pt157919  * (2) a membar #StoreLoad after setting CPU_THREAD in resume(),
    136  5834  pt157919  * (3) mutex_owner_running() preemption fixup in interrupt handlers
    137  5834  pt157919  * or trap returns.
    138     0    stevel  * Note: idle threads cannot grab adaptive locks (since they cannot block),
    139     0    stevel  * so the membar may be safely omitted when resuming an idle thread.
    140     0    stevel  *
    141     0    stevel  * When a mutex has waiters, mutex_vector_exit() has several options:
    142     0    stevel  *
    143     0    stevel  * (1) Choose a waiter and make that thread the owner before waking it;
    144     0    stevel  *     this is known as "direct handoff" of ownership.
    145     0    stevel  *
    146     0    stevel  * (2) Drop the lock and wake one waiter.
    147     0    stevel  *
    148     0    stevel  * (3) Drop the lock, clear the waiters bit, and wake all waiters.
    149     0    stevel  *
    150     0    stevel  * In many ways (1) is the cleanest solution, but if a lock is moderately
    151     0    stevel  * contended it defeats the adaptive spin logic.  If we make some other
    152     0    stevel  * thread the owner, but he's not ONPROC yet, then all other threads on
    153     0    stevel  * other cpus that try to get the lock will conclude that the owner is
    154     0    stevel  * blocked, so they'll block too.  And so on -- it escalates quickly,
    155     0    stevel  * with every thread taking the blocking path rather than the spin path.
    156     0    stevel  * Thus, direct handoff is *not* a good idea for adaptive mutexes.
    157     0    stevel  *
    158     0    stevel  * Option (2) is the next most natural-seeming option, but it has several
    159     0    stevel  * annoying properties.  If there's more than one waiter, we must preserve
    160     0    stevel  * the waiters bit on an unheld lock.  On cas-capable platforms, where
    161     0    stevel  * the waiters bit is part of the lock word, this means that both 0x0
    162     0    stevel  * and 0x1 represent unheld locks, so we have to cas against *both*.
    163     0    stevel  * Priority inheritance also gets more complicated, because a lock can
    164     0    stevel  * have waiters but no owner to whom priority can be willed.  So while
    165     0    stevel  * it is possible to make option (2) work, it's surprisingly vile.
    166     0    stevel  *
    167     0    stevel  * Option (3), the least-intuitive at first glance, is what we actually do.
    168     0    stevel  * It has the advantage that because you always wake all waiters, you
    169     0    stevel  * never have to preserve the waiters bit.  Waking all waiters seems like
    170     0    stevel  * begging for a thundering herd problem, but consider: under option (2),
    171     0    stevel  * every thread that grabs and drops the lock will wake one waiter -- so
    172     0    stevel  * if the lock is fairly active, all waiters will be awakened very quickly
    173     0    stevel  * anyway.  Moreover, this is how adaptive locks are *supposed* to work.
    174     0    stevel  * The blocking case is rare; the more common case (by 3-4 orders of
    175     0    stevel  * magnitude) is that one or more threads spin waiting to get the lock.
    176     0    stevel  * Only direct handoff can prevent the thundering herd problem, but as
    177     0    stevel  * mentioned earlier, that would tend to defeat the adaptive spin logic.
    178     0    stevel  * In practice, option (3) works well because the blocking case is rare.
    179     0    stevel  */
    180     0    stevel 
    181     0    stevel /*
    182     0    stevel  * delayed lock retry with exponential delay for spin locks
    183     0    stevel  *
    184     0    stevel  * It is noted above that for both the spin locks and the adaptive locks,
    185     0    stevel  * spinning is the dominant mode of operation.  So long as there is only
    186     0    stevel  * one thread waiting on a lock, the naive spin loop works very well in
    187     0    stevel  * cache based architectures.  The lock data structure is pulled into the
    188     0    stevel  * cache of the processor with the waiting/spinning thread and no further
    189     0    stevel  * memory traffic is generated until the lock is released.  Unfortunately,
    190     0    stevel  * once two or more threads are waiting on a lock, the naive spin has
    191     0    stevel  * the property of generating maximum memory traffic from each spinning
    192     0    stevel  * thread as the spinning threads contend for the lock data structure.
    193     0    stevel  *
    194     0    stevel  * By executing a delay loop before retrying a lock, a waiting thread
    195     0    stevel  * can reduce its memory traffic by a large factor, depending on the
    196     0    stevel  * size of the delay loop.  A large delay loop greatly reduces the memory
    197     0    stevel  * traffic, but has the drawback of having a period of time when
    198     0    stevel  * no thread is attempting to gain the lock even though several threads
    199     0    stevel  * might be waiting.  A small delay loop has the drawback of not
    200     0    stevel  * much reduction in memory traffic, but reduces the potential idle time.
    201     0    stevel  * The theory of the exponential delay code is to start with a short
    202     0    stevel  * delay loop and double the waiting time on each iteration, up to
    203  5834  pt157919  * a preselected maximum.
    204     0    stevel  */
    205     0    stevel 
    206     0    stevel #include <sys/param.h>
    207     0    stevel #include <sys/time.h>
    208     0    stevel #include <sys/cpuvar.h>
    209     0    stevel #include <sys/thread.h>
    210     0    stevel #include <sys/debug.h>
    211     0    stevel #include <sys/cmn_err.h>
    212     0    stevel #include <sys/sobject.h>
    213     0    stevel #include <sys/turnstile.h>
    214     0    stevel #include <sys/systm.h>
    215     0    stevel #include <sys/mutex_impl.h>
    216     0    stevel #include <sys/spl.h>
    217     0    stevel #include <sys/lockstat.h>
    218     0    stevel #include <sys/atomic.h>
    219     0    stevel #include <sys/cpu.h>
    220     0    stevel #include <sys/stack.h>
    221  5084   johnlev #include <sys/archsystm.h>
    222  5834  pt157919 #include <sys/machsystm.h>
    223  5834  pt157919 #include <sys/x_call.h>
    224     0    stevel 
    225     0    stevel /*
    226     0    stevel  * The sobj_ops vector exports a set of functions needed when a thread
    227     0    stevel  * is asleep on a synchronization object of this type.
    228     0    stevel  */
    229     0    stevel static sobj_ops_t mutex_sobj_ops = {
    230     0    stevel 	SOBJ_MUTEX, mutex_owner, turnstile_stay_asleep, turnstile_change_pri
    231     0    stevel };
    232     0    stevel 
    233     0    stevel /*
    234     0    stevel  * If the system panics on a mutex, save the address of the offending
    235     0    stevel  * mutex in panic_mutex_addr, and save the contents in panic_mutex.
    236     0    stevel  */
    237     0    stevel static mutex_impl_t panic_mutex;
    238     0    stevel static mutex_impl_t *panic_mutex_addr;
    239     0    stevel 
    240     0    stevel static void
    241     0    stevel mutex_panic(char *msg, mutex_impl_t *lp)
    242     0    stevel {
    243     0    stevel 	if (panicstr)
    244     0    stevel 		return;
    245     0    stevel 
    246     0    stevel 	if (casptr(&panic_mutex_addr, NULL, lp) == NULL)
    247     0    stevel 		panic_mutex = *lp;
    248     0    stevel 
    249     0    stevel 	panic("%s, lp=%p owner=%p thread=%p",
    250  7632      Nick 	    msg, (void *)lp, (void *)MUTEX_OWNER(&panic_mutex),
    251  7632      Nick 	    (void *)curthread);
    252     0    stevel }
    253     0    stevel 
    254  5834  pt157919 /* "tunables" for per-platform backoff constants. */
    255  5834  pt157919 uint_t mutex_backoff_cap = 0;
    256  5834  pt157919 ushort_t mutex_backoff_base = MUTEX_BACKOFF_BASE;
    257  5834  pt157919 ushort_t mutex_cap_factor = MUTEX_CAP_FACTOR;
    258  5834  pt157919 uchar_t mutex_backoff_shift = MUTEX_BACKOFF_SHIFT;
    259  5834  pt157919 
    260  5834  pt157919 void
    261  5834  pt157919 mutex_sync(void)
    262  5834  pt157919 {
    263  5834  pt157919 	MUTEX_SYNC();
    264  5834  pt157919 }
    265  5834  pt157919 
    266  5834  pt157919 /* calculate the backoff interval */
    267  6138   svemuri uint_t
    268  5834  pt157919 default_lock_backoff(uint_t backoff)
    269  5834  pt157919 {
    270  5834  pt157919 	uint_t cap;		/* backoff cap calculated */
    271  5834  pt157919 
    272  5834  pt157919 	if (backoff == 0) {
    273  5834  pt157919 		backoff = mutex_backoff_base;
    274  5834  pt157919 		/* first call just sets the base */
    275  5834  pt157919 		return (backoff);
    276  5834  pt157919 	}
    277  5834  pt157919 
    278  5834  pt157919 	/* set cap */
    279  5834  pt157919 	if (mutex_backoff_cap == 0) {
    280  5834  pt157919 		/*
    281  5834  pt157919 		 * For a contended lock, in the worst case a load + cas may
    282  5834  pt157919 		 * be queued  at the controller for each contending CPU.
    283  5834  pt157919 		 * Therefore, to avoid queueing, the accesses for all CPUS must
    284  5834  pt157919 		 * be spread out in time over an interval of (ncpu *
    285  5834  pt157919 		 * cap-factor).  Maximum backoff is set to this value, and
    286  5834  pt157919 		 * actual backoff is a random number from 0 to the current max.
    287  5834  pt157919 		 */
    288  5834  pt157919 		cap = ncpus_online * mutex_cap_factor;
    289  5834  pt157919 	} else {
    290  5834  pt157919 		cap = mutex_backoff_cap;
    291  5834  pt157919 	}
    292  5834  pt157919 
    293  5834  pt157919 	/* calculate new backoff value */
    294  5834  pt157919 	backoff <<= mutex_backoff_shift;	/* increase backoff */
    295  5834  pt157919 	if (backoff > cap) {
    296  5834  pt157919 		if (cap < mutex_backoff_base)
    297  5834  pt157919 			backoff = mutex_backoff_base;
    298  5834  pt157919 		else
    299  5834  pt157919 			backoff = cap;
    300  5834  pt157919 	}
    301  5834  pt157919 
    302  5834  pt157919 	return (backoff);
    303  5834  pt157919 }
    304  5834  pt157919 
    305  5834  pt157919 /*
    306  5834  pt157919  * default delay function for mutexes.
    307  5834  pt157919  */
    308  6138   svemuri void
    309  5834  pt157919 default_lock_delay(uint_t backoff)
    310  5834  pt157919 {
    311  5834  pt157919 	ulong_t rnd;		/* random factor */
    312  5834  pt157919 	uint_t cur_backoff;	/* calculated backoff */
    313  5834  pt157919 	uint_t backctr;
    314  5834  pt157919 
    315  5834  pt157919 	/*
    316  5834  pt157919 	 * Modify backoff by a random amount to avoid lockstep, and to
    317  5834  pt157919 	 * make it probable that some thread gets a small backoff, and
    318  5834  pt157919 	 * re-checks quickly
    319  5834  pt157919 	 */
    320  5834  pt157919 	rnd = (((long)curthread >> PTR24_LSB) ^ (long)MUTEX_GETTICK());
    321  5834  pt157919 	cur_backoff = (uint_t)(rnd % (backoff - mutex_backoff_base + 1)) +
    322  5834  pt157919 	    mutex_backoff_base;
    323  5834  pt157919 
    324  5834  pt157919 	/*
    325  5834  pt157919 	 * Delay before trying
    326  5834  pt157919 	 * to touch the mutex data structure.
    327  5834  pt157919 	 */
    328  5834  pt157919 	for (backctr = cur_backoff; backctr; backctr--) {
    329  5834  pt157919 		MUTEX_DELAY();
    330  5834  pt157919 	};
    331  5834  pt157919 }
    332  5834  pt157919 
    333  5834  pt157919 uint_t (*mutex_lock_backoff)(uint_t) = default_lock_backoff;
    334  5834  pt157919 void (*mutex_lock_delay)(uint_t) = default_lock_delay;
    335  5834  pt157919 void (*mutex_delay)(void) = mutex_delay_default;
    336  5834  pt157919 
    337     0    stevel /*
    338     0    stevel  * mutex_vector_enter() is called from the assembly mutex_enter() routine
    339     0    stevel  * if the lock is held or is not of type MUTEX_ADAPTIVE.
                         *
                         * 'lp' is the mutex to acquire.  A spin mutex is taken with
                         * lock_set_spl() and we return at once; a mutex that is neither spin
                         * nor adaptive goes to mutex_panic().  For an adaptive mutex we loop:
                         * delay for the current backoff, try to take the lock if it looks free,
                         * keep spinning while mutex_owner_running() reports the owner running,
                         * and otherwise block on the mutex's turnstile.
                         *
                         * Returns with the lock owned by curthread, except that once panicstr
                         * is set the loop returns immediately without acquiring the lock.
    340     0    stevel  */
    341     0    stevel void
    342     0    stevel mutex_vector_enter(mutex_impl_t *lp)
    343     0    stevel {
    344     0    stevel 	kthread_id_t	owner;
    345  5834  pt157919 	kthread_id_t	lastowner = MUTEX_NO_OWNER; /* track owner changes */
    346     0    stevel 	hrtime_t	sleep_time = 0;	/* how long we slept */
    347  6103  ck142721 	hrtime_t	spin_time = 0;	/* how long we spun */
    348  5834  pt157919 	cpu_t 		*cpup;
    349     0    stevel 	turnstile_t	*ts;
    350     0    stevel 	volatile mutex_impl_t *vlp = (volatile mutex_impl_t *)lp;
    351  5834  pt157919 	uint_t		backoff = 0;	/* current backoff */
    352  5834  pt157919 	int		changecnt = 0;	/* count of owner changes */
    353     0    stevel 
    354     0    stevel 	ASSERT_STACK_ALIGNED();
    355     0    stevel 
    356     0    stevel 	if (MUTEX_TYPE_SPIN(lp)) {
    357     0    stevel 		lock_set_spl(&lp->m_spin.m_spinlock, lp->m_spin.m_minspl,
    358     0    stevel 		    &lp->m_spin.m_oldspl);
    359     0    stevel 		return;
    360     0    stevel 	}
    361     0    stevel 
    362     0    stevel 	if (!MUTEX_TYPE_ADAPTIVE(lp)) {
                        		/* mutex_panic() comes back if a panic is already under way. */
    363     0    stevel 		mutex_panic("mutex_enter: bad mutex", lp);
    364     0    stevel 		return;
    365     0    stevel 	}
    366     0    stevel 
    367     0    stevel 	/*
    368     0    stevel 	 * Adaptive mutexes must not be acquired from above LOCK_LEVEL.
    369     0    stevel 	 * We can migrate after loading CPU but before checking CPU_ON_INTR,
    370     0    stevel 	 * so we must verify by disabling preemption and loading CPU again.
    371     0    stevel 	 */
    372     0    stevel 	cpup = CPU;
    373     0    stevel 	if (CPU_ON_INTR(cpup) && !panicstr) {
    374     0    stevel 		kpreempt_disable();
    375     0    stevel 		if (CPU_ON_INTR(CPU))
    376     0    stevel 			mutex_panic("mutex_enter: adaptive at high PIL", lp);
    377     0    stevel 		kpreempt_enable();
    378     0    stevel 	}
    379     0    stevel 
    380     0    stevel 	CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1);
    381     0    stevel 
                        	/*
                        	 * NOTE(review): the "spin_time != 0" test at the bottom relies on
                        	 * LOCKSTAT_START_TIME() yielding 0 when nothing should be recorded
                        	 * (presumably when the probe is disabled) -- confirm in
                        	 * sys/lockstat.h.
                        	 */
    382  6103  ck142721 	spin_time = LOCKSTAT_START_TIME(LS_MUTEX_ENTER_SPIN);
    383  6103  ck142721 
    384  5834  pt157919 	backoff = mutex_lock_backoff(0);	/* set base backoff */
    385     0    stevel 	for (;;) {
    386  5834  pt157919 		mutex_lock_delay(backoff); /* backoff delay */
    387     0    stevel 
                        		/* Once the system is panicking, give up without the lock. */
    388     0    stevel 		if (panicstr)
    389     0    stevel 			return;
    390     0    stevel 
    391     0    stevel 		if ((owner = MUTEX_OWNER(vlp)) == NULL) {
                        			/* The lock looks free: try to take it. */
    392  5834  pt157919 			if (mutex_adaptive_tryenter(lp)) {
    393     0    stevel 				break;
    394  5834  pt157919 			}
    395  5834  pt157919 			/* increase backoff only on failed attempt. */
    396  5834  pt157919 			backoff = mutex_lock_backoff(backoff);
    397  5834  pt157919 			changecnt++;
    398     0    stevel 			continue;
    399  5834  pt157919 		} else if (lastowner != owner) {
                        			/*
                        			 * The owner differs from the one seen on the
                        			 * previous pass: remember it, grow the backoff and
                        			 * count the change.
                        			 */
    400  5834  pt157919 			lastowner = owner;
    401  5834  pt157919 			backoff = mutex_lock_backoff(backoff);
    402  5834  pt157919 			changecnt++;
    403  5834  pt157919 		}
    404  5834  pt157919 
                        		/*
                        		 * After ncpus_online owner changes / failed attempts, drop
                        		 * back to the base backoff and start counting again.
                        		 */
    405  5834  pt157919 		if (changecnt >= ncpus_online) {
    406  5834  pt157919 			backoff = mutex_lock_backoff(0);
    407  5834  pt157919 			changecnt = 0;
    408     0    stevel 		}
    409     0    stevel 
    410     0    stevel 		if (owner == curthread)
    411     0    stevel 			mutex_panic("recursive mutex_enter", lp);
    412     0    stevel 
    413     0    stevel 		/*
    414     0    stevel 		 * If lock is held but owner is not yet set, spin.
    415     0    stevel 		 * (Only relevant for platforms that don't have cas.)
    416     0    stevel 		 */
    417     0    stevel 		if (owner == MUTEX_NO_OWNER)
    418     0    stevel 			continue;
    419     0    stevel 
                        		/* Owner is running: keep spinning rather than blocking. */
    420  5834  pt157919 		if (mutex_owner_running(lp) != NULL)  {
    421  5834  pt157919 			continue;
    422  5834  pt157919 		}
    423     0    stevel 
    424     0    stevel 		/*
    425     0    stevel 		 * The owner appears not to be running, so block.
    426     0    stevel 		 * See the Big Theory Statement for memory ordering issues.
                        		 *
                        		 * Every path from turnstile_lookup() below ends in either
                        		 * turnstile_block() or turnstile_exit().
    427     0    stevel 		 */
    428     0    stevel 		ts = turnstile_lookup(lp);
    429     0    stevel 		MUTEX_SET_WAITERS(lp);
    430     0    stevel 		membar_enter();
    431     0    stevel 
    432     0    stevel 		/*
    433     0    stevel 		 * Recheck whether owner is running after waiters bit hits
    434     0    stevel 		 * global visibility (above).  If owner is running, spin.
    435     0    stevel 		 */
    436  5834  pt157919 		if (mutex_owner_running(lp) != NULL) {
    437  5834  pt157919 			turnstile_exit(lp);
    438  5834  pt157919 			continue;
    439  5834  pt157919 		}
    440     0    stevel 		membar_consumer();
    441     0    stevel 
    442     0    stevel 		/*
    443     0    stevel 		 * If owner and waiters bit are unchanged, block.
    444     0    stevel 		 */
    445     0    stevel 		if (MUTEX_OWNER(vlp) == owner && MUTEX_HAS_WAITERS(vlp)) {
                        			/*
                        			 * sleep_time accumulates (wakeup - block) over every
                        			 * trip through turnstile_block().
                        			 */
    446     0    stevel 			sleep_time -= gethrtime();
    447     0    stevel 			(void) turnstile_block(ts, TS_WRITER_Q, lp,
    448     0    stevel 			    &mutex_sobj_ops, NULL, NULL);
    449     0    stevel 			sleep_time += gethrtime();
    450  5834  pt157919 			/* reset backoff after turnstile */
    451  5834  pt157919 			backoff = mutex_lock_backoff(0);
    452     0    stevel 		} else {
    453     0    stevel 			turnstile_exit(lp);
    454     0    stevel 		}
    455     0    stevel 	}
    456     0    stevel 
    457     0    stevel 	ASSERT(MUTEX_OWNER(lp) == curthread);
    458     0    stevel 
    459  2205  dv142724 	if (sleep_time != 0) {
    460  2205  dv142724 		/*
    461  2205  dv142724 		 * Note, sleep time is the sum of all the sleeping we
    462  2205  dv142724 		 * did.
    463  2205  dv142724 		 */
    464     0    stevel 		LOCKSTAT_RECORD(LS_MUTEX_ENTER_BLOCK, lp, sleep_time);
    465     0    stevel 	}
    466  2205  dv142724 
    467  6103  ck142721 	/* record spin time, don't count sleep time */
                        	/*
                        	 * NOTE(review): presumably LOCKSTAT_RECORD_TIME() treats its last
                        	 * argument as a start timestamp, so adding sleep_time moves the
                        	 * start forward and thereby excludes the time spent asleep --
                        	 * confirm against sys/lockstat.h.
                        	 */
    468  6103  ck142721 	if (spin_time != 0) {
    469  6103  ck142721 		LOCKSTAT_RECORD_TIME(LS_MUTEX_ENTER_SPIN, lp,
    470  6103  ck142721 		    spin_time + sleep_time);
    471  5834  pt157919 	}
    472     0    stevel 
    473     0    stevel 	LOCKSTAT_RECORD0(LS_MUTEX_ENTER_ACQUIRE, lp);
    474     0    stevel }
    475     0    stevel 
    476     0    stevel /*
    477     0    stevel  * mutex_vector_tryenter() is called from the assembly mutex_tryenter()
    478     0    stevel  * routine if the lock is held or is not of type MUTEX_ADAPTIVE.
    479     0    stevel  */
    480     0    stevel int
    481     0    stevel mutex_vector_tryenter(mutex_impl_t *lp)
    482     0    stevel {
    483     0    stevel 	int s;
    484     0    stevel 
    485     0    stevel 	if (MUTEX_TYPE_ADAPTIVE(lp))
    486     0    stevel 		return (0);		/* we already tried in assembly */
    487     0    stevel 
    488     0    stevel 	if (!MUTEX_TYPE_SPIN(lp)) {
    489     0    stevel 		mutex_panic("mutex_tryenter: bad mutex", lp);
    490     0    stevel 		return (0);
    491     0    stevel 	}
    492     0    stevel 
    493     0    stevel 	s = splr(lp->m_spin.m_minspl);
    494     0    stevel 	if (lock_try(&lp->m_spin.m_spinlock)) {
    495     0    stevel 		lp->m_spin.m_oldspl = (ushort_t)s;
    496     0    stevel 		return (1);
    497     0    stevel 	}
    498     0    stevel 	splx(s);
    499     0    stevel 	return (0);
    500     0    stevel }
    501     0    stevel 
    502     0    stevel /*
    503     0    stevel  * mutex_vector_exit() is called from mutex_exit() if the lock is not
    504     0    stevel  * adaptive, has waiters, or is not owned by the current thread (panic).
    505     0    stevel  */
    506     0    stevel void
    507     0    stevel mutex_vector_exit(mutex_impl_t *lp)
    508     0    stevel {
    509     0    stevel 	turnstile_t *ts;
    510     0    stevel 
    511     0    stevel 	if (MUTEX_TYPE_SPIN(lp)) {
    512     0    stevel 		lock_clear_splx(&lp->m_spin.m_spinlock, lp->m_spin.m_oldspl);
    513     0    stevel 		return;
    514     0    stevel 	}
    515     0    stevel 
    516     0    stevel 	if (MUTEX_OWNER(lp) != curthread) {
    517     0    stevel 		mutex_panic("mutex_exit: not owner", lp);
    518     0    stevel 		return;
    519     0    stevel 	}
    520     0    stevel 
    521     0    stevel 	ts = turnstile_lookup(lp);
    522     0    stevel 	MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
    523     0    stevel 	if (ts == NULL)
    524     0    stevel 		turnstile_exit(lp);
    525     0    stevel 	else
    526     0    stevel 		turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
    527     0    stevel 	LOCKSTAT_RECORD0(LS_MUTEX_EXIT_RELEASE, lp);
    528     0    stevel }
    529     0    stevel 
    530     0    stevel int
    531  6712     tomee mutex_owned(const kmutex_t *mp)
    532     0    stevel {
    533  6712     tomee 	const mutex_impl_t *lp = (const mutex_impl_t *)mp;
    534     0    stevel 
    535  7656    Sherry 	if (panicstr || quiesce_active)
    536     0    stevel 		return (1);
    537     0    stevel 
    538     0    stevel 	if (MUTEX_TYPE_ADAPTIVE(lp))
    539     0    stevel 		return (MUTEX_OWNER(lp) == curthread);
    540     0    stevel 	return (LOCK_HELD(&lp->m_spin.m_spinlock));
    541     0    stevel }
    542     0    stevel 
    543     0    stevel kthread_t *
    544  6712     tomee mutex_owner(const kmutex_t *mp)
    545     0    stevel {
    546  6712     tomee 	const mutex_impl_t *lp = (const mutex_impl_t *)mp;
    547     0    stevel 	kthread_id_t t;
    548     0    stevel 
    549     0    stevel 	if (MUTEX_TYPE_ADAPTIVE(lp) && (t = MUTEX_OWNER(lp)) != MUTEX_NO_OWNER)
    550     0    stevel 		return (t);
    551     0    stevel 	return (NULL);
    552     0    stevel }
    553     0    stevel 
    554     0    stevel /*
    555     0    stevel  * The iblock cookie 'ibc' is the spl level associated with the lock;
    556     0    stevel  * this alone determines whether the lock will be ADAPTIVE or SPIN.
    557     0    stevel  *
    558     0    stevel  * Adaptive mutexes created in zeroed memory do not need to call
    559     0    stevel  * mutex_init() as their allocation in this fashion guarantees
    560     0    stevel  * their initialization.
    561     0    stevel  *   eg adaptive mutexes created as static within the BSS or allocated
    562     0    stevel  *      by kmem_zalloc().
    563     0    stevel  */
    564     0    stevel /* ARGSUSED */
    565     0    stevel void
    566     0    stevel mutex_init(kmutex_t *mp, char *name, kmutex_type_t type, void *ibc)
    567     0    stevel {
    568     0    stevel 	mutex_impl_t *lp = (mutex_impl_t *)mp;
    569     0    stevel 
    570     0    stevel 	ASSERT(ibc < (void *)KERNELBASE);	/* see 1215173 */
    571     0    stevel 
    572     0    stevel 	if ((intptr_t)ibc > ipltospl(LOCK_LEVEL) && ibc < (void *)KERNELBASE) {
    573     0    stevel 		ASSERT(type != MUTEX_ADAPTIVE && type != MUTEX_DEFAULT);
    574     0    stevel 		MUTEX_SET_TYPE(lp, MUTEX_SPIN);
    575     0    stevel 		LOCK_INIT_CLEAR(&lp->m_spin.m_spinlock);
    576     0    stevel 		LOCK_INIT_HELD(&lp->m_spin.m_dummylock);
    577     0    stevel 		lp->m_spin.m_minspl = (int)(intptr_t)ibc;
    578     0    stevel 	} else {
    579  6617  ck142721 #ifdef MUTEX_ALIGN
    580  6617  ck142721 		static int misalign_cnt = 0;
    581  6617  ck142721 
    582  6617  ck142721 		if (((uintptr_t)lp & (uintptr_t)(MUTEX_ALIGN - 1)) &&
    583  6617  ck142721 		    (misalign_cnt < MUTEX_ALIGN_WARNINGS)) {
    584  6617  ck142721 			/*
    585  6617  ck142721 			 * The mutex is not aligned and may cross a cache line.
    586  6617  ck142721 			 * This is not supported and may cause a panic.
    587  6617  ck142721 			 * Show a warning that the mutex is not aligned
    588  6617  ck142721 			 * and attempt to identify the origin.
    589  6617  ck142721 			 * Unaligned mutexes are not (supposed to be)
    590  6617  ck142721 			 * possible on SPARC.
    591  6617  ck142721 			 */
    592  6617  ck142721 			char *funcname;
    593  6617  ck142721 			ulong_t offset = 0;
    594  6617  ck142721 
    595  6617  ck142721 			funcname = modgetsymname((uintptr_t)caller(), &offset);
    596  6617  ck142721 			cmn_err(CE_WARN, "mutex_init: %p is not %d byte "
    597  6617  ck142721 			    "aligned; caller %s+%lx in module %s. "
    598  6617  ck142721 			    "This is unsupported and may cause a panic. "
    599  6617  ck142721 			    "Please report this to the kernel module supplier.",
    600  6626  ck142721 			    (void *)lp, MUTEX_ALIGN,
    601  6617  ck142721 			    funcname ? funcname : "unknown", offset,
    602  6617  ck142721 			    mod_containing_pc(caller()));
    603  6617  ck142721 			misalign_cnt++;
    604  6617  ck142721 			if (misalign_cnt >= MUTEX_ALIGN_WARNINGS) {
    605  6617  ck142721 				cmn_err(CE_WARN, "mutex_init: further unaligned"
    606  6617  ck142721 				    " mutex warnings will be suppressed.");
    607  6617  ck142721 			}
    608  6617  ck142721 		}
    609  6617  ck142721 #endif	/* MUTEX_ALIGN */
    610     0    stevel 		ASSERT(type != MUTEX_SPIN);
    611  6617  ck142721 
    612     0    stevel 		MUTEX_SET_TYPE(lp, MUTEX_ADAPTIVE);
    613     0    stevel 		MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
    614     0    stevel 	}
    615     0    stevel }
    616     0    stevel 
    617     0    stevel void
    618     0    stevel mutex_destroy(kmutex_t *mp)
    619     0    stevel {
    620     0    stevel 	mutex_impl_t *lp = (mutex_impl_t *)mp;
    621     0    stevel 
    622     0    stevel 	if (lp->m_owner == 0 && !MUTEX_HAS_WAITERS(lp)) {
    623     0    stevel 		MUTEX_DESTROY(lp);
    624     0    stevel 	} else if (MUTEX_TYPE_SPIN(lp)) {
    625     0    stevel 		LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
    626     0    stevel 		MUTEX_DESTROY(lp);
    627     0    stevel 	} else if (MUTEX_TYPE_ADAPTIVE(lp)) {
    628     0    stevel 		LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
    629     0    stevel 		if (MUTEX_OWNER(lp) != curthread)
    630     0    stevel 			mutex_panic("mutex_destroy: not owner", lp);
    631     0    stevel 		if (MUTEX_HAS_WAITERS(lp)) {
    632     0    stevel 			turnstile_t *ts = turnstile_lookup(lp);
    633     0    stevel 			turnstile_exit(lp);
    634     0    stevel 			if (ts != NULL)
    635     0    stevel 				mutex_panic("mutex_destroy: has waiters", lp);
    636     0    stevel 		}
    637     0    stevel 		MUTEX_DESTROY(lp);
    638     0    stevel 	} else {
    639     0    stevel 		mutex_panic("mutex_destroy: bad mutex", lp);
    640     0    stevel 	}
    641     0    stevel }
    642     0    stevel 
    643     0    stevel /*
    644     0    stevel  * Simple C support for the cases where spin locks miss on the first try.
    645     0    stevel  */
    646     0    stevel void
    647     0    stevel lock_set_spin(lock_t *lp)
    648     0    stevel {
    649  5834  pt157919 	int loop_count = 0;
    650  5834  pt157919 	uint_t backoff = 0;	/* current backoff */
    651  6103  ck142721 	hrtime_t spin_time = 0;	/* how long we spun */
    652     0    stevel 
    653     0    stevel 	if (panicstr)
    654     0    stevel 		return;
    655     0    stevel 
    656     0    stevel 	if (ncpus == 1)
    657  7632      Nick 		panic("lock_set: %p lock held and only one CPU", (void *)lp);
    658     0    stevel 
    659  6103  ck142721 	spin_time = LOCKSTAT_START_TIME(LS_LOCK_SET_SPIN);
    660  6103  ck142721 
    661     0    stevel 	while (LOCK_HELD(lp) || !lock_spin_try(lp)) {
    662     0    stevel 		if (panicstr)
    663     0    stevel 			return;
    664  5834  pt157919 		loop_count++;
    665  5834  pt157919 
    666  5834  pt157919 		if (ncpus_online == loop_count) {
    667  5834  pt157919 			backoff = mutex_lock_backoff(0);
    668  5834  pt157919 			loop_count = 0;
    669  3914  pm145316 		} else {
    670  5834  pt157919 			backoff = mutex_lock_backoff(backoff);
    671     0    stevel 		}
    672  5834  pt157919 		mutex_lock_delay(backoff);
    673     0    stevel 	}
    674     0    stevel 
    675  6103  ck142721 	LOCKSTAT_RECORD_TIME(LS_LOCK_SET_SPIN, lp, spin_time);
    676     0    stevel 
    677     0    stevel 	LOCKSTAT_RECORD0(LS_LOCK_SET_ACQUIRE, lp);
    678     0    stevel }
    679     0    stevel 
    680     0    stevel void
    681     0    stevel lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil)
    682     0    stevel {
    683  5834  pt157919 	int loop_count = 0;
    684  5834  pt157919 	uint_t backoff = 0;	/* current backoff */
    685  6103  ck142721 	hrtime_t spin_time = 0;	/* how long we spun */
    686     0    stevel 
    687     0    stevel 	if (panicstr)
    688     0    stevel 		return;
    689     0    stevel 
    690     0    stevel 	if (ncpus == 1)
    691  7632      Nick 		panic("lock_set_spl: %p lock held and only one CPU",
    692  7632      Nick 		    (void *)lp);
    693     0    stevel 
    694     0    stevel 	ASSERT(new_pil > LOCK_LEVEL);
    695     0    stevel 
    696  6103  ck142721 	spin_time = LOCKSTAT_START_TIME(LS_LOCK_SET_SPL_SPIN);
    697  6103  ck142721 
    698     0    stevel 	do {
    699     0    stevel 		splx(old_pil);
    700     0    stevel 		while (LOCK_HELD(lp)) {
    701  5834  pt157919 			loop_count++;
    702  5834  pt157919 
    703     0    stevel 			if (panicstr) {
    704     0    stevel 				*old_pil_addr = (ushort_t)splr(new_pil);
    705     0    stevel 				return;
    706     0    stevel 			}
    707  5834  pt157919 			if (ncpus_online == loop_count) {
    708  5834  pt157919 				backoff = mutex_lock_backoff(0);
    709  5834  pt157919 				loop_count = 0;
    710  3914  pm145316 			} else {
    711  5834  pt157919 				backoff = mutex_lock_backoff(backoff);
    712     0    stevel 			}
    713  5834  pt157919 			mutex_lock_delay(backoff);
    714     0    stevel 		}
    715     0    stevel 		old_pil = splr(new_pil);
    716     0    stevel 	} while (!lock_spin_try(lp));
    717     0    stevel 
    718     0    stevel 	*old_pil_addr = (ushort_t)old_pil;
    719     0    stevel 
    720  6103  ck142721 	LOCKSTAT_RECORD_TIME(LS_LOCK_SET_SPL_SPIN, lp, spin_time);
    721     0    stevel 
    722  6103  ck142721 	LOCKSTAT_RECORD0(LS_LOCK_SET_SPL_ACQUIRE, lp);
    723     0    stevel }
    724