1 0 stevel /* 2 0 stevel * CDDL HEADER START 3 0 stevel * 4 0 stevel * The contents of this file are subject to the terms of the 5 2205 dv142724 * Common Development and Distribution License (the "License"). 6 2205 dv142724 * You may not use this file except in compliance with the License. 7 0 stevel * 8 0 stevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 0 stevel * or http://www.opensolaris.org/os/licensing. 10 0 stevel * See the License for the specific language governing permissions 11 0 stevel * and limitations under the License. 12 0 stevel * 13 0 stevel * When distributing Covered Code, include this CDDL HEADER in each 14 0 stevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 0 stevel * If applicable, add the following below this CDDL HEADER, with the 16 0 stevel * fields enclosed by brackets "[]" replaced with your own identifying 17 0 stevel * information: Portions Copyright [yyyy] [name of copyright owner] 18 0 stevel * 19 0 stevel * CDDL HEADER END 20 0 stevel */ 21 0 stevel /* 22 9160 Sherry * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 0 stevel * Use is subject to license terms. 24 0 stevel */ 25 0 stevel 26 0 stevel /* 27 0 stevel * Big Theory Statement for mutual exclusion locking primitives. 28 0 stevel * 29 0 stevel * A mutex serializes multiple threads so that only one thread 30 0 stevel * (the "owner" of the mutex) is active at a time. See mutex(9F) 31 0 stevel * for a full description of the interfaces and programming model. 32 0 stevel * The rest of this comment describes the implementation. 33 0 stevel * 34 0 stevel * Mutexes come in two flavors: adaptive and spin. mutex_init(9F) 35 0 stevel * determines the type based solely on the iblock cookie (PIL) argument. 36 0 stevel * PIL > LOCK_LEVEL implies a spin lock; everything else is adaptive. 37 0 stevel * 38 0 stevel * Spin mutexes block interrupts and spin until the lock becomes available. 
39 0 stevel * A thread may not sleep, or call any function that might sleep, while 40 0 stevel * holding a spin mutex. With few exceptions, spin mutexes should only 41 0 stevel * be used to synchronize with interrupt handlers. 42 0 stevel * 43 0 stevel * Adaptive mutexes (the default type) spin if the owner is running on 44 0 stevel * another CPU and block otherwise. This policy is based on the assumption 45 0 stevel * that mutex hold times are typically short enough that the time spent 46 0 stevel * spinning is less than the time it takes to block. If you need mutual 47 0 stevel * exclusion semantics with long hold times, consider an rwlock(9F) as 48 0 stevel * RW_WRITER. Better still, reconsider the algorithm: if it requires 49 0 stevel * mutual exclusion for long periods of time, it's probably not scalable. 50 0 stevel * 51 0 stevel * Adaptive mutexes are overwhelmingly more common than spin mutexes, 52 0 stevel * so mutex_enter() assumes that the lock is adaptive. We get away 53 0 stevel * with this by structuring mutexes so that an attempt to acquire a 54 0 stevel * spin mutex as adaptive always fails. When mutex_enter() fails 55 0 stevel * it punts to mutex_vector_enter(), which does all the hard stuff. 56 0 stevel * 57 0 stevel * mutex_vector_enter() first checks the type. If it's a spin mutex, 58 0 stevel * we just call lock_set_spl() and return. If it's an adaptive mutex, 59 0 stevel * we check to see what the owner is doing. If the owner is running, 60 0 stevel * we spin until the lock becomes available; if not, we mark the lock 61 0 stevel * as having waiters and block. 62 0 stevel * 63 0 stevel * Blocking on a mutex is a surprisingly delicate dance because, for speed, 64 0 stevel * mutex_exit() doesn't use an atomic instruction. Thus we have to work 65 0 stevel * a little harder in the (rarely-executed) blocking path to make sure 66 0 stevel * we don't block on a mutex that's just been released -- otherwise we 67 0 stevel * might never be woken up. 
68 0 stevel * 69 0 stevel * The logic for synchronizing mutex_vector_enter() with mutex_exit() 70 0 stevel * in the face of preemption and relaxed memory ordering is as follows: 71 0 stevel * 72 0 stevel * (1) Preemption in the middle of mutex_exit() must cause mutex_exit() 73 0 stevel * to restart. Each platform must enforce this by checking the 74 0 stevel * interrupted PC in the interrupt handler (or on return from trap -- 75 0 stevel * whichever is more convenient for the platform). If the PC 76 0 stevel * lies within the critical region of mutex_exit(), the interrupt 77 0 stevel * handler must reset the PC back to the beginning of mutex_exit(). 78 0 stevel * The critical region consists of all instructions up to, but not 79 0 stevel * including, the store that clears the lock (which, of course, 80 0 stevel * must never be executed twice.) 81 0 stevel * 82 0 stevel * This ensures that the owner will always check for waiters after 83 0 stevel * resuming from a previous preemption. 84 0 stevel * 85 0 stevel * (2) A thread resuming in mutex_exit() does (at least) the following: 86 0 stevel * 87 0 stevel * when resuming: set CPU_THREAD = owner 88 0 stevel * membar #StoreLoad 89 0 stevel * 90 0 stevel * in mutex_exit: check waiters bit; do wakeup if set 91 0 stevel * membar #LoadStore|#StoreStore 92 0 stevel * clear owner 93 0 stevel * (at this point, other threads may or may not grab 94 0 stevel * the lock, and we may or may not reacquire it) 95 0 stevel * 96 0 stevel * when blocking: membar #StoreStore (due to disp_lock_enter()) 97 0 stevel * set CPU_THREAD = (possibly) someone else 98 0 stevel * 99 0 stevel * (3) A thread blocking in mutex_vector_enter() does the following: 100 0 stevel * 101 0 stevel * set waiters bit 102 0 stevel * membar #StoreLoad (via membar_enter()) 103 5834 pt157919 * check CPU_THREAD for owner's t_cpu 104 5834 pt157919 * continue if owner running 105 0 stevel * membar #LoadLoad (via membar_consumer()) 106 0 stevel * check owner and 
waiters bit; abort if either changed 107 0 stevel * block 108 0 stevel * 109 0 stevel * Thus the global memory orderings for (2) and (3) are as follows: 110 0 stevel * 111 0 stevel * (2M) mutex_exit() memory order: 112 0 stevel * 113 0 stevel * STORE CPU_THREAD = owner 114 0 stevel * LOAD waiters bit 115 0 stevel * STORE owner = NULL 116 0 stevel * STORE CPU_THREAD = (possibly) someone else 117 0 stevel * 118 0 stevel * (3M) mutex_vector_enter() memory order: 119 0 stevel * 120 0 stevel * STORE waiters bit = 1 121 0 stevel * LOAD CPU_THREAD for each CPU 122 0 stevel * LOAD owner and waiters bit 123 0 stevel * 124 0 stevel * It has been verified by exhaustive simulation that all possible global 125 0 stevel * memory orderings of (2M) interleaved with (3M) result in correct 126 0 stevel * behavior. Moreover, these ordering constraints are minimal: changing 127 0 stevel * the ordering of anything in (2M) or (3M) breaks the algorithm, creating 128 0 stevel * windows for missed wakeups. Note: the possibility that other threads 129 0 stevel * may grab the lock after the owner drops it can be factored out of the 130 0 stevel * memory ordering analysis because mutex_vector_enter() won't block 131 0 stevel * if the lock isn't still owned by the same thread. 132 0 stevel * 133 0 stevel * The only requirements of code outside the mutex implementation are 134 0 stevel * (1) mutex_exit() preemption fixup in interrupt handlers or trap return, 135 5834 pt157919 * (2) a membar #StoreLoad after setting CPU_THREAD in resume(), 136 5834 pt157919 * (3) mutex_owner_running() preemption fixup in interrupt handlers 137 5834 pt157919 * or trap returns. 138 0 stevel * Note: idle threads cannot grab adaptive locks (since they cannot block), 139 0 stevel * so the membar may be safely omitted when resuming an idle thread. 
140 0 stevel * 141 0 stevel * When a mutex has waiters, mutex_vector_exit() has several options: 142 0 stevel * 143 0 stevel * (1) Choose a waiter and make that thread the owner before waking it; 144 0 stevel * this is known as "direct handoff" of ownership. 145 0 stevel * 146 0 stevel * (2) Drop the lock and wake one waiter. 147 0 stevel * 148 0 stevel * (3) Drop the lock, clear the waiters bit, and wake all waiters. 149 0 stevel * 150 0 stevel * In many ways (1) is the cleanest solution, but if a lock is moderately 151 0 stevel * contended it defeats the adaptive spin logic. If we make some other 152 0 stevel * thread the owner, but he's not ONPROC yet, then all other threads on 153 0 stevel * other cpus that try to get the lock will conclude that the owner is 154 0 stevel * blocked, so they'll block too. And so on -- it escalates quickly, 155 0 stevel * with every thread taking the blocking path rather than the spin path. 156 0 stevel * Thus, direct handoff is *not* a good idea for adaptive mutexes. 157 0 stevel * 158 0 stevel * Option (2) is the next most natural-seeming option, but it has several 159 0 stevel * annoying properties. If there's more than one waiter, we must preserve 160 0 stevel * the waiters bit on an unheld lock. On cas-capable platforms, where 161 0 stevel * the waiters bit is part of the lock word, this means that both 0x0 162 0 stevel * and 0x1 represent unheld locks, so we have to cas against *both*. 163 0 stevel * Priority inheritance also gets more complicated, because a lock can 164 0 stevel * have waiters but no owner to whom priority can be willed. So while 165 0 stevel * it is possible to make option (2) work, it's surprisingly vile. 166 0 stevel * 167 0 stevel * Option (3), the least-intuitive at first glance, is what we actually do. 168 0 stevel * It has the advantage that because you always wake all waiters, you 169 0 stevel * never have to preserve the waiters bit. 
Waking all waiters seems like 170 0 stevel * begging for a thundering herd problem, but consider: under option (2), 171 0 stevel * every thread that grabs and drops the lock will wake one waiter -- so 172 0 stevel * if the lock is fairly active, all waiters will be awakened very quickly 173 0 stevel * anyway. Moreover, this is how adaptive locks are *supposed* to work. 174 0 stevel * The blocking case is rare; the more common case (by 3-4 orders of 175 0 stevel * magnitude) is that one or more threads spin waiting to get the lock. 176 0 stevel * Only direct handoff can prevent the thundering herd problem, but as 177 0 stevel * mentioned earlier, that would tend to defeat the adaptive spin logic. 178 0 stevel * In practice, option (3) works well because the blocking case is rare. 179 0 stevel */ 180 0 stevel 181 0 stevel /* 182 0 stevel * delayed lock retry with exponential delay for spin locks 183 0 stevel * 184 0 stevel * It is noted above that for both the spin locks and the adaptive locks, 185 0 stevel * spinning is the dominant mode of operation. So long as there is only 186 0 stevel * one thread waiting on a lock, the naive spin loop works very well in 187 0 stevel * cache based architectures. The lock data structure is pulled into the 188 0 stevel * cache of the processor with the waiting/spinning thread and no further 189 0 stevel * memory traffic is generated until the lock is released. Unfortunately, 190 0 stevel * once two or more threads are waiting on a lock, the naive spin has 191 0 stevel * the property of generating maximum memory traffic from each spinning 192 0 stevel * thread as the spinning threads contend for the lock data structure. 193 0 stevel * 194 0 stevel * By executing a delay loop before retrying a lock, a waiting thread 195 0 stevel * can reduce its memory traffic by a large factor, depending on the 196 0 stevel * size of the delay loop. 
A large delay loop greatly reduced the memory 197 0 stevel * traffic, but has the drawback of having a period of time when 198 0 stevel * no thread is attempting to gain the lock even though several threads 199 0 stevel * might be waiting. A small delay loop has the drawback of not 200 0 stevel * much reduction in memory traffic, but reduces the potential idle time. 201 0 stevel * The theory of the exponential delay code is to start with a short 202 0 stevel * delay loop and double the waiting time on each iteration, up to 203 5834 pt157919 * a preselected maximum. 204 0 stevel */ 205 0 stevel 206 0 stevel #include <sys/param.h> 207 0 stevel #include <sys/time.h> 208 0 stevel #include <sys/cpuvar.h> 209 0 stevel #include <sys/thread.h> 210 0 stevel #include <sys/debug.h> 211 0 stevel #include <sys/cmn_err.h> 212 0 stevel #include <sys/sobject.h> 213 0 stevel #include <sys/turnstile.h> 214 0 stevel #include <sys/systm.h> 215 0 stevel #include <sys/mutex_impl.h> 216 0 stevel #include <sys/spl.h> 217 0 stevel #include <sys/lockstat.h> 218 0 stevel #include <sys/atomic.h> 219 0 stevel #include <sys/cpu.h> 220 0 stevel #include <sys/stack.h> 221 5084 johnlev #include <sys/archsystm.h> 222 5834 pt157919 #include <sys/machsystm.h> 223 5834 pt157919 #include <sys/x_call.h> 224 0 stevel 225 0 stevel /* 226 0 stevel * The sobj_ops vector exports a set of functions needed when a thread 227 0 stevel * is asleep on a synchronization object of this type. 228 0 stevel */ 229 0 stevel static sobj_ops_t mutex_sobj_ops = { 230 0 stevel SOBJ_MUTEX, mutex_owner, turnstile_stay_asleep, turnstile_change_pri 231 0 stevel }; 232 0 stevel 233 0 stevel /* 234 0 stevel * If the system panics on a mutex, save the address of the offending 235 0 stevel * mutex in panic_mutex_addr, and save the contents in panic_mutex. 
236 0 stevel */ 237 0 stevel static mutex_impl_t panic_mutex; 238 0 stevel static mutex_impl_t *panic_mutex_addr; 239 0 stevel 240 0 stevel static void 241 0 stevel mutex_panic(char *msg, mutex_impl_t *lp) 242 0 stevel { 243 0 stevel if (panicstr) 244 0 stevel return; 245 0 stevel 246 0 stevel if (casptr(&panic_mutex_addr, NULL, lp) == NULL) 247 0 stevel panic_mutex = *lp; 248 0 stevel 249 0 stevel panic("%s, lp=%p owner=%p thread=%p", 250 7632 Nick msg, (void *)lp, (void *)MUTEX_OWNER(&panic_mutex), 251 7632 Nick (void *)curthread); 252 0 stevel } 253 0 stevel 254 5834 pt157919 /* "tunables" for per-platform backoff constants. */ 255 5834 pt157919 uint_t mutex_backoff_cap = 0; 256 5834 pt157919 ushort_t mutex_backoff_base = MUTEX_BACKOFF_BASE; 257 5834 pt157919 ushort_t mutex_cap_factor = MUTEX_CAP_FACTOR; 258 5834 pt157919 uchar_t mutex_backoff_shift = MUTEX_BACKOFF_SHIFT; 259 5834 pt157919 260 5834 pt157919 void 261 5834 pt157919 mutex_sync(void) 262 5834 pt157919 { 263 5834 pt157919 MUTEX_SYNC(); 264 5834 pt157919 } 265 5834 pt157919 266 5834 pt157919 /* calculate the backoff interval */ 267 6138 svemuri uint_t 268 5834 pt157919 default_lock_backoff(uint_t backoff) 269 5834 pt157919 { 270 5834 pt157919 uint_t cap; /* backoff cap calculated */ 271 5834 pt157919 272 5834 pt157919 if (backoff == 0) { 273 5834 pt157919 backoff = mutex_backoff_base; 274 5834 pt157919 /* first call just sets the base */ 275 5834 pt157919 return (backoff); 276 5834 pt157919 } 277 5834 pt157919 278 5834 pt157919 /* set cap */ 279 5834 pt157919 if (mutex_backoff_cap == 0) { 280 5834 pt157919 /* 281 5834 pt157919 * For a contended lock, in the worst case a load + cas may 282 5834 pt157919 * be queued at the controller for each contending CPU. 283 5834 pt157919 * Therefore, to avoid queueing, the accesses for all CPUS must 284 5834 pt157919 * be spread out in time over an interval of (ncpu * 285 5834 pt157919 * cap-factor). 
Maximum backoff is set to this value, and 286 5834 pt157919 * actual backoff is a random number from 0 to the current max. 287 5834 pt157919 */ 288 5834 pt157919 cap = ncpus_online * mutex_cap_factor; 289 5834 pt157919 } else { 290 5834 pt157919 cap = mutex_backoff_cap; 291 5834 pt157919 } 292 5834 pt157919 293 5834 pt157919 /* calculate new backoff value */ 294 5834 pt157919 backoff <<= mutex_backoff_shift; /* increase backoff */ 295 5834 pt157919 if (backoff > cap) { 296 5834 pt157919 if (cap < mutex_backoff_base) 297 5834 pt157919 backoff = mutex_backoff_base; 298 5834 pt157919 else 299 5834 pt157919 backoff = cap; 300 5834 pt157919 } 301 5834 pt157919 302 5834 pt157919 return (backoff); 303 5834 pt157919 } 304 5834 pt157919 305 5834 pt157919 /* 306 5834 pt157919 * default delay function for mutexes. 307 5834 pt157919 */ 308 6138 svemuri void 309 5834 pt157919 default_lock_delay(uint_t backoff) 310 5834 pt157919 { 311 5834 pt157919 ulong_t rnd; /* random factor */ 312 5834 pt157919 uint_t cur_backoff; /* calculated backoff */ 313 5834 pt157919 uint_t backctr; 314 5834 pt157919 315 5834 pt157919 /* 316 5834 pt157919 * Modify backoff by a random amount to avoid lockstep, and to 317 5834 pt157919 * make it probable that some thread gets a small backoff, and 318 5834 pt157919 * re-checks quickly 319 5834 pt157919 */ 320 5834 pt157919 rnd = (((long)curthread >> PTR24_LSB) ^ (long)MUTEX_GETTICK()); 321 5834 pt157919 cur_backoff = (uint_t)(rnd % (backoff - mutex_backoff_base + 1)) + 322 5834 pt157919 mutex_backoff_base; 323 5834 pt157919 324 5834 pt157919 /* 325 5834 pt157919 * Delay before trying 326 5834 pt157919 * to touch the mutex data structure. 
327 5834 pt157919 */ 328 5834 pt157919 for (backctr = cur_backoff; backctr; backctr--) { 329 5834 pt157919 MUTEX_DELAY(); 330 5834 pt157919 }; 331 5834 pt157919 } 332 5834 pt157919 333 5834 pt157919 uint_t (*mutex_lock_backoff)(uint_t) = default_lock_backoff; 334 5834 pt157919 void (*mutex_lock_delay)(uint_t) = default_lock_delay; 335 5834 pt157919 void (*mutex_delay)(void) = mutex_delay_default; 336 5834 pt157919 337 0 stevel /* 338 0 stevel * mutex_vector_enter() is called from the assembly mutex_enter() routine 339 0 stevel * if the lock is held or is not of type MUTEX_ADAPTIVE. 340 0 stevel */ 341 0 stevel void 342 0 stevel mutex_vector_enter(mutex_impl_t *lp) 343 0 stevel { 344 0 stevel kthread_id_t owner; 345 5834 pt157919 kthread_id_t lastowner = MUTEX_NO_OWNER; /* track owner changes */ 346 0 stevel hrtime_t sleep_time = 0; /* how long we slept */ 347 6103 ck142721 hrtime_t spin_time = 0; /* how long we spun */ 348 5834 pt157919 cpu_t *cpup; 349 0 stevel turnstile_t *ts; 350 0 stevel volatile mutex_impl_t *vlp = (volatile mutex_impl_t *)lp; 351 5834 pt157919 uint_t backoff = 0; /* current backoff */ 352 5834 pt157919 int changecnt = 0; /* count of owner changes */ 353 0 stevel 354 0 stevel ASSERT_STACK_ALIGNED(); 355 0 stevel 356 0 stevel if (MUTEX_TYPE_SPIN(lp)) { 357 0 stevel lock_set_spl(&lp->m_spin.m_spinlock, lp->m_spin.m_minspl, 358 0 stevel &lp->m_spin.m_oldspl); 359 0 stevel return; 360 0 stevel } 361 0 stevel 362 0 stevel if (!MUTEX_TYPE_ADAPTIVE(lp)) { 363 0 stevel mutex_panic("mutex_enter: bad mutex", lp); 364 0 stevel return; 365 0 stevel } 366 0 stevel 367 0 stevel /* 368 0 stevel * Adaptive mutexes must not be acquired from above LOCK_LEVEL. 369 0 stevel * We can migrate after loading CPU but before checking CPU_ON_INTR, 370 0 stevel * so we must verify by disabling preemption and loading CPU again. 
371 0 stevel */ 372 0 stevel cpup = CPU; 373 0 stevel if (CPU_ON_INTR(cpup) && !panicstr) { 374 0 stevel kpreempt_disable(); 375 0 stevel if (CPU_ON_INTR(CPU)) 376 0 stevel mutex_panic("mutex_enter: adaptive at high PIL", lp); 377 0 stevel kpreempt_enable(); 378 0 stevel } 379 0 stevel 380 0 stevel CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1); 381 0 stevel 382 6103 ck142721 spin_time = LOCKSTAT_START_TIME(LS_MUTEX_ENTER_SPIN); 383 6103 ck142721 384 5834 pt157919 backoff = mutex_lock_backoff(0); /* set base backoff */ 385 0 stevel for (;;) { 386 5834 pt157919 mutex_lock_delay(backoff); /* backoff delay */ 387 0 stevel 388 0 stevel if (panicstr) 389 0 stevel return; 390 0 stevel 391 0 stevel if ((owner = MUTEX_OWNER(vlp)) == NULL) { 392 5834 pt157919 if (mutex_adaptive_tryenter(lp)) { 393 0 stevel break; 394 5834 pt157919 } 395 5834 pt157919 /* increase backoff only on failed attempt. */ 396 5834 pt157919 backoff = mutex_lock_backoff(backoff); 397 5834 pt157919 changecnt++; 398 0 stevel continue; 399 5834 pt157919 } else if (lastowner != owner) { 400 5834 pt157919 lastowner = owner; 401 5834 pt157919 backoff = mutex_lock_backoff(backoff); 402 5834 pt157919 changecnt++; 403 5834 pt157919 } 404 5834 pt157919 405 5834 pt157919 if (changecnt >= ncpus_online) { 406 5834 pt157919 backoff = mutex_lock_backoff(0); 407 5834 pt157919 changecnt = 0; 408 0 stevel } 409 0 stevel 410 0 stevel if (owner == curthread) 411 0 stevel mutex_panic("recursive mutex_enter", lp); 412 0 stevel 413 0 stevel /* 414 0 stevel * If lock is held but owner is not yet set, spin. 415 0 stevel * (Only relevant for platforms that don't have cas.) 416 0 stevel */ 417 0 stevel if (owner == MUTEX_NO_OWNER) 418 0 stevel continue; 419 0 stevel 420 5834 pt157919 if (mutex_owner_running(lp) != NULL) { 421 5834 pt157919 continue; 422 5834 pt157919 } 423 0 stevel 424 0 stevel /* 425 0 stevel * The owner appears not to be running, so block. 426 0 stevel * See the Big Theory Statement for memory ordering issues. 
427 0 stevel */ 428 0 stevel ts = turnstile_lookup(lp); 429 0 stevel MUTEX_SET_WAITERS(lp); 430 0 stevel membar_enter(); 431 0 stevel 432 0 stevel /* 433 0 stevel * Recheck whether owner is running after waiters bit hits 434 0 stevel * global visibility (above). If owner is running, spin. 435 0 stevel */ 436 5834 pt157919 if (mutex_owner_running(lp) != NULL) { 437 5834 pt157919 turnstile_exit(lp); 438 5834 pt157919 continue; 439 5834 pt157919 } 440 0 stevel membar_consumer(); 441 0 stevel 442 0 stevel /* 443 0 stevel * If owner and waiters bit are unchanged, block. 444 0 stevel */ 445 0 stevel if (MUTEX_OWNER(vlp) == owner && MUTEX_HAS_WAITERS(vlp)) { 446 0 stevel sleep_time -= gethrtime(); 447 0 stevel (void) turnstile_block(ts, TS_WRITER_Q, lp, 448 0 stevel &mutex_sobj_ops, NULL, NULL); 449 0 stevel sleep_time += gethrtime(); 450 5834 pt157919 /* reset backoff after turnstile */ 451 5834 pt157919 backoff = mutex_lock_backoff(0); 452 0 stevel } else { 453 0 stevel turnstile_exit(lp); 454 0 stevel } 455 0 stevel } 456 0 stevel 457 0 stevel ASSERT(MUTEX_OWNER(lp) == curthread); 458 0 stevel 459 2205 dv142724 if (sleep_time != 0) { 460 2205 dv142724 /* 461 2205 dv142724 * Note, sleep time is the sum of all the sleeping we 462 2205 dv142724 * did. 463 2205 dv142724 */ 464 0 stevel LOCKSTAT_RECORD(LS_MUTEX_ENTER_BLOCK, lp, sleep_time); 465 0 stevel } 466 2205 dv142724 467 6103 ck142721 /* record spin time, don't count sleep time */ 468 6103 ck142721 if (spin_time != 0) { 469 6103 ck142721 LOCKSTAT_RECORD_TIME(LS_MUTEX_ENTER_SPIN, lp, 470 6103 ck142721 spin_time + sleep_time); 471 5834 pt157919 } 472 0 stevel 473 0 stevel LOCKSTAT_RECORD0(LS_MUTEX_ENTER_ACQUIRE, lp); 474 0 stevel } 475 0 stevel 476 0 stevel /* 477 0 stevel * mutex_vector_tryenter() is called from the assembly mutex_tryenter() 478 0 stevel * routine if the lock is held or is not of type MUTEX_ADAPTIVE. 
479 0 stevel */ 480 0 stevel int 481 0 stevel mutex_vector_tryenter(mutex_impl_t *lp) 482 0 stevel { 483 0 stevel int s; 484 0 stevel 485 0 stevel if (MUTEX_TYPE_ADAPTIVE(lp)) 486 0 stevel return (0); /* we already tried in assembly */ 487 0 stevel 488 0 stevel if (!MUTEX_TYPE_SPIN(lp)) { 489 0 stevel mutex_panic("mutex_tryenter: bad mutex", lp); 490 0 stevel return (0); 491 0 stevel } 492 0 stevel 493 0 stevel s = splr(lp->m_spin.m_minspl); 494 0 stevel if (lock_try(&lp->m_spin.m_spinlock)) { 495 0 stevel lp->m_spin.m_oldspl = (ushort_t)s; 496 0 stevel return (1); 497 0 stevel } 498 0 stevel splx(s); 499 0 stevel return (0); 500 0 stevel } 501 0 stevel 502 0 stevel /* 503 0 stevel * mutex_vector_exit() is called from mutex_exit() if the lock is not 504 0 stevel * adaptive, has waiters, or is not owned by the current thread (panic). 505 0 stevel */ 506 0 stevel void 507 0 stevel mutex_vector_exit(mutex_impl_t *lp) 508 0 stevel { 509 0 stevel turnstile_t *ts; 510 0 stevel 511 0 stevel if (MUTEX_TYPE_SPIN(lp)) { 512 0 stevel lock_clear_splx(&lp->m_spin.m_spinlock, lp->m_spin.m_oldspl); 513 0 stevel return; 514 0 stevel } 515 0 stevel 516 0 stevel if (MUTEX_OWNER(lp) != curthread) { 517 0 stevel mutex_panic("mutex_exit: not owner", lp); 518 0 stevel return; 519 0 stevel } 520 0 stevel 521 0 stevel ts = turnstile_lookup(lp); 522 0 stevel MUTEX_CLEAR_LOCK_AND_WAITERS(lp); 523 0 stevel if (ts == NULL) 524 0 stevel turnstile_exit(lp); 525 0 stevel else 526 0 stevel turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL); 527 0 stevel LOCKSTAT_RECORD0(LS_MUTEX_EXIT_RELEASE, lp); 528 0 stevel } 529 0 stevel 530 0 stevel int 531 6712 tomee mutex_owned(const kmutex_t *mp) 532 0 stevel { 533 6712 tomee const mutex_impl_t *lp = (const mutex_impl_t *)mp; 534 0 stevel 535 7656 Sherry if (panicstr || quiesce_active) 536 0 stevel return (1); 537 0 stevel 538 0 stevel if (MUTEX_TYPE_ADAPTIVE(lp)) 539 0 stevel return (MUTEX_OWNER(lp) == curthread); 540 0 stevel return 
(LOCK_HELD(&lp->m_spin.m_spinlock)); 541 0 stevel } 542 0 stevel 543 0 stevel kthread_t * 544 6712 tomee mutex_owner(const kmutex_t *mp) 545 0 stevel { 546 6712 tomee const mutex_impl_t *lp = (const mutex_impl_t *)mp; 547 0 stevel kthread_id_t t; 548 0 stevel 549 0 stevel if (MUTEX_TYPE_ADAPTIVE(lp) && (t = MUTEX_OWNER(lp)) != MUTEX_NO_OWNER) 550 0 stevel return (t); 551 0 stevel return (NULL); 552 0 stevel } 553 0 stevel 554 0 stevel /* 555 0 stevel * The iblock cookie 'ibc' is the spl level associated with the lock; 556 0 stevel * this alone determines whether the lock will be ADAPTIVE or SPIN. 557 0 stevel * 558 0 stevel * Adaptive mutexes created in zeroed memory do not need to call 559 0 stevel * mutex_init() as their allocation in this fashion guarantees 560 0 stevel * their initialization. 561 0 stevel * eg adaptive mutexes created as static within the BSS or allocated 562 0 stevel * by kmem_zalloc(). 563 0 stevel */ 564 0 stevel /* ARGSUSED */ 565 0 stevel void 566 0 stevel mutex_init(kmutex_t *mp, char *name, kmutex_type_t type, void *ibc) 567 0 stevel { 568 0 stevel mutex_impl_t *lp = (mutex_impl_t *)mp; 569 0 stevel 570 0 stevel ASSERT(ibc < (void *)KERNELBASE); /* see 1215173 */ 571 0 stevel 572 0 stevel if ((intptr_t)ibc > ipltospl(LOCK_LEVEL) && ibc < (void *)KERNELBASE) { 573 0 stevel ASSERT(type != MUTEX_ADAPTIVE && type != MUTEX_DEFAULT); 574 0 stevel MUTEX_SET_TYPE(lp, MUTEX_SPIN); 575 0 stevel LOCK_INIT_CLEAR(&lp->m_spin.m_spinlock); 576 0 stevel LOCK_INIT_HELD(&lp->m_spin.m_dummylock); 577 0 stevel lp->m_spin.m_minspl = (int)(intptr_t)ibc; 578 0 stevel } else { 579 6617 ck142721 #ifdef MUTEX_ALIGN 580 6617 ck142721 static int misalign_cnt = 0; 581 6617 ck142721 582 6617 ck142721 if (((uintptr_t)lp & (uintptr_t)(MUTEX_ALIGN - 1)) && 583 6617 ck142721 (misalign_cnt < MUTEX_ALIGN_WARNINGS)) { 584 6617 ck142721 /* 585 6617 ck142721 * The mutex is not aligned and may cross a cache line. 586 6617 ck142721 * This is not supported and may cause a panic. 
587 6617 ck142721 * Show a warning that the mutex is not aligned 588 6617 ck142721 * and attempt to identify the origin. 589 6617 ck142721 * Unaligned mutexes are not (supposed to be) 590 6617 ck142721 * possible on SPARC. 591 6617 ck142721 */ 592 6617 ck142721 char *funcname; 593 6617 ck142721 ulong_t offset = 0; 594 6617 ck142721 595 6617 ck142721 funcname = modgetsymname((uintptr_t)caller(), &offset); 596 6617 ck142721 cmn_err(CE_WARN, "mutex_init: %p is not %d byte " 597 6617 ck142721 "aligned; caller %s+%lx in module %s. " 598 6617 ck142721 "This is unsupported and may cause a panic. " 599 6617 ck142721 "Please report this to the kernel module supplier.", 600 6626 ck142721 (void *)lp, MUTEX_ALIGN, 601 6617 ck142721 funcname ? funcname : "unknown", offset, 602 6617 ck142721 mod_containing_pc(caller())); 603 6617 ck142721 misalign_cnt++; 604 6617 ck142721 if (misalign_cnt >= MUTEX_ALIGN_WARNINGS) { 605 6617 ck142721 cmn_err(CE_WARN, "mutex_init: further unaligned" 606 6617 ck142721 " mutex warnings will be suppressed."); 607 6617 ck142721 } 608 6617 ck142721 } 609 6617 ck142721 #endif /* MUTEX_ALIGN */ 610 0 stevel ASSERT(type != MUTEX_SPIN); 611 6617 ck142721 612 0 stevel MUTEX_SET_TYPE(lp, MUTEX_ADAPTIVE); 613 0 stevel MUTEX_CLEAR_LOCK_AND_WAITERS(lp); 614 0 stevel } 615 0 stevel } 616 0 stevel 617 0 stevel void 618 0 stevel mutex_destroy(kmutex_t *mp) 619 0 stevel { 620 0 stevel mutex_impl_t *lp = (mutex_impl_t *)mp; 621 0 stevel 622 0 stevel if (lp->m_owner == 0 && !MUTEX_HAS_WAITERS(lp)) { 623 0 stevel MUTEX_DESTROY(lp); 624 0 stevel } else if (MUTEX_TYPE_SPIN(lp)) { 625 0 stevel LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp); 626 0 stevel MUTEX_DESTROY(lp); 627 0 stevel } else if (MUTEX_TYPE_ADAPTIVE(lp)) { 628 0 stevel LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp); 629 0 stevel if (MUTEX_OWNER(lp) != curthread) 630 0 stevel mutex_panic("mutex_destroy: not owner", lp); 631 0 stevel if (MUTEX_HAS_WAITERS(lp)) { 632 0 stevel turnstile_t *ts = 
turnstile_lookup(lp); 633 0 stevel turnstile_exit(lp); 634 0 stevel if (ts != NULL) 635 0 stevel mutex_panic("mutex_destroy: has waiters", lp); 636 0 stevel } 637 0 stevel MUTEX_DESTROY(lp); 638 0 stevel } else { 639 0 stevel mutex_panic("mutex_destroy: bad mutex", lp); 640 0 stevel } 641 0 stevel } 642 0 stevel 643 0 stevel /* 644 0 stevel * Simple C support for the cases where spin locks miss on the first try. 645 0 stevel */ 646 0 stevel void 647 0 stevel lock_set_spin(lock_t *lp) 648 0 stevel { 649 5834 pt157919 int loop_count = 0; 650 5834 pt157919 uint_t backoff = 0; /* current backoff */ 651 6103 ck142721 hrtime_t spin_time = 0; /* how long we spun */ 652 0 stevel 653 0 stevel if (panicstr) 654 0 stevel return; 655 0 stevel 656 0 stevel if (ncpus == 1) 657 7632 Nick panic("lock_set: %p lock held and only one CPU", (void *)lp); 658 0 stevel 659 6103 ck142721 spin_time = LOCKSTAT_START_TIME(LS_LOCK_SET_SPIN); 660 6103 ck142721 661 0 stevel while (LOCK_HELD(lp) || !lock_spin_try(lp)) { 662 0 stevel if (panicstr) 663 0 stevel return; 664 5834 pt157919 loop_count++; 665 5834 pt157919 666 5834 pt157919 if (ncpus_online == loop_count) { 667 5834 pt157919 backoff = mutex_lock_backoff(0); 668 5834 pt157919 loop_count = 0; 669 3914 pm145316 } else { 670 5834 pt157919 backoff = mutex_lock_backoff(backoff); 671 0 stevel } 672 5834 pt157919 mutex_lock_delay(backoff); 673 0 stevel } 674 0 stevel 675 6103 ck142721 LOCKSTAT_RECORD_TIME(LS_LOCK_SET_SPIN, lp, spin_time); 676 0 stevel 677 0 stevel LOCKSTAT_RECORD0(LS_LOCK_SET_ACQUIRE, lp); 678 0 stevel } 679 0 stevel 680 0 stevel void 681 0 stevel lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil) 682 0 stevel { 683 5834 pt157919 int loop_count = 0; 684 5834 pt157919 uint_t backoff = 0; /* current backoff */ 685 6103 ck142721 hrtime_t spin_time = 0; /* how long we spun */ 686 0 stevel 687 0 stevel if (panicstr) 688 0 stevel return; 689 0 stevel 690 0 stevel if (ncpus == 1) 691 7632 Nick 
panic("lock_set_spl: %p lock held and only one CPU", 692 7632 Nick (void *)lp); 693 0 stevel 694 0 stevel ASSERT(new_pil > LOCK_LEVEL); 695 0 stevel 696 6103 ck142721 spin_time = LOCKSTAT_START_TIME(LS_LOCK_SET_SPL_SPIN); 697 6103 ck142721 698 0 stevel do { 699 0 stevel splx(old_pil); 700 0 stevel while (LOCK_HELD(lp)) { 701 5834 pt157919 loop_count++; 702 5834 pt157919 703 0 stevel if (panicstr) { 704 0 stevel *old_pil_addr = (ushort_t)splr(new_pil); 705 0 stevel return; 706 0 stevel } 707 5834 pt157919 if (ncpus_online == loop_count) { 708 5834 pt157919 backoff = mutex_lock_backoff(0); 709 5834 pt157919 loop_count = 0; 710 3914 pm145316 } else { 711 5834 pt157919 backoff = mutex_lock_backoff(backoff); 712 0 stevel } 713 5834 pt157919 mutex_lock_delay(backoff); 714 0 stevel } 715 0 stevel old_pil = splr(new_pil); 716 0 stevel } while (!lock_spin_try(lp)); 717 0 stevel 718 0 stevel *old_pil_addr = (ushort_t)old_pil; 719 0 stevel 720 6103 ck142721 LOCKSTAT_RECORD_TIME(LS_LOCK_SET_SPL_SPIN, lp, spin_time); 721 0 stevel 722 6103 ck142721 LOCKSTAT_RECORD0(LS_LOCK_SET_SPL_ACQUIRE, lp); 723 0 stevel } 724