Home | History | Annotate | Download | only in threads
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include "lint.h"
     28 #include "thr_uberdata.h"
     29 #include <sys/sdt.h>
     30 
     31 #define	TRY_FLAG		0x10
     32 #define	READ_LOCK		0
     33 #define	WRITE_LOCK		1
     34 #define	READ_LOCK_TRY		(READ_LOCK | TRY_FLAG)
     35 #define	WRITE_LOCK_TRY		(WRITE_LOCK | TRY_FLAG)
     36 
     37 #define	NLOCKS	4	/* initial number of readlock_t structs allocated */
     38 
     39 #define	ASSERT_CONSISTENT_STATE(readers)		\
     40 	ASSERT(!((readers) & URW_WRITE_LOCKED) ||	\
     41 		((readers) & ~URW_HAS_WAITERS) == URW_WRITE_LOCKED)
     42 
     43 /*
     44  * Find/allocate an entry for rwlp in our array of rwlocks held for reading.
     45  * We must be deferring signals for this to be safe.
     46  * Else if we are returning an entry with ul_rdlockcnt == 0,
     47  * it could be reassigned behind our back in a signal handler.
     48  */
     49 static readlock_t *
     50 rwl_entry(rwlock_t *rwlp)
     51 {
     52 	ulwp_t *self = curthread;
     53 	readlock_t *remembered = NULL;
     54 	readlock_t *readlockp;
     55 	uint_t nlocks;
     56 
     57 	/* we must be deferring signals */
     58 	ASSERT((self->ul_critical + self->ul_sigdefer) != 0);
     59 
     60 	if ((nlocks = self->ul_rdlockcnt) != 0)
     61 		readlockp = self->ul_readlock.array;
     62 	else {
     63 		nlocks = 1;
     64 		readlockp = &self->ul_readlock.single;
     65 	}
     66 
     67 	for (; nlocks; nlocks--, readlockp++) {
     68 		if (readlockp->rd_rwlock == rwlp)
     69 			return (readlockp);
     70 		if (readlockp->rd_count == 0 && remembered == NULL)
     71 			remembered = readlockp;
     72 	}
     73 	if (remembered != NULL) {
     74 		remembered->rd_rwlock = rwlp;
     75 		return (remembered);
     76 	}
     77 
     78 	/*
     79 	 * No entry available.  Allocate more space, converting the single
     80 	 * readlock_t entry into an array of readlock_t entries if necessary.
     81 	 */
     82 	if ((nlocks = self->ul_rdlockcnt) == 0) {
     83 		/*
     84 		 * Initial allocation of the readlock_t array.
     85 		 * Convert the single entry into an array.
     86 		 */
     87 		self->ul_rdlockcnt = nlocks = NLOCKS;
     88 		readlockp = lmalloc(nlocks * sizeof (readlock_t));
     89 		/*
     90 		 * The single readlock_t becomes the first entry in the array.
     91 		 */
     92 		*readlockp = self->ul_readlock.single;
     93 		self->ul_readlock.single.rd_count = 0;
     94 		self->ul_readlock.array = readlockp;
     95 		/*
     96 		 * Return the next available entry in the array.
     97 		 */
     98 		(++readlockp)->rd_rwlock = rwlp;
     99 		return (readlockp);
    100 	}
    101 	/*
    102 	 * Reallocate the array, double the size each time.
    103 	 */
    104 	readlockp = lmalloc(nlocks * 2 * sizeof (readlock_t));
    105 	(void) memcpy(readlockp, self->ul_readlock.array,
    106 	    nlocks * sizeof (readlock_t));
    107 	lfree(self->ul_readlock.array, nlocks * sizeof (readlock_t));
    108 	self->ul_readlock.array = readlockp;
    109 	self->ul_rdlockcnt *= 2;
    110 	/*
    111 	 * Return the next available entry in the newly allocated array.
    112 	 */
    113 	(readlockp += nlocks)->rd_rwlock = rwlp;
    114 	return (readlockp);
    115 }
    116 
    117 /*
    118  * Free the array of rwlocks held for reading.
    119  */
    120 void
    121 rwl_free(ulwp_t *ulwp)
    122 {
    123 	uint_t nlocks;
    124 
    125 	if ((nlocks = ulwp->ul_rdlockcnt) != 0)
    126 		lfree(ulwp->ul_readlock.array, nlocks * sizeof (readlock_t));
    127 	ulwp->ul_rdlockcnt = 0;
    128 	ulwp->ul_readlock.single.rd_rwlock = NULL;
    129 	ulwp->ul_readlock.single.rd_count = 0;
    130 }
    131 
    132 /*
    133  * Check if a reader version of the lock is held by the current thread.
    134  */
    135 #pragma weak _rw_read_held = rw_read_held
    136 int
    137 rw_read_held(rwlock_t *rwlp)
    138 {
    139 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
    140 	uint32_t readers;
    141 	ulwp_t *self = curthread;
    142 	readlock_t *readlockp;
    143 	uint_t nlocks;
    144 	int rval = 0;
    145 
    146 	no_preempt(self);
    147 
    148 	readers = *rwstate;
    149 	ASSERT_CONSISTENT_STATE(readers);
    150 	if (!(readers & URW_WRITE_LOCKED) &&
    151 	    (readers & URW_READERS_MASK) != 0) {
    152 		/*
    153 		 * The lock is held for reading by some thread.
    154 		 * Search our array of rwlocks held for reading for a match.
    155 		 */
    156 		if ((nlocks = self->ul_rdlockcnt) != 0)
    157 			readlockp = self->ul_readlock.array;
    158 		else {
    159 			nlocks = 1;
    160 			readlockp = &self->ul_readlock.single;
    161 		}
    162 		for (; nlocks; nlocks--, readlockp++) {
    163 			if (readlockp->rd_rwlock == rwlp) {
    164 				if (readlockp->rd_count)
    165 					rval = 1;
    166 				break;
    167 			}
    168 		}
    169 	}
    170 
    171 	preempt(self);
    172 	return (rval);
    173 }
    174 
    175 /*
    176  * Check if a writer version of the lock is held by the current thread.
    177  */
    178 #pragma weak _rw_write_held = rw_write_held
    179 int
    180 rw_write_held(rwlock_t *rwlp)
    181 {
    182 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
    183 	uint32_t readers;
    184 	ulwp_t *self = curthread;
    185 	int rval;
    186 
    187 	no_preempt(self);
    188 
    189 	readers = *rwstate;
    190 	ASSERT_CONSISTENT_STATE(readers);
    191 	rval = ((readers & URW_WRITE_LOCKED) &&
    192 	    rwlp->rwlock_owner == (uintptr_t)self &&
    193 	    (rwlp->rwlock_type == USYNC_THREAD ||
    194 	    rwlp->rwlock_ownerpid == self->ul_uberdata->pid));
    195 
    196 	preempt(self);
    197 	return (rval);
    198 }
    199 
    200 #pragma weak _rwlock_init = rwlock_init
    201 /* ARGSUSED2 */
    202 int
    203 rwlock_init(rwlock_t *rwlp, int type, void *arg)
    204 {
    205 	ulwp_t *self = curthread;
    206 
    207 	if (type != USYNC_THREAD && type != USYNC_PROCESS)
    208 		return (EINVAL);
    209 	/*
    210 	 * Once reinitialized, we can no longer be holding a read or write lock.
    211 	 * We can do nothing about other threads that are holding read locks.
    212 	 */
    213 	sigoff(self);
    214 	rwl_entry(rwlp)->rd_count = 0;
    215 	sigon(self);
    216 	(void) memset(rwlp, 0, sizeof (*rwlp));
    217 	rwlp->rwlock_type = (uint16_t)type;
    218 	rwlp->rwlock_magic = RWL_MAGIC;
    219 	rwlp->mutex.mutex_type = (uint8_t)type;
    220 	rwlp->mutex.mutex_flag = LOCK_INITED;
    221 	rwlp->mutex.mutex_magic = MUTEX_MAGIC;
    222 
    223 	/*
    224 	 * This should be at the beginning of the function,
    225 	 * but for the sake of old broken applications that
    226 	 * do not have proper alignment for their rwlocks
    227 	 * (and don't check the return code from rwlock_init),
    228 	 * we put it here, after initializing the rwlock regardless.
    229 	 */
    230 	if (((uintptr_t)rwlp & (_LONG_LONG_ALIGNMENT - 1)) &&
    231 	    self->ul_misaligned == 0)
    232 		return (EINVAL);
    233 
    234 	return (0);
    235 }
    236 
    237 #pragma weak pthread_rwlock_destroy = rwlock_destroy
    238 #pragma weak _rwlock_destroy = rwlock_destroy
    239 int
    240 rwlock_destroy(rwlock_t *rwlp)
    241 {
    242 	ulwp_t *self = curthread;
    243 
    244 	/*
    245 	 * Once destroyed, we can no longer be holding a read or write lock.
    246 	 * We can do nothing about other threads that are holding read locks.
    247 	 */
    248 	sigoff(self);
    249 	rwl_entry(rwlp)->rd_count = 0;
    250 	sigon(self);
    251 	rwlp->rwlock_magic = 0;
    252 	tdb_sync_obj_deregister(rwlp);
    253 	return (0);
    254 }
    255 
    256 /*
    257  * Attempt to acquire a readers lock.  Return true on success.
    258  */
    259 static int
    260 read_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
    261 {
    262 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
    263 	uint32_t mask = ignore_waiters_flag?
    264 	    URW_WRITE_LOCKED : (URW_HAS_WAITERS | URW_WRITE_LOCKED);
    265 	uint32_t readers;
    266 	ulwp_t *self = curthread;
    267 
    268 	no_preempt(self);
    269 	while (((readers = *rwstate) & mask) == 0) {
    270 		if (atomic_cas_32(rwstate, readers, readers + 1) == readers) {
    271 			preempt(self);
    272 			return (1);
    273 		}
    274 	}
    275 	preempt(self);
    276 	return (0);
    277 }
    278 
    279 /*
    280  * Attempt to release a reader lock.  Return true on success.
    281  */
    282 static int
    283 read_unlock_try(rwlock_t *rwlp)
    284 {
    285 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
    286 	uint32_t readers;
    287 	ulwp_t *self = curthread;
    288 
    289 	no_preempt(self);
    290 	while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) {
    291 		if (atomic_cas_32(rwstate, readers, readers - 1) == readers) {
    292 			preempt(self);
    293 			return (1);
    294 		}
    295 	}
    296 	preempt(self);
    297 	return (0);
    298 }
    299 
    300 /*
    301  * Attempt to acquire a writer lock.  Return true on success.
    302  */
    303 static int
    304 write_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
    305 {
    306 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
    307 	uint32_t mask = ignore_waiters_flag?
    308 	    (URW_WRITE_LOCKED | URW_READERS_MASK) :
    309 	    (URW_HAS_WAITERS | URW_WRITE_LOCKED | URW_READERS_MASK);
    310 	ulwp_t *self = curthread;
    311 	uint32_t readers;
    312 
    313 	no_preempt(self);
    314 	while (((readers = *rwstate) & mask) == 0) {
    315 		if (atomic_cas_32(rwstate, readers, readers | URW_WRITE_LOCKED)
    316 		    == readers) {
    317 			preempt(self);
    318 			return (1);
    319 		}
    320 	}
    321 	preempt(self);
    322 	return (0);
    323 }
    324 
    325 /*
    326  * Attempt to release a writer lock.  Return true on success.
    327  */
    328 static int
    329 write_unlock_try(rwlock_t *rwlp)
    330 {
    331 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
    332 	uint32_t readers;
    333 	ulwp_t *self = curthread;
    334 
    335 	no_preempt(self);
    336 	while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) {
    337 		if (atomic_cas_32(rwstate, readers, 0) == readers) {
    338 			preempt(self);
    339 			return (1);
    340 		}
    341 	}
    342 	preempt(self);
    343 	return (0);
    344 }
    345 
    346 /*
    347  * Wake up thread(s) sleeping on the rwlock queue and then
    348  * drop the queue lock.  Return non-zero if we wake up someone.
    349  * This is called when a thread releases a lock that appears to have waiters.
    350  */
    351 static int
    352 rw_queue_release(queue_head_t *qp, rwlock_t *rwlp)
    353 {
    354 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
    355 	uint32_t readers;
    356 	uint32_t writers;
    357 	ulwp_t **ulwpp;
    358 	ulwp_t *ulwp;
    359 	ulwp_t *prev;
    360 	int nlwpid = 0;
    361 	int more;
    362 	int maxlwps = MAXLWPS;
    363 	lwpid_t buffer[MAXLWPS];
    364 	lwpid_t *lwpid = buffer;
    365 
    366 	readers = *rwstate;
    367 	ASSERT_CONSISTENT_STATE(readers);
    368 	if (!(readers & URW_HAS_WAITERS)) {
    369 		queue_unlock(qp);
    370 		return (0);
    371 	}
    372 	readers &= URW_READERS_MASK;
    373 	writers = 0;
    374 
    375 	/*
    376 	 * Examine the queue of waiters in priority order and prepare
    377 	 * to wake up as many readers as we encounter before encountering
    378 	 * a writer.  If the highest priority thread on the queue is a
    379 	 * writer, stop there and wake it up.
    380 	 *
    381 	 * We keep track of lwpids that are to be unparked in lwpid[].
    382 	 * __lwp_unpark_all() is called to unpark all of them after
    383 	 * they have been removed from the sleep queue and the sleep
    384 	 * queue lock has been dropped.  If we run out of space in our
    385 	 * on-stack buffer, we need to allocate more but we can't call
    386 	 * lmalloc() because we are holding a queue lock when the overflow
    387 	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
    388 	 * either because the application may have allocated a small
    389 	 * stack and we don't want to overrun the stack.  So we call
    390 	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
    391 	 * system call directly since that path acquires no locks.
    392 	 */
    393 	while ((ulwpp = queue_slot(qp, &prev, &more)) != NULL) {
    394 		ulwp = *ulwpp;
    395 		ASSERT(ulwp->ul_wchan == rwlp);
    396 		if (ulwp->ul_writer) {
    397 			if (writers != 0 || readers != 0)
    398 				break;
    399 			/* one writer to wake */
    400 			writers++;
    401 		} else {
    402 			if (writers != 0)
    403 				break;
    404 			/* at least one reader to wake */
    405 			readers++;
    406 			if (nlwpid == maxlwps)
    407 				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
    408 		}
    409 		queue_unlink(qp, ulwpp, prev);
    410 		ulwp->ul_sleepq = NULL;
    411 		ulwp->ul_wchan = NULL;
    412 		lwpid[nlwpid++] = ulwp->ul_lwpid;
    413 	}
    414 	if (ulwpp == NULL)
    415 		atomic_and_32(rwstate, ~URW_HAS_WAITERS);
    416 	if (nlwpid == 0) {
    417 		queue_unlock(qp);
    418 	} else {
    419 		ulwp_t *self = curthread;
    420 		no_preempt(self);
    421 		queue_unlock(qp);
    422 		if (nlwpid == 1)
    423 			(void) __lwp_unpark(lwpid[0]);
    424 		else
    425 			(void) __lwp_unpark_all(lwpid, nlwpid);
    426 		preempt(self);
    427 	}
    428 	if (lwpid != buffer)
    429 		(void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t));
    430 	return (nlwpid != 0);
    431 }
    432 
    433 /*
    434  * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
    435  * and trywrlock for process-shared (USYNC_PROCESS) rwlocks.
    436  *
    437  * Note: if the lock appears to be contended we call __lwp_rwlock_rdlock()
    438  * or __lwp_rwlock_wrlock() holding the mutex. These return with the mutex
    439  * released, and if they need to sleep will release the mutex first. In the
    440  * event of a spurious wakeup, these will return EAGAIN (because it is much
    441  * easier for us to re-acquire the mutex here).
    442  */
    443 int
    444 shared_rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
    445 {
    446 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
    447 	mutex_t *mp = &rwlp->mutex;
    448 	uint32_t readers;
    449 	int try_flag;
    450 	int error;
    451 
    452 	try_flag = (rd_wr & TRY_FLAG);
    453 	rd_wr &= ~TRY_FLAG;
    454 	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);
    455 
    456 	if (!try_flag) {
    457 		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
    458 	}
    459 
    460 	do {
    461 		if (try_flag && (*rwstate & URW_WRITE_LOCKED)) {
    462 			error = EBUSY;
    463 			break;
    464 		}
    465 		if ((error = mutex_lock(mp)) != 0)
    466 			break;
    467 		if (rd_wr == READ_LOCK) {
    468 			if (read_lock_try(rwlp, 0)) {
    469 				(void) mutex_unlock(mp);
    470 				break;
    471 			}
    472 		} else {
    473 			if (write_lock_try(rwlp, 0)) {
    474 				(void) mutex_unlock(mp);
    475 				break;
    476 			}
    477 		}
    478 		atomic_or_32(rwstate, URW_HAS_WAITERS);
    479 		readers = *rwstate;
    480 		ASSERT_CONSISTENT_STATE(readers);
    481 		/*
    482 		 * The calls to __lwp_rwlock_*() below will release the mutex,
    483 		 * so we need a dtrace probe here.  The owner field of the
    484 		 * mutex is cleared in the kernel when the mutex is released,
    485 		 * so we should not clear it here.
    486 		 */
    487 		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
    488 		/*
    489 		 * The waiters bit may be inaccurate.
    490 		 * Only the kernel knows for sure.
    491 		 */
    492 		if (rd_wr == READ_LOCK) {
    493 			if (try_flag)
    494 				error = __lwp_rwlock_tryrdlock(rwlp);
    495 			else
    496 				error = __lwp_rwlock_rdlock(rwlp, tsp);
    497 		} else {
    498 			if (try_flag)
    499 				error = __lwp_rwlock_trywrlock(rwlp);
    500 			else
    501 				error = __lwp_rwlock_wrlock(rwlp, tsp);
    502 		}
    503 	} while (error == EAGAIN || error == EINTR);
    504 
    505 	if (!try_flag) {
    506 		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
    507 	}
    508 
    509 	return (error);
    510 }
    511 
    512 /*
    513  * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
    514  * and trywrlock for process-private (USYNC_THREAD) rwlocks.
    515  */
    516 int
    517 rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
    518 {
    519 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
    520 	uint32_t readers;
    521 	ulwp_t *self = curthread;
    522 	queue_head_t *qp;
    523 	ulwp_t *ulwp;
    524 	int try_flag;
    525 	int ignore_waiters_flag;
    526 	int error = 0;
    527 
    528 	try_flag = (rd_wr & TRY_FLAG);
    529 	rd_wr &= ~TRY_FLAG;
    530 	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);
    531 
    532 	if (!try_flag) {
    533 		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
    534 	}
    535 
    536 	qp = queue_lock(rwlp, MX);
    537 	/* initial attempt to acquire the lock fails if there are waiters */
    538 	ignore_waiters_flag = 0;
    539 	while (error == 0) {
    540 		if (rd_wr == READ_LOCK) {
    541 			if (read_lock_try(rwlp, ignore_waiters_flag))
    542 				break;
    543 		} else {
    544 			if (write_lock_try(rwlp, ignore_waiters_flag))
    545 				break;
    546 		}
    547 		/* subsequent attempts do not fail due to waiters */
    548 		ignore_waiters_flag = 1;
    549 		atomic_or_32(rwstate, URW_HAS_WAITERS);
    550 		readers = *rwstate;
    551 		ASSERT_CONSISTENT_STATE(readers);
    552 		if ((readers & URW_WRITE_LOCKED) ||
    553 		    (rd_wr == WRITE_LOCK &&
    554 		    (readers & URW_READERS_MASK) != 0))
    555 			/* EMPTY */;	/* somebody holds the lock */
    556 		else if ((ulwp = queue_waiter(qp)) == NULL) {
    557 			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
    558 			continue;	/* no queued waiters, try again */
    559 		} else {
    560 			/*
    561 			 * Do a priority check on the queued waiter (the
    562 			 * highest priority thread on the queue) to see
    563 			 * if we should defer to him or just grab the lock.
    564 			 */
    565 			int our_pri = real_priority(self);
    566 			int his_pri = real_priority(ulwp);
    567 
    568 			if (rd_wr == WRITE_LOCK) {
    569 				/*
    570 				 * We defer to a queued thread that has
    571 				 * a higher priority than ours.
    572 				 */
    573 				if (his_pri <= our_pri)
    574 					continue;	/* try again */
    575 			} else {
    576 				/*
    577 				 * We defer to a queued thread that has
    578 				 * a higher priority than ours or that
    579 				 * is a writer whose priority equals ours.
    580 				 */
    581 				if (his_pri < our_pri ||
    582 				    (his_pri == our_pri && !ulwp->ul_writer))
    583 					continue;	/* try again */
    584 			}
    585 		}
    586 		/*
    587 		 * We are about to block.
    588 		 * If we're doing a trylock, return EBUSY instead.
    589 		 */
    590 		if (try_flag) {
    591 			error = EBUSY;
    592 			break;
    593 		}
    594 		/*
    595 		 * Enqueue writers ahead of readers.
    596 		 */
    597 		self->ul_writer = rd_wr;	/* *must* be 0 or 1 */
    598 		enqueue(qp, self, 0);
    599 		set_parking_flag(self, 1);
    600 		queue_unlock(qp);
    601 		if ((error = __lwp_park(tsp, 0)) == EINTR)
    602 			error = ignore_waiters_flag = 0;
    603 		set_parking_flag(self, 0);
    604 		qp = queue_lock(rwlp, MX);
    605 		if (self->ul_sleepq && dequeue_self(qp) == 0)
    606 			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
    607 		self->ul_writer = 0;
    608 	}
    609 
    610 	queue_unlock(qp);
    611 
    612 	if (!try_flag) {
    613 		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
    614 	}
    615 
    616 	return (error);
    617 }
    618 
    619 int
    620 rw_rdlock_impl(rwlock_t *rwlp, timespec_t *tsp)
    621 {
    622 	ulwp_t *self = curthread;
    623 	uberdata_t *udp = self->ul_uberdata;
    624 	readlock_t *readlockp;
    625 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
    626 	int error;
    627 
    628 	/*
    629 	 * If we already hold a readers lock on this rwlock,
    630 	 * just increment our reference count and return.
    631 	 */
    632 	sigoff(self);
    633 	readlockp = rwl_entry(rwlp);
    634 	if (readlockp->rd_count != 0) {
    635 		if (readlockp->rd_count == READ_LOCK_MAX) {
    636 			sigon(self);
    637 			error = EAGAIN;
    638 			goto out;
    639 		}
    640 		sigon(self);
    641 		error = 0;
    642 		goto out;
    643 	}
    644 	sigon(self);
    645 
    646 	/*
    647 	 * If we hold the writer lock, bail out.
    648 	 */
    649 	if (rw_write_held(rwlp)) {
    650 		if (self->ul_error_detection)
    651 			rwlock_error(rwlp, "rwlock_rdlock",
    652 			    "calling thread owns the writer lock");
    653 		error = EDEADLK;
    654 		goto out;
    655 	}
    656 
    657 	if (read_lock_try(rwlp, 0))
    658 		error = 0;
    659 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
    660 		error = shared_rwlock_lock(rwlp, tsp, READ_LOCK);
    661 	else						/* user-level */
    662 		error = rwlock_lock(rwlp, tsp, READ_LOCK);
    663 
    664 out:
    665 	if (error == 0) {
    666 		sigoff(self);
    667 		rwl_entry(rwlp)->rd_count++;
    668 		sigon(self);
    669 		if (rwsp)
    670 			tdb_incr(rwsp->rw_rdlock);
    671 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
    672 	} else {
    673 		DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK, error);
    674 	}
    675 
    676 	return (error);
    677 }
    678 
    679 #pragma weak pthread_rwlock_rdlock = rw_rdlock
    680 #pragma weak _rw_rdlock = rw_rdlock
    681 int
    682 rw_rdlock(rwlock_t *rwlp)
    683 {
    684 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
    685 	return (rw_rdlock_impl(rwlp, NULL));
    686 }
    687 
    688 void
    689 lrw_rdlock(rwlock_t *rwlp)
    690 {
    691 	enter_critical(curthread);
    692 	(void) rw_rdlock_impl(rwlp, NULL);
    693 }
    694 
    695 int
    696 pthread_rwlock_reltimedrdlock_np(pthread_rwlock_t *_RESTRICT_KYWD rwlp,
    697     const struct timespec *_RESTRICT_KYWD reltime)
    698 {
    699 	timespec_t tslocal = *reltime;
    700 	int error;
    701 
    702 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
    703 	error = rw_rdlock_impl((rwlock_t *)rwlp, &tslocal);
    704 	if (error == ETIME)
    705 		error = ETIMEDOUT;
    706 	return (error);
    707 }
    708 
    709 int
    710 pthread_rwlock_timedrdlock(pthread_rwlock_t *_RESTRICT_KYWD rwlp,
    711     const struct timespec *_RESTRICT_KYWD abstime)
    712 {
    713 	timespec_t tslocal;
    714 	int error;
    715 
    716 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
    717 	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
    718 	error = rw_rdlock_impl((rwlock_t *)rwlp, &tslocal);
    719 	if (error == ETIME)
    720 		error = ETIMEDOUT;
    721 	return (error);
    722 }
    723 
    724 int
    725 rw_wrlock_impl(rwlock_t *rwlp, timespec_t *tsp)
    726 {
    727 	ulwp_t *self = curthread;
    728 	uberdata_t *udp = self->ul_uberdata;
    729 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
    730 	int error;
    731 
    732 	/*
    733 	 * If we hold a readers lock on this rwlock, bail out.
    734 	 */
    735 	if (rw_read_held(rwlp)) {
    736 		if (self->ul_error_detection)
    737 			rwlock_error(rwlp, "rwlock_wrlock",
    738 			    "calling thread owns the readers lock");
    739 		error = EDEADLK;
    740 		goto out;
    741 	}
    742 
    743 	/*
    744 	 * If we hold the writer lock, bail out.
    745 	 */
    746 	if (rw_write_held(rwlp)) {
    747 		if (self->ul_error_detection)
    748 			rwlock_error(rwlp, "rwlock_wrlock",
    749 			    "calling thread owns the writer lock");
    750 		error = EDEADLK;
    751 		goto out;
    752 	}
    753 
    754 	if (write_lock_try(rwlp, 0))
    755 		error = 0;
    756 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
    757 		error = shared_rwlock_lock(rwlp, tsp, WRITE_LOCK);
    758 	else						/* user-level */
    759 		error = rwlock_lock(rwlp, tsp, WRITE_LOCK);
    760 
    761 out:
    762 	if (error == 0) {
    763 		rwlp->rwlock_owner = (uintptr_t)self;
    764 		if (rwlp->rwlock_type == USYNC_PROCESS)
    765 			rwlp->rwlock_ownerpid = udp->pid;
    766 		if (rwsp) {
    767 			tdb_incr(rwsp->rw_wrlock);
    768 			rwsp->rw_wrlock_begin_hold = gethrtime();
    769 		}
    770 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
    771 	} else {
    772 		DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK, error);
    773 	}
    774 	return (error);
    775 }
    776 
    777 #pragma weak pthread_rwlock_wrlock = rw_wrlock
    778 #pragma weak _rw_wrlock = rw_wrlock
    779 int
    780 rw_wrlock(rwlock_t *rwlp)
    781 {
    782 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
    783 	return (rw_wrlock_impl(rwlp, NULL));
    784 }
    785 
    786 void
    787 lrw_wrlock(rwlock_t *rwlp)
    788 {
    789 	enter_critical(curthread);
    790 	(void) rw_wrlock_impl(rwlp, NULL);
    791 }
    792 
    793 int
    794 pthread_rwlock_reltimedwrlock_np(pthread_rwlock_t *_RESTRICT_KYWD rwlp,
    795     const struct timespec *_RESTRICT_KYWD reltime)
    796 {
    797 	timespec_t tslocal = *reltime;
    798 	int error;
    799 
    800 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
    801 	error = rw_wrlock_impl((rwlock_t *)rwlp, &tslocal);
    802 	if (error == ETIME)
    803 		error = ETIMEDOUT;
    804 	return (error);
    805 }
    806 
    807 int
    808 pthread_rwlock_timedwrlock(pthread_rwlock_t *rwlp, const timespec_t *abstime)
    809 {
    810 	timespec_t tslocal;
    811 	int error;
    812 
    813 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
    814 	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
    815 	error = rw_wrlock_impl((rwlock_t *)rwlp, &tslocal);
    816 	if (error == ETIME)
    817 		error = ETIMEDOUT;
    818 	return (error);
    819 }
    820 
    821 #pragma weak pthread_rwlock_tryrdlock = rw_tryrdlock
    822 int
    823 rw_tryrdlock(rwlock_t *rwlp)
    824 {
    825 	ulwp_t *self = curthread;
    826 	uberdata_t *udp = self->ul_uberdata;
    827 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
    828 	readlock_t *readlockp;
    829 	int error;
    830 
    831 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
    832 
    833 	if (rwsp)
    834 		tdb_incr(rwsp->rw_rdlock_try);
    835 
    836 	/*
    837 	 * If we already hold a readers lock on this rwlock,
    838 	 * just increment our reference count and return.
    839 	 */
    840 	sigoff(self);
    841 	readlockp = rwl_entry(rwlp);
    842 	if (readlockp->rd_count != 0) {
    843 		if (readlockp->rd_count == READ_LOCK_MAX) {
    844 			sigon(self);
    845 			error = EAGAIN;
    846 			goto out;
    847 		}
    848 		sigon(self);
    849 		error = 0;
    850 		goto out;
    851 	}
    852 	sigon(self);
    853 
    854 	if (read_lock_try(rwlp, 0))
    855 		error = 0;
    856 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
    857 		error = shared_rwlock_lock(rwlp, NULL, READ_LOCK_TRY);
    858 	else						/* user-level */
    859 		error = rwlock_lock(rwlp, NULL, READ_LOCK_TRY);
    860 
    861 out:
    862 	if (error == 0) {
    863 		sigoff(self);
    864 		rwl_entry(rwlp)->rd_count++;
    865 		sigon(self);
    866 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
    867 	} else {
    868 		if (rwsp)
    869 			tdb_incr(rwsp->rw_rdlock_try_fail);
    870 		if (error != EBUSY) {
    871 			DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK,
    872 			    error);
    873 		}
    874 	}
    875 
    876 	return (error);
    877 }
    878 
    879 #pragma weak pthread_rwlock_trywrlock = rw_trywrlock
    880 int
    881 rw_trywrlock(rwlock_t *rwlp)
    882 {
    883 	ulwp_t *self = curthread;
    884 	uberdata_t *udp = self->ul_uberdata;
    885 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
    886 	int error;
    887 
    888 	ASSERT(!self->ul_critical || self->ul_bindflags);
    889 
    890 	if (rwsp)
    891 		tdb_incr(rwsp->rw_wrlock_try);
    892 
    893 	if (write_lock_try(rwlp, 0))
    894 		error = 0;
    895 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
    896 		error = shared_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);
    897 	else						/* user-level */
    898 		error = rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);
    899 
    900 	if (error == 0) {
    901 		rwlp->rwlock_owner = (uintptr_t)self;
    902 		if (rwlp->rwlock_type == USYNC_PROCESS)
    903 			rwlp->rwlock_ownerpid = udp->pid;
    904 		if (rwsp)
    905 			rwsp->rw_wrlock_begin_hold = gethrtime();
    906 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
    907 	} else {
    908 		if (rwsp)
    909 			tdb_incr(rwsp->rw_wrlock_try_fail);
    910 		if (error != EBUSY) {
    911 			DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK,
    912 			    error);
    913 		}
    914 	}
    915 	return (error);
    916 }
    917 
    918 #pragma weak pthread_rwlock_unlock = rw_unlock
    919 #pragma weak _rw_unlock = rw_unlock
    920 int
    921 rw_unlock(rwlock_t *rwlp)
    922 {
    923 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
    924 	uint32_t readers;
    925 	ulwp_t *self = curthread;
    926 	uberdata_t *udp = self->ul_uberdata;
    927 	tdb_rwlock_stats_t *rwsp;
    928 	queue_head_t *qp;
    929 	int rd_wr;
    930 	int waked = 0;
    931 
    932 	readers = *rwstate;
    933 	ASSERT_CONSISTENT_STATE(readers);
    934 	if (readers & URW_WRITE_LOCKED) {
    935 		rd_wr = WRITE_LOCK;
    936 		readers = 0;
    937 	} else {
    938 		rd_wr = READ_LOCK;
    939 		readers &= URW_READERS_MASK;
    940 	}
    941 
    942 	if (rd_wr == WRITE_LOCK) {
    943 		/*
    944 		 * Since the writer lock is held, we'd better be
    945 		 * holding it, else we cannot legitimately be here.
    946 		 */
    947 		if (!rw_write_held(rwlp)) {
    948 			if (self->ul_error_detection)
    949 				rwlock_error(rwlp, "rwlock_unlock",
    950 				    "writer lock held, "
    951 				    "but not by the calling thread");
    952 			return (EPERM);
    953 		}
    954 		if ((rwsp = RWLOCK_STATS(rwlp, udp)) != NULL) {
    955 			if (rwsp->rw_wrlock_begin_hold)
    956 				rwsp->rw_wrlock_hold_time +=
    957 				    gethrtime() - rwsp->rw_wrlock_begin_hold;
    958 			rwsp->rw_wrlock_begin_hold = 0;
    959 		}
    960 		rwlp->rwlock_owner = 0;
    961 		rwlp->rwlock_ownerpid = 0;
    962 	} else if (readers > 0) {
    963 		/*
    964 		 * A readers lock is held; if we don't hold one, bail out.
    965 		 */
    966 		readlock_t *readlockp;
    967 
    968 		sigoff(self);
    969 		readlockp = rwl_entry(rwlp);
    970 		if (readlockp->rd_count == 0) {
    971 			sigon(self);
    972 			if (self->ul_error_detection)
    973 				rwlock_error(rwlp, "rwlock_unlock",
    974 				    "readers lock held, "
    975 				    "but not by the calling thread");
    976 			return (EPERM);
    977 		}
    978 		/*
    979 		 * If we hold more than one readers lock on this rwlock,
    980 		 * just decrement our reference count and return.
    981 		 */
    982 		if (--readlockp->rd_count != 0) {
    983 			sigon(self);
    984 			goto out;
    985 		}
    986 		sigon(self);
    987 	} else {
    988 		/*
    989 		 * This is a usage error.
    990 		 * No thread should release an unowned lock.
    991 		 */
    992 		if (self->ul_error_detection)
    993 			rwlock_error(rwlp, "rwlock_unlock", "lock not owned");
    994 		return (EPERM);
    995 	}
    996 
    997 	if (rd_wr == WRITE_LOCK && write_unlock_try(rwlp)) {
    998 		/* EMPTY */;
    999 	} else if (rd_wr == READ_LOCK && read_unlock_try(rwlp)) {
   1000 		/* EMPTY */;
   1001 	} else if (rwlp->rwlock_type == USYNC_PROCESS) {
   1002 		(void) mutex_lock(&rwlp->mutex);
   1003 		(void) __lwp_rwlock_unlock(rwlp);
   1004 		(void) mutex_unlock(&rwlp->mutex);
   1005 		waked = 1;
   1006 	} else {
   1007 		qp = queue_lock(rwlp, MX);
   1008 		if (rd_wr == READ_LOCK)
   1009 			atomic_dec_32(rwstate);
   1010 		else
   1011 			atomic_and_32(rwstate, ~URW_WRITE_LOCKED);
   1012 		waked = rw_queue_release(qp, rwlp);
   1013 	}
   1014 
   1015 out:
   1016 	DTRACE_PROBE2(plockstat, rw__release, rwlp, rd_wr);
   1017 
   1018 	/*
   1019 	 * Yield to the thread we just waked up, just in case we might
   1020 	 * be about to grab the rwlock again immediately upon return.
   1021 	 * This is pretty weak but it helps on a uniprocessor and also
   1022 	 * when cpu affinity has assigned both ourself and the other
   1023 	 * thread to the same CPU.  Note that lwp_yield() will yield
   1024 	 * the processor only if the writer is at the same or higher
   1025 	 * priority than ourself.  This provides more balanced program
   1026 	 * behavior; it doesn't guarantee acquisition of the lock by
   1027 	 * the pending writer.
   1028 	 */
   1029 	if (waked)
   1030 		yield();
   1031 	return (0);
   1032 }
   1033 
   1034 void
   1035 lrw_unlock(rwlock_t *rwlp)
   1036 {
   1037 	(void) rw_unlock(rwlp);
   1038 	exit_critical(curthread);
   1039 }
   1040