Home | History | Annotate | Download | only in threads
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     28 
     29 #include "lint.h"
     30 #include "thr_uberdata.h"
     31 #include <stddef.h>
     32 
/*
 * These symbols should not be exported from libc, but
 * /lib/libm.so.2 references them.  libm needs to be fixed.
 * Also, some older versions of the Studio compiler/debugger
 * components reference them.  These need to be fixed, too.
 *
 * Each pragma below makes the underscore-prefixed name a weak alias
 * for the corresponding thr_*() function defined in this file.
 */
#pragma weak _thr_getspecific = thr_getspecific
#pragma weak _thr_keycreate = thr_keycreate
#pragma weak _thr_setspecific = thr_setspecific
     42 
/*
 * 128 million (2^27) keys should be enough for anyone.
 * With 4-byte pointers this allocates half a gigabyte of memory for the
 * keys themselves and half a gigabyte of memory for each thread that uses
 * the largest key.  Both tables are sized as nkeys * sizeof (void *), so
 * with 8-byte pointers each of those figures doubles to one gigabyte.
 */
#define	MAX_KEYS	0x08000000U
     49 
     50 int
     51 thr_keycreate(thread_key_t *pkey, void (*destructor)(void *))
     52 {
     53 	tsd_metadata_t *tsdm = &curthread->ul_uberdata->tsd_metadata;
     54 	void (**old_data)(void *) = NULL;
     55 	void (**new_data)(void *);
     56 	uint_t old_nkeys;
     57 	uint_t new_nkeys;
     58 
     59 	lmutex_lock(&tsdm->tsdm_lock);
     60 
     61 	/*
     62 	 * Unfortunately, pthread_getspecific() specifies that a
     63 	 * pthread_getspecific() on an allocated key upon which the
     64 	 * calling thread has not performed a pthread_setspecifc()
     65 	 * must return NULL.  Consider the following sequence:
     66 	 *
     67 	 *	pthread_key_create(&key);
     68 	 *	pthread_setspecific(key, datum);
     69 	 *	pthread_key_delete(&key);
     70 	 *	pthread_key_create(&key);
     71 	 *	val = pthread_getspecific(key);
     72 	 *
     73 	 * According to POSIX, if the deleted key is reused for the new
     74 	 * key returned by the second pthread_key_create(), then the
     75 	 * pthread_getspecific() in the above example must return NULL
     76 	 * (and not the stale datum).  The implementation is thus left
     77 	 * with two alternatives:
     78 	 *
     79 	 *  (1)	Reuse deleted keys.  If this is to be implemented optimally,
     80 	 *	it requires that pthread_key_create() somehow associate
     81 	 *	the value NULL with the new (reused) key for each thread.
     82 	 *	Keeping the hot path fast and lock-free induces substantial
     83 	 *	complexity on the implementation.
     84 	 *
     85 	 *  (2)	Never reuse deleted keys. This allows the pthread_getspecific()
     86 	 *	implementation to simply perform a check against the number
     87 	 *	of keys set by the calling thread, returning NULL if the
     88 	 *	specified key is larger than the highest set key.  This has
     89 	 *	the disadvantage of wasting memory (a program which simply
     90 	 *	loops calling pthread_key_create()/pthread_key_delete()
     91 	 *	will ultimately run out of memory), but permits an optimal
     92 	 *	pthread_getspecific() while allowing for simple key creation
     93 	 *	and deletion.
     94 	 *
     95 	 * All Solaris implementations have opted for (2).  Given the
     96 	 * ~10 years that this has been in the field, it is safe to assume
     97 	 * that applications don't loop creating and destroying keys; we
     98 	 * stick with (2).
     99 	 */
    100 	if (tsdm->tsdm_nused == (old_nkeys = tsdm->tsdm_nkeys)) {
    101 		/*
    102 		 * We need to allocate or double the number of keys.
    103 		 * tsdm->tsdm_nused must always be a power of two.
    104 		 */
    105 		if ((new_nkeys = (old_nkeys << 1)) == 0)
    106 			new_nkeys = 8;
    107 
    108 		if (new_nkeys > MAX_KEYS) {
    109 			lmutex_unlock(&tsdm->tsdm_lock);
    110 			return (EAGAIN);
    111 		}
    112 		if ((new_data = lmalloc(new_nkeys * sizeof (void *))) == NULL) {
    113 			lmutex_unlock(&tsdm->tsdm_lock);
    114 			return (ENOMEM);
    115 		}
    116 		if ((old_data = tsdm->tsdm_destro) == NULL) {
    117 			/* key == 0 is always invalid */
    118 			new_data[0] = TSD_UNALLOCATED;
    119 			tsdm->tsdm_nused = 1;
    120 		} else {
    121 			(void) memcpy(new_data, old_data,
    122 			    old_nkeys * sizeof (void *));
    123 		}
    124 		tsdm->tsdm_destro = new_data;
    125 		tsdm->tsdm_nkeys = new_nkeys;
    126 	}
    127 
    128 	*pkey = tsdm->tsdm_nused;
    129 	tsdm->tsdm_destro[tsdm->tsdm_nused++] = destructor;
    130 	lmutex_unlock(&tsdm->tsdm_lock);
    131 
    132 	if (old_data != NULL)
    133 		lfree(old_data, old_nkeys * sizeof (void *));
    134 
    135 	return (0);
    136 }
    137 
    138 #pragma weak _pthread_key_create = pthread_key_create
    139 int
    140 pthread_key_create(pthread_key_t *pkey, void (*destructor)(void *))
    141 {
    142 	return (thr_keycreate(pkey, destructor));
    143 }
    144 
    145 /*
    146  * Same as thr_keycreate(), above, except that the key creation
    147  * is performed only once.  This relies upon the fact that a key
    148  * value of THR_ONCE_KEY is invalid, and requires that the key be
    149  * allocated with a value of THR_ONCE_KEY before calling here.
    150  * THR_ONCE_KEY and PTHREAD_ONCE_KEY_NP, defined in <thread.h>
    151  * and <pthread.h> respectively, must have the same value.
    152  * Example:
    153  *
    154  *	static pthread_key_t key = PTHREAD_ONCE_KEY_NP;
    155  *	...
    156  *	pthread_key_create_once_np(&key, destructor);
    157  */
    158 #pragma weak pthread_key_create_once_np = thr_keycreate_once
    159 int
    160 thr_keycreate_once(thread_key_t *keyp, void (*destructor)(void *))
    161 {
    162 	static mutex_t key_lock = DEFAULTMUTEX;
    163 	thread_key_t key;
    164 	int error;
    165 
    166 	if (*keyp == THR_ONCE_KEY) {
    167 		lmutex_lock(&key_lock);
    168 		if (*keyp == THR_ONCE_KEY) {
    169 			error = thr_keycreate(&key, destructor);
    170 			if (error) {
    171 				lmutex_unlock(&key_lock);
    172 				return (error);
    173 			}
    174 			membar_producer();
    175 			*keyp = key;
    176 		}
    177 		lmutex_unlock(&key_lock);
    178 	}
    179 	membar_consumer();
    180 
    181 	return (0);
    182 }
    183 
    184 int
    185 pthread_key_delete(pthread_key_t key)
    186 {
    187 	tsd_metadata_t *tsdm = &curthread->ul_uberdata->tsd_metadata;
    188 
    189 	lmutex_lock(&tsdm->tsdm_lock);
    190 
    191 	if (key >= tsdm->tsdm_nused ||
    192 	    tsdm->tsdm_destro[key] == TSD_UNALLOCATED) {
    193 		lmutex_unlock(&tsdm->tsdm_lock);
    194 		return (EINVAL);
    195 	}
    196 
    197 	tsdm->tsdm_destro[key] = TSD_UNALLOCATED;
    198 	lmutex_unlock(&tsdm->tsdm_lock);
    199 
    200 	return (0);
    201 }
    202 
    203 /*
    204  * Blessedly, the pthread_getspecific() interface is much better than the
    205  * thr_getspecific() interface in that it cannot return an error status.
    206  * Thus, if the key specified is bogus, pthread_getspecific()'s behavior
    207  * is undefined.  As an added bonus (and as an artificat of not returning
    208  * an error code), the requested datum is returned rather than stored
    209  * through a parameter -- thereby avoiding the unnecessary store/load pair
    210  * incurred by thr_getspecific().  Every once in a while, the Standards
    211  * get it right -- but usually by accident.
    212  */
    213 void *
    214 pthread_getspecific(pthread_key_t key)
    215 {
    216 	tsd_t *stsd;
    217 
    218 	/*
    219 	 * We are cycle-shaving in this function because some
    220 	 * applications make heavy use of it and one machine cycle
    221 	 * can make a measurable difference in performance.  This
    222 	 * is why we waste a little memory and allocate a NULL value
    223 	 * for the invalid key == 0 in curthread->ul_ftsd[0] rather
    224 	 * than adjusting the key by subtracting one.
    225 	 */
    226 	if (key < TSD_NFAST)
    227 		return (curthread->ul_ftsd[key]);
    228 
    229 	if ((stsd = curthread->ul_stsd) != NULL && key < stsd->tsd_nalloc)
    230 		return (stsd->tsd_data[key]);
    231 
    232 	return (NULL);
    233 }
    234 
    235 int
    236 thr_getspecific(thread_key_t key, void **valuep)
    237 {
    238 	tsd_t *stsd;
    239 
    240 	/*
    241 	 * Amazingly, some application code (and worse, some particularly
    242 	 * fugly Solaris library code) _relies_ on the fact that 0 is always
    243 	 * an invalid key.  To preserve this semantic, 0 is never returned
    244 	 * as a key from thr_/pthread_key_create(); we explicitly check
    245 	 * for it here and return EINVAL.
    246 	 */
    247 	if (key == 0)
    248 		return (EINVAL);
    249 
    250 	if (key < TSD_NFAST)
    251 		*valuep = curthread->ul_ftsd[key];
    252 	else if ((stsd = curthread->ul_stsd) != NULL && key < stsd->tsd_nalloc)
    253 		*valuep = stsd->tsd_data[key];
    254 	else
    255 		*valuep = NULL;
    256 
    257 	return (0);
    258 }
    259 
    260 /*
    261  * We call thr_setspecific_slow() when the key specified
    262  * is beyond the current thread's currently allocated range.
    263  * This case is in a separate function because we want
    264  * the compiler to optimize for the common case.
    265  */
    266 static int
    267 thr_setspecific_slow(thread_key_t key, void *value)
    268 {
    269 	ulwp_t *self = curthread;
    270 	tsd_metadata_t *tsdm = &self->ul_uberdata->tsd_metadata;
    271 	tsd_t *stsd;
    272 	tsd_t *ntsd;
    273 	uint_t nkeys;
    274 
    275 	/*
    276 	 * It isn't necessary to grab locks in this path;
    277 	 * tsdm->tsdm_nused can only increase.
    278 	 */
    279 	if (key >= tsdm->tsdm_nused)
    280 		return (EINVAL);
    281 
    282 	/*
    283 	 * We would like to test (tsdm->tsdm_destro[key] == TSD_UNALLOCATED)
    284 	 * here but that would require acquiring tsdm->tsdm_lock and we
    285 	 * want to avoid locks in this path.
    286 	 *
    287 	 * We have a key which is (or at least _was_) valid.  If this key
    288 	 * is later deleted (or indeed, is deleted before we set the value),
    289 	 * we don't care; such a condition would indicate an application
    290 	 * race for which POSIX thankfully leaves the behavior unspecified.
    291 	 *
    292 	 * First, determine our new size.  To avoid allocating more than we
    293 	 * have to, continue doubling our size only until the new key fits.
    294 	 * stsd->tsd_nalloc must always be a power of two.
    295 	 */
    296 	nkeys = ((stsd = self->ul_stsd) != NULL)? stsd->tsd_nalloc : 8;
    297 	for (; key >= nkeys; nkeys <<= 1)
    298 		continue;
    299 
    300 	/*
    301 	 * Allocate the new TSD.
    302 	 */
    303 	if ((ntsd = lmalloc(nkeys * sizeof (void *))) == NULL)
    304 		return (ENOMEM);
    305 
    306 	if (stsd != NULL) {
    307 		/*
    308 		 * Copy the old TSD across to the new.
    309 		 */
    310 		(void) memcpy(ntsd, stsd, stsd->tsd_nalloc * sizeof (void *));
    311 		lfree(stsd, stsd->tsd_nalloc * sizeof (void *));
    312 	}
    313 
    314 	ntsd->tsd_nalloc = nkeys;
    315 	ntsd->tsd_data[key] = value;
    316 	self->ul_stsd = ntsd;
    317 
    318 	return (0);
    319 }
    320 
    321 int
    322 thr_setspecific(thread_key_t key, void *value)
    323 {
    324 	tsd_t *stsd;
    325 	int ret;
    326 	ulwp_t *self = curthread;
    327 
    328 	/*
    329 	 * See the comment in thr_getspecific(), above.
    330 	 */
    331 	if (key == 0)
    332 		return (EINVAL);
    333 
    334 	if (key < TSD_NFAST) {
    335 		curthread->ul_ftsd[key] = value;
    336 		return (0);
    337 	}
    338 
    339 	if ((stsd = curthread->ul_stsd) != NULL && key < stsd->tsd_nalloc) {
    340 		stsd->tsd_data[key] = value;
    341 		return (0);
    342 	}
    343 
    344 	/*
    345 	 * This is a critical region since we are dealing with memory
    346 	 * allocation and free. Similar protection required in tsd_free().
    347 	 */
    348 	enter_critical(self);
    349 	ret = thr_setspecific_slow(key, value);
    350 	exit_critical(self);
    351 	return (ret);
    352 }
    353 
    354 int
    355 pthread_setspecific(pthread_key_t key, const void *value)
    356 {
    357 	return (thr_setspecific(key, (void *)value));
    358 }
    359 
    360 /*
    361  * Contract-private interface for java.  See PSARC/2003/159
    362  *
    363  * If the key falls within the TSD_NFAST range, return a non-negative
    364  * offset that can be used by the caller to fetch the TSD data value
    365  * directly out of the thread structure using %g7 (sparc) or %gs (x86).
    366  * With the advent of TLS, %g7 and %gs are part of the ABI, even though
    367  * the definition of the thread structure itself (ulwp_t) is private.
    368  *
    369  * We guarantee that the offset returned on sparc will fit within
    370  * a SIMM13 field (that is, it is less than 2048).
    371  *
    372  * On failure (key is not in the TSD_NFAST range), return -1.
    373  */
    374 ptrdiff_t
    375 _thr_slot_offset(thread_key_t key)
    376 {
    377 	if (key != 0 && key < TSD_NFAST)
    378 		return ((ptrdiff_t)offsetof(ulwp_t, ul_ftsd[key]));
    379 	return (-1);
    380 }
    381 
    382 /*
    383  * This is called by _thrp_exit() to apply destructors to the thread's tsd.
    384  */
    385 void
    386 tsd_exit()
    387 {
    388 	ulwp_t *self = curthread;
    389 	tsd_metadata_t *tsdm = &self->ul_uberdata->tsd_metadata;
    390 	thread_key_t key;
    391 	int recheck;
    392 	void *val;
    393 	void (*func)(void *);
    394 
    395 	lmutex_lock(&tsdm->tsdm_lock);
    396 
    397 	do {
    398 		recheck = 0;
    399 
    400 		for (key = 1; key < TSD_NFAST &&
    401 		    key < tsdm->tsdm_nused; key++) {
    402 			if ((func = tsdm->tsdm_destro[key]) != NULL &&
    403 			    func != TSD_UNALLOCATED &&
    404 			    (val = self->ul_ftsd[key]) != NULL) {
    405 				self->ul_ftsd[key] = NULL;
    406 				lmutex_unlock(&tsdm->tsdm_lock);
    407 				(*func)(val);
    408 				lmutex_lock(&tsdm->tsdm_lock);
    409 				recheck = 1;
    410 			}
    411 		}
    412 
    413 		if (self->ul_stsd == NULL)
    414 			continue;
    415 
    416 		/*
    417 		 * Any of these destructors could cause us to grow the number
    418 		 * TSD keys in the slow TSD; we cannot cache the slow TSD
    419 		 * pointer through this loop.
    420 		 */
    421 		for (; key < self->ul_stsd->tsd_nalloc &&
    422 		    key < tsdm->tsdm_nused; key++) {
    423 			if ((func = tsdm->tsdm_destro[key]) != NULL &&
    424 			    func != TSD_UNALLOCATED &&
    425 			    (val = self->ul_stsd->tsd_data[key]) != NULL) {
    426 				self->ul_stsd->tsd_data[key] = NULL;
    427 				lmutex_unlock(&tsdm->tsdm_lock);
    428 				(*func)(val);
    429 				lmutex_lock(&tsdm->tsdm_lock);
    430 				recheck = 1;
    431 			}
    432 		}
    433 	} while (recheck);
    434 
    435 	lmutex_unlock(&tsdm->tsdm_lock);
    436 
    437 	/*
    438 	 * We're done; if we have slow TSD, we need to free it.
    439 	 */
    440 	tsd_free(self);
    441 }
    442 
    443 void
    444 tsd_free(ulwp_t *ulwp)
    445 {
    446 	tsd_t *stsd;
    447 	ulwp_t *self = curthread;
    448 
    449 	enter_critical(self);
    450 	if ((stsd = ulwp->ul_stsd) != NULL)
    451 		lfree(stsd, stsd->tsd_nalloc * sizeof (void *));
    452 	ulwp->ul_stsd = NULL;
    453 	exit_critical(self);
    454 }
    455