Home | History | Annotate | Download | only in aio
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     28 
     29 #include "lint.h"
     30 #include "thr_uberdata.h"
     31 #include "asyncio.h"
     32 
     33 /*
     34  * The aio subsystem memory allocation strategy:
     35  *
     36  * For each of the structure types we wish to allocate/free
     37  * (aio_worker_t, aio_req_t, aio_lio_t), we use mmap() to allocate
     38  * chunks of memory which are then subdivided into individual
     39  * elements which are put into a free list from which allocations
     40  * are made and to which frees are returned.
     41  *
     42  * Chunks start small (8 Kbytes) and get larger (size doubling)
     43  * as more chunks are needed.  This keeps memory usage small for
     44  * light use and fragmentation small for heavy use.
     45  *
     46  * Chunks are never unmapped except as an aftermath of fork()
     47  * in the child process, when they are all unmapped (because
     48  * all of the worker threads disappear in the child).
     49  */
     50 
     51 #define	INITIAL_CHUNKSIZE	(8 * 1024)
     52 
     53 /*
     54  * The header structure for each chunk.
     55  * A pointer and a size_t ensures proper alignment for whatever follows.
     56  */
     57 typedef struct chunk {
     58 	struct chunk	*chunk_next;	/* linked list */
     59 	size_t		chunk_size;	/* size of this chunk */
     60 } chunk_t;
     61 
     62 chunk_t *chunk_list = NULL;		/* list of all chunks */
     63 mutex_t chunk_lock = DEFAULTMUTEX;
     64 
     65 chunk_t *
     66 chunk_alloc(size_t size)
     67 {
     68 	chunk_t *chp = NULL;
     69 	void *ptr;
     70 
     71 	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
     72 	    MAP_PRIVATE | MAP_ANON, -1, (off_t)0);
     73 	if (ptr != MAP_FAILED) {
     74 		lmutex_lock(&chunk_lock);
     75 		chp = ptr;
     76 		chp->chunk_next = chunk_list;
     77 		chunk_list = chp;
     78 		chp->chunk_size = size;
     79 		lmutex_unlock(&chunk_lock);
     80 	}
     81 
     82 	return (chp);
     83 }
     84 
     85 aio_worker_t *worker_freelist = NULL;	/* free list of worker structures */
     86 aio_worker_t *worker_freelast = NULL;
     87 size_t worker_chunksize = 0;
     88 mutex_t worker_lock = DEFAULTMUTEX;
     89 
     90 /*
     91  * Allocate a worker control block.
     92  */
     93 aio_worker_t *
     94 _aio_worker_alloc(void)
     95 {
     96 	aio_worker_t *aiowp;
     97 	chunk_t *chp;
     98 	size_t chunksize;
     99 	int nelem;
    100 	int i;
    101 
    102 	lmutex_lock(&worker_lock);
    103 	if ((aiowp = worker_freelist) == NULL) {
    104 		if ((chunksize = 2 * worker_chunksize) == 0)
    105 			chunksize = INITIAL_CHUNKSIZE;
    106 		if ((chp = chunk_alloc(chunksize)) == NULL) {
    107 			lmutex_unlock(&worker_lock);
    108 			return (NULL);
    109 		}
    110 		worker_chunksize = chunksize;
    111 		worker_freelist = (aio_worker_t *)(uintptr_t)(chp + 1);
    112 		nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_worker_t);
    113 		for (i = 0, aiowp = worker_freelist; i < nelem; i++, aiowp++)
    114 			aiowp->work_forw = aiowp + 1;
    115 		worker_freelast = aiowp - 1;
    116 		worker_freelast->work_forw = NULL;
    117 		aiowp = worker_freelist;
    118 	}
    119 	if ((worker_freelist = aiowp->work_forw) == NULL)
    120 		worker_freelast = NULL;
    121 	lmutex_unlock(&worker_lock);
    122 
    123 	aiowp->work_forw = NULL;
    124 	(void) mutex_init(&aiowp->work_qlock1, USYNC_THREAD, NULL);
    125 	(void) cond_init(&aiowp->work_idle_cv, USYNC_THREAD, NULL);
    126 
    127 	return (aiowp);
    128 }
    129 
    130 /*
    131  * Free a worker control block.
    132  * Declared with void *arg so it can be a pthread_key_create() destructor.
    133  */
    134 void
    135 _aio_worker_free(void *arg)
    136 {
    137 	aio_worker_t *aiowp = arg;
    138 
    139 	(void) mutex_destroy(&aiowp->work_qlock1);
    140 	(void) cond_destroy(&aiowp->work_idle_cv);
    141 	(void) memset(aiowp, 0, sizeof (*aiowp));
    142 
    143 	lmutex_lock(&worker_lock);
    144 	if (worker_freelast == NULL) {
    145 		worker_freelist = worker_freelast = aiowp;
    146 	} else {
    147 		worker_freelast->work_forw = aiowp;
    148 		worker_freelast = aiowp;
    149 	}
    150 	lmutex_unlock(&worker_lock);
    151 }
    152 
    153 aio_req_t *_aio_freelist = NULL;	/* free list of request structures */
    154 aio_req_t *_aio_freelast = NULL;
    155 size_t request_chunksize = 0;
    156 int _aio_freelist_cnt = 0;
    157 int _aio_allocated_cnt = 0;
    158 mutex_t __aio_cache_lock = DEFAULTMUTEX;
    159 
    160 /*
    161  * Allocate an aio request structure.
    162  */
    163 aio_req_t *
    164 _aio_req_alloc(void)
    165 {
    166 	aio_req_t *reqp;
    167 	chunk_t *chp;
    168 	size_t chunksize;
    169 	int nelem;
    170 	int i;
    171 
    172 	lmutex_lock(&__aio_cache_lock);
    173 	if ((reqp = _aio_freelist) == NULL) {
    174 		if ((chunksize = 2 * request_chunksize) == 0)
    175 			chunksize = INITIAL_CHUNKSIZE;
    176 		if ((chp = chunk_alloc(chunksize)) == NULL) {
    177 			lmutex_unlock(&__aio_cache_lock);
    178 			return (NULL);
    179 		}
    180 		request_chunksize = chunksize;
    181 		_aio_freelist = (aio_req_t *)(uintptr_t)(chp + 1);
    182 		nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_req_t);
    183 		for (i = 0, reqp = _aio_freelist; i < nelem; i++, reqp++) {
    184 			reqp->req_state = AIO_REQ_FREE;
    185 			reqp->req_link = reqp + 1;
    186 		}
    187 		_aio_freelast = reqp - 1;
    188 		_aio_freelast->req_link = NULL;
    189 		_aio_freelist_cnt = nelem;
    190 		reqp = _aio_freelist;
    191 	}
    192 	if ((_aio_freelist = reqp->req_link) == NULL)
    193 		_aio_freelast = NULL;
    194 	_aio_freelist_cnt--;
    195 	_aio_allocated_cnt++;
    196 	lmutex_unlock(&__aio_cache_lock);
    197 
    198 	ASSERT(reqp->req_state == AIO_REQ_FREE);
    199 	reqp->req_state = 0;
    200 	reqp->req_link = NULL;
    201 	reqp->req_sigevent.sigev_notify = SIGEV_NONE;
    202 
    203 	return (reqp);
    204 }
    205 
    206 /*
    207  * Free an aio request structure.
    208  */
    209 void
    210 _aio_req_free(aio_req_t *reqp)
    211 {
    212 	ASSERT(reqp->req_state != AIO_REQ_FREE &&
    213 	    reqp->req_state != AIO_REQ_DONEQ);
    214 	(void) memset(reqp, 0, sizeof (*reqp));
    215 	reqp->req_state = AIO_REQ_FREE;
    216 
    217 	lmutex_lock(&__aio_cache_lock);
    218 	if (_aio_freelast == NULL) {
    219 		_aio_freelist = _aio_freelast = reqp;
    220 	} else {
    221 		_aio_freelast->req_link = reqp;
    222 		_aio_freelast = reqp;
    223 	}
    224 	_aio_freelist_cnt++;
    225 	_aio_allocated_cnt--;
    226 	lmutex_unlock(&__aio_cache_lock);
    227 }
    228 
    229 aio_lio_t *_lio_head_freelist = NULL;	/* free list of lio head structures */
    230 aio_lio_t *_lio_head_freelast = NULL;
    231 size_t lio_head_chunksize = 0;
    232 int _lio_alloc = 0;
    233 int _lio_free = 0;
    234 mutex_t __lio_mutex = DEFAULTMUTEX;
    235 
    236 /*
    237  * Allocate a listio head structure.
    238  */
    239 aio_lio_t *
    240 _aio_lio_alloc(void)
    241 {
    242 	aio_lio_t *head;
    243 	chunk_t *chp;
    244 	size_t chunksize;
    245 	int nelem;
    246 	int i;
    247 
    248 	lmutex_lock(&__lio_mutex);
    249 	if ((head = _lio_head_freelist) == NULL) {
    250 		if ((chunksize = 2 * lio_head_chunksize) == 0)
    251 			chunksize = INITIAL_CHUNKSIZE;
    252 		if ((chp = chunk_alloc(chunksize)) == NULL) {
    253 			lmutex_unlock(&__lio_mutex);
    254 			return (NULL);
    255 		}
    256 		lio_head_chunksize = chunksize;
    257 		_lio_head_freelist = (aio_lio_t *)(uintptr_t)(chp + 1);
    258 		nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_lio_t);
    259 		for (i = 0, head = _lio_head_freelist; i < nelem; i++, head++)
    260 			head->lio_next = head + 1;
    261 		_lio_head_freelast = head - 1;
    262 		_lio_head_freelast->lio_next = NULL;
    263 		_lio_alloc += nelem;
    264 		_lio_free = nelem;
    265 		head = _lio_head_freelist;
    266 	}
    267 	if ((_lio_head_freelist = head->lio_next) == NULL)
    268 		_lio_head_freelast = NULL;
    269 	_lio_free--;
    270 	lmutex_unlock(&__lio_mutex);
    271 
    272 	ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
    273 	head->lio_next = NULL;
    274 	head->lio_port = -1;
    275 	(void) mutex_init(&head->lio_mutex, USYNC_THREAD, NULL);
    276 	(void) cond_init(&head->lio_cond_cv, USYNC_THREAD, NULL);
    277 
    278 	return (head);
    279 }
    280 
    281 /*
    282  * Free a listio head structure.
    283  */
    284 void
    285 _aio_lio_free(aio_lio_t *head)
    286 {
    287 	ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
    288 	(void) mutex_destroy(&head->lio_mutex);
    289 	(void) cond_destroy(&head->lio_cond_cv);
    290 	(void) memset(head, 0, sizeof (*head));
    291 
    292 	lmutex_lock(&__lio_mutex);
    293 	if (_lio_head_freelast == NULL) {
    294 		_lio_head_freelist = _lio_head_freelast = head;
    295 	} else {
    296 		_lio_head_freelast->lio_next = head;
    297 		_lio_head_freelast = head;
    298 	}
    299 	_lio_free++;
    300 	lmutex_unlock(&__lio_mutex);
    301 }
    302 
    303 void
    304 postfork1_child_aio(void)
    305 {
    306 	chunk_t *chp;
    307 
    308 	/*
    309 	 * All of the workers are gone; free their structures.
    310 	 */
    311 	if (_kaio_supported != NULL) {
    312 		(void) munmap((void *)_kaio_supported,
    313 		    MAX_KAIO_FDARRAY_SIZE * sizeof (uint32_t));
    314 		_kaio_supported = NULL;
    315 	}
    316 	if (_aio_hash != NULL) {
    317 		(void) munmap((void *)_aio_hash, HASHSZ * sizeof (aio_hash_t));
    318 		_aio_hash = NULL;
    319 	}
    320 	for (chp = chunk_list; chp != NULL; chp = chunk_list) {
    321 		chunk_list = chp->chunk_next;
    322 		(void) munmap((void *)chp, chp->chunk_size);
    323 	}
    324 
    325 	/*
    326 	 * Reinitialize global variables
    327 	 */
    328 
    329 	worker_freelist = NULL;
    330 	worker_freelast = NULL;
    331 	worker_chunksize = 0;
    332 	(void) mutex_init(&worker_lock, USYNC_THREAD, NULL);
    333 
    334 	_aio_freelist = NULL;
    335 	_aio_freelast = NULL;
    336 	request_chunksize = 0;
    337 	_aio_freelist_cnt = 0;
    338 	_aio_allocated_cnt = 0;
    339 	(void) mutex_init(&__aio_cache_lock, USYNC_THREAD, NULL);
    340 
    341 	_lio_head_freelist = NULL;
    342 	_lio_head_freelast = NULL;
    343 	lio_head_chunksize = 0;
    344 	_lio_alloc = 0;
    345 	_lio_free = 0;
    346 	(void) mutex_init(&__lio_mutex, USYNC_THREAD, NULL);
    347 
    348 	(void) mutex_init(&__aio_initlock, USYNC_THREAD, NULL);
    349 	(void) cond_init(&__aio_initcv, USYNC_THREAD, NULL);
    350 	__aio_initbusy = 0;
    351 
    352 	(void) mutex_init(&__aio_mutex, USYNC_THREAD, NULL);
    353 	(void) cond_init(&_aio_iowait_cv, USYNC_THREAD, NULL);
    354 	(void) cond_init(&_aio_waitn_cv, USYNC_THREAD, NULL);
    355 
    356 	_kaio_ok = 0;
    357 	__uaio_ok = 0;
    358 
    359 	_kaiowp = NULL;
    360 
    361 	__workers_rw = NULL;
    362 	__nextworker_rw = NULL;
    363 	__rw_workerscnt = 0;
    364 
    365 	__workers_no = NULL;
    366 	__nextworker_no = NULL;
    367 	__no_workerscnt = 0;
    368 
    369 	_aio_worker_cnt = 0;
    370 
    371 	_aio_done_head = NULL;
    372 	_aio_done_tail = NULL;
    373 	_aio_donecnt = 0;
    374 
    375 	_aio_doneq = NULL;
    376 	_aio_doneq_cnt = 0;
    377 
    378 	_aio_waitncnt = 0;
    379 	_aio_outstand_cnt = 0;
    380 	_kaio_outstand_cnt = 0;
    381 	_aio_req_done_cnt = 0;
    382 	_aio_kernel_suspend = 0;
    383 	_aio_suscv_cnt = 0;
    384 
    385 	_aiowait_flag = 0;
    386 	_aio_flags = 0;
    387 }
    388 
    389 #define	DISPLAY(var)	\
    390 	(void) fprintf(stderr, #var "\t= %d\n", var)
    391 
    392 static void
    393 _aio_exit_info(void)
    394 {
    395 	if ((_kaio_ok | __uaio_ok) == 0)
    396 		return;
    397 	(void) fprintf(stderr, "\n");
    398 	DISPLAY(_aio_freelist_cnt);
    399 	DISPLAY(_aio_allocated_cnt);
    400 	DISPLAY(_lio_alloc);
    401 	DISPLAY(_lio_free);
    402 	DISPLAY(__rw_workerscnt);
    403 	DISPLAY(__no_workerscnt);
    404 	DISPLAY(_aio_worker_cnt);
    405 	DISPLAY(_aio_donecnt);
    406 	DISPLAY(_aio_doneq_cnt);
    407 	DISPLAY(_aio_waitncnt);
    408 	DISPLAY(_aio_outstand_cnt);
    409 	DISPLAY(_kaio_outstand_cnt);
    410 	DISPLAY(_aio_req_done_cnt);
    411 	DISPLAY(_aio_kernel_suspend);
    412 	DISPLAY(_aio_suscv_cnt);
    413 	DISPLAY(_aiowait_flag);
    414 	DISPLAY(_aio_flags);
    415 }
    416 
    417 void
    418 init_aio(void)
    419 {
    420 	char *str;
    421 
    422 	(void) pthread_key_create(&_aio_key, _aio_worker_free);
    423 	if ((str = getenv("_AIO_MIN_WORKERS")) != NULL) {
    424 		if ((_min_workers = atoi(str)) <= 0)
    425 			_min_workers = 4;
    426 	}
    427 	if ((str = getenv("_AIO_MAX_WORKERS")) != NULL) {
    428 		if ((_max_workers = atoi(str)) <= 0)
    429 			_max_workers = 256;
    430 		if (_max_workers < _min_workers + 1)
    431 			_max_workers = _min_workers + 1;
    432 	}
    433 	if ((str = getenv("_AIO_EXIT_INFO")) != NULL && atoi(str) != 0)
    434 		(void) atexit(_aio_exit_info);
    435 }
    436