/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
212248Sraf
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
262248Sraf
272248Sraf #pragma ident "%Z%%M% %I% %E% SMI"
282248Sraf
29*6812Sraf #include "lint.h"
302248Sraf #include "thr_uberdata.h"
312248Sraf #include "asyncio.h"
322248Sraf
/*
 * The aio subsystem memory allocation strategy:
 *
 * For each of the structure types we wish to allocate/free
 * (aio_worker_t, aio_req_t, aio_lio_t), we use mmap() to allocate
 * chunks of memory which are then subdivided into individual
 * elements which are put into a free list from which allocations
 * are made and to which frees are returned.
 *
 * Chunks start small (8 Kbytes) and get larger (size doubling)
 * as more chunks are needed.  This keeps memory usage small for
 * light use and fragmentation small for heavy use.
 *
 * Chunks are never unmapped except as an aftermath of fork()
 * in the child process, when they are all unmapped (because
 * all of the worker threads disappear in the child).
 */
502248Sraf
512248Sraf #define INITIAL_CHUNKSIZE (8 * 1024)
522248Sraf
532248Sraf /*
542248Sraf * The header structure for each chunk.
552248Sraf * A pointer and a size_t ensures proper alignment for whatever follows.
562248Sraf */
572248Sraf typedef struct chunk {
582248Sraf struct chunk *chunk_next; /* linked list */
592248Sraf size_t chunk_size; /* size of this chunk */
602248Sraf } chunk_t;
612248Sraf
622248Sraf chunk_t *chunk_list = NULL; /* list of all chunks */
632248Sraf mutex_t chunk_lock = DEFAULTMUTEX;
642248Sraf
652248Sraf chunk_t *
chunk_alloc(size_t size)662248Sraf chunk_alloc(size_t size)
672248Sraf {
682248Sraf chunk_t *chp = NULL;
692248Sraf void *ptr;
702248Sraf
712248Sraf ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
722248Sraf MAP_PRIVATE | MAP_ANON, -1, (off_t)0);
732248Sraf if (ptr != MAP_FAILED) {
742248Sraf lmutex_lock(&chunk_lock);
752248Sraf chp = ptr;
762248Sraf chp->chunk_next = chunk_list;
772248Sraf chunk_list = chp;
782248Sraf chp->chunk_size = size;
792248Sraf lmutex_unlock(&chunk_lock);
802248Sraf }
812248Sraf
822248Sraf return (chp);
832248Sraf }
842248Sraf
852248Sraf aio_worker_t *worker_freelist = NULL; /* free list of worker structures */
862248Sraf aio_worker_t *worker_freelast = NULL;
872248Sraf size_t worker_chunksize = 0;
882248Sraf mutex_t worker_lock = DEFAULTMUTEX;
892248Sraf
902248Sraf /*
912248Sraf * Allocate a worker control block.
922248Sraf */
932248Sraf aio_worker_t *
_aio_worker_alloc(void)942248Sraf _aio_worker_alloc(void)
952248Sraf {
962248Sraf aio_worker_t *aiowp;
972248Sraf chunk_t *chp;
982248Sraf size_t chunksize;
992248Sraf int nelem;
1002248Sraf int i;
1012248Sraf
1022248Sraf lmutex_lock(&worker_lock);
1032248Sraf if ((aiowp = worker_freelist) == NULL) {
1042248Sraf if ((chunksize = 2 * worker_chunksize) == 0)
1052248Sraf chunksize = INITIAL_CHUNKSIZE;
1062248Sraf if ((chp = chunk_alloc(chunksize)) == NULL) {
1072248Sraf lmutex_unlock(&worker_lock);
1082248Sraf return (NULL);
1092248Sraf }
1102248Sraf worker_chunksize = chunksize;
1112248Sraf worker_freelist = (aio_worker_t *)(uintptr_t)(chp + 1);
1122248Sraf nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_worker_t);
1132248Sraf for (i = 0, aiowp = worker_freelist; i < nelem; i++, aiowp++)
1142248Sraf aiowp->work_forw = aiowp + 1;
1152248Sraf worker_freelast = aiowp - 1;
1162248Sraf worker_freelast->work_forw = NULL;
1172248Sraf aiowp = worker_freelist;
1182248Sraf }
1192248Sraf if ((worker_freelist = aiowp->work_forw) == NULL)
1202248Sraf worker_freelast = NULL;
1212248Sraf lmutex_unlock(&worker_lock);
1222248Sraf
1232248Sraf aiowp->work_forw = NULL;
1242248Sraf (void) mutex_init(&aiowp->work_qlock1, USYNC_THREAD, NULL);
1252248Sraf (void) cond_init(&aiowp->work_idle_cv, USYNC_THREAD, NULL);
1262248Sraf
1272248Sraf return (aiowp);
1282248Sraf }
1292248Sraf
1302248Sraf /*
1312248Sraf * Free a worker control block.
1322248Sraf * Declared with void *arg so it can be a pthread_key_create() destructor.
1332248Sraf */
1342248Sraf void
_aio_worker_free(void * arg)1352248Sraf _aio_worker_free(void *arg)
1362248Sraf {
1372248Sraf aio_worker_t *aiowp = arg;
1382248Sraf
1392248Sraf (void) mutex_destroy(&aiowp->work_qlock1);
1402248Sraf (void) cond_destroy(&aiowp->work_idle_cv);
1412248Sraf (void) memset(aiowp, 0, sizeof (*aiowp));
1422248Sraf
1432248Sraf lmutex_lock(&worker_lock);
1442248Sraf if (worker_freelast == NULL) {
1452248Sraf worker_freelist = worker_freelast = aiowp;
1462248Sraf } else {
1472248Sraf worker_freelast->work_forw = aiowp;
1482248Sraf worker_freelast = aiowp;
1492248Sraf }
1502248Sraf lmutex_unlock(&worker_lock);
1512248Sraf }
1522248Sraf
1532248Sraf aio_req_t *_aio_freelist = NULL; /* free list of request structures */
1542248Sraf aio_req_t *_aio_freelast = NULL;
1552248Sraf size_t request_chunksize = 0;
1562248Sraf int _aio_freelist_cnt = 0;
1572248Sraf int _aio_allocated_cnt = 0;
1582248Sraf mutex_t __aio_cache_lock = DEFAULTMUTEX;
1592248Sraf
1602248Sraf /*
1612248Sraf * Allocate an aio request structure.
1622248Sraf */
1632248Sraf aio_req_t *
_aio_req_alloc(void)1642248Sraf _aio_req_alloc(void)
1652248Sraf {
1662248Sraf aio_req_t *reqp;
1672248Sraf chunk_t *chp;
1682248Sraf size_t chunksize;
1692248Sraf int nelem;
1702248Sraf int i;
1712248Sraf
1722248Sraf lmutex_lock(&__aio_cache_lock);
1732248Sraf if ((reqp = _aio_freelist) == NULL) {
1742248Sraf if ((chunksize = 2 * request_chunksize) == 0)
1752248Sraf chunksize = INITIAL_CHUNKSIZE;
1762248Sraf if ((chp = chunk_alloc(chunksize)) == NULL) {
1772248Sraf lmutex_unlock(&__aio_cache_lock);
1782248Sraf return (NULL);
1792248Sraf }
1802248Sraf request_chunksize = chunksize;
1812248Sraf _aio_freelist = (aio_req_t *)(uintptr_t)(chp + 1);
1822248Sraf nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_req_t);
1832248Sraf for (i = 0, reqp = _aio_freelist; i < nelem; i++, reqp++) {
1842248Sraf reqp->req_state = AIO_REQ_FREE;
1852248Sraf reqp->req_link = reqp + 1;
1862248Sraf }
1872248Sraf _aio_freelast = reqp - 1;
1882248Sraf _aio_freelast->req_link = NULL;
1892248Sraf _aio_freelist_cnt = nelem;
1902248Sraf reqp = _aio_freelist;
1912248Sraf }
1922248Sraf if ((_aio_freelist = reqp->req_link) == NULL)
1932248Sraf _aio_freelast = NULL;
1942248Sraf _aio_freelist_cnt--;
1952248Sraf _aio_allocated_cnt++;
1962248Sraf lmutex_unlock(&__aio_cache_lock);
1972248Sraf
1982248Sraf ASSERT(reqp->req_state == AIO_REQ_FREE);
1992248Sraf reqp->req_state = 0;
2002248Sraf reqp->req_link = NULL;
2012248Sraf reqp->req_sigevent.sigev_notify = SIGEV_NONE;
2022248Sraf
2032248Sraf return (reqp);
2042248Sraf }
2052248Sraf
2062248Sraf /*
2072248Sraf * Free an aio request structure.
2082248Sraf */
2092248Sraf void
_aio_req_free(aio_req_t * reqp)2102248Sraf _aio_req_free(aio_req_t *reqp)
2112248Sraf {
2122248Sraf ASSERT(reqp->req_state != AIO_REQ_FREE &&
2132248Sraf reqp->req_state != AIO_REQ_DONEQ);
2142248Sraf (void) memset(reqp, 0, sizeof (*reqp));
2152248Sraf reqp->req_state = AIO_REQ_FREE;
2162248Sraf
2172248Sraf lmutex_lock(&__aio_cache_lock);
2182248Sraf if (_aio_freelast == NULL) {
2192248Sraf _aio_freelist = _aio_freelast = reqp;
2202248Sraf } else {
2212248Sraf _aio_freelast->req_link = reqp;
2222248Sraf _aio_freelast = reqp;
2232248Sraf }
2242248Sraf _aio_freelist_cnt++;
2252248Sraf _aio_allocated_cnt--;
2262248Sraf lmutex_unlock(&__aio_cache_lock);
2272248Sraf }
2282248Sraf
2292248Sraf aio_lio_t *_lio_head_freelist = NULL; /* free list of lio head structures */
2302248Sraf aio_lio_t *_lio_head_freelast = NULL;
2312248Sraf size_t lio_head_chunksize = 0;
2322248Sraf int _lio_alloc = 0;
2332248Sraf int _lio_free = 0;
2342248Sraf mutex_t __lio_mutex = DEFAULTMUTEX;
2352248Sraf
2362248Sraf /*
2372248Sraf * Allocate a listio head structure.
2382248Sraf */
2392248Sraf aio_lio_t *
_aio_lio_alloc(void)2402248Sraf _aio_lio_alloc(void)
2412248Sraf {
2422248Sraf aio_lio_t *head;
2432248Sraf chunk_t *chp;
2442248Sraf size_t chunksize;
2452248Sraf int nelem;
2462248Sraf int i;
2472248Sraf
2482248Sraf lmutex_lock(&__lio_mutex);
2492248Sraf if ((head = _lio_head_freelist) == NULL) {
2502248Sraf if ((chunksize = 2 * lio_head_chunksize) == 0)
2512248Sraf chunksize = INITIAL_CHUNKSIZE;
2522248Sraf if ((chp = chunk_alloc(chunksize)) == NULL) {
2532248Sraf lmutex_unlock(&__lio_mutex);
2542248Sraf return (NULL);
2552248Sraf }
2562248Sraf lio_head_chunksize = chunksize;
2572248Sraf _lio_head_freelist = (aio_lio_t *)(uintptr_t)(chp + 1);
2582248Sraf nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_lio_t);
2592248Sraf for (i = 0, head = _lio_head_freelist; i < nelem; i++, head++)
2602248Sraf head->lio_next = head + 1;
2612248Sraf _lio_head_freelast = head - 1;
2622248Sraf _lio_head_freelast->lio_next = NULL;
2632248Sraf _lio_alloc += nelem;
2642248Sraf _lio_free = nelem;
2652248Sraf head = _lio_head_freelist;
2662248Sraf }
2672248Sraf if ((_lio_head_freelist = head->lio_next) == NULL)
2682248Sraf _lio_head_freelast = NULL;
2692248Sraf _lio_free--;
2702248Sraf lmutex_unlock(&__lio_mutex);
2712248Sraf
2722248Sraf ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
2732248Sraf head->lio_next = NULL;
2742248Sraf head->lio_port = -1;
2752248Sraf (void) mutex_init(&head->lio_mutex, USYNC_THREAD, NULL);
2762248Sraf (void) cond_init(&head->lio_cond_cv, USYNC_THREAD, NULL);
2772248Sraf
2782248Sraf return (head);
2792248Sraf }
2802248Sraf
2812248Sraf /*
2822248Sraf * Free a listio head structure.
2832248Sraf */
2842248Sraf void
_aio_lio_free(aio_lio_t * head)2852248Sraf _aio_lio_free(aio_lio_t *head)
2862248Sraf {
2872248Sraf ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
2882248Sraf (void) mutex_destroy(&head->lio_mutex);
2892248Sraf (void) cond_destroy(&head->lio_cond_cv);
2902248Sraf (void) memset(head, 0, sizeof (*head));
2912248Sraf
2922248Sraf lmutex_lock(&__lio_mutex);
2932248Sraf if (_lio_head_freelast == NULL) {
2942248Sraf _lio_head_freelist = _lio_head_freelast = head;
2952248Sraf } else {
2962248Sraf _lio_head_freelast->lio_next = head;
2972248Sraf _lio_head_freelast = head;
2982248Sraf }
2992248Sraf _lio_free++;
3002248Sraf lmutex_unlock(&__lio_mutex);
3012248Sraf }
3022248Sraf
3032248Sraf void
postfork1_child_aio(void)3042248Sraf postfork1_child_aio(void)
3052248Sraf {
3062248Sraf chunk_t *chp;
3072248Sraf
3082248Sraf /*
3092248Sraf * All of the workers are gone; free their structures.
3102248Sraf */
3112248Sraf if (_kaio_supported != NULL) {
3122248Sraf (void) munmap((void *)_kaio_supported,
3132248Sraf MAX_KAIO_FDARRAY_SIZE * sizeof (uint32_t));
3142248Sraf _kaio_supported = NULL;
3152248Sraf }
3162248Sraf if (_aio_hash != NULL) {
3172248Sraf (void) munmap((void *)_aio_hash, HASHSZ * sizeof (aio_hash_t));
3182248Sraf _aio_hash = NULL;
3192248Sraf }
3202248Sraf for (chp = chunk_list; chp != NULL; chp = chunk_list) {
3212248Sraf chunk_list = chp->chunk_next;
3222248Sraf (void) munmap((void *)chp, chp->chunk_size);
3232248Sraf }
3242248Sraf
3252248Sraf /*
3262248Sraf * Reinitialize global variables
3272248Sraf */
3282248Sraf
3292248Sraf worker_freelist = NULL;
3302248Sraf worker_freelast = NULL;
3312248Sraf worker_chunksize = 0;
3322248Sraf (void) mutex_init(&worker_lock, USYNC_THREAD, NULL);
3332248Sraf
3342248Sraf _aio_freelist = NULL;
3352248Sraf _aio_freelast = NULL;
3362248Sraf request_chunksize = 0;
3372248Sraf _aio_freelist_cnt = 0;
3382248Sraf _aio_allocated_cnt = 0;
3392248Sraf (void) mutex_init(&__aio_cache_lock, USYNC_THREAD, NULL);
3402248Sraf
3412248Sraf _lio_head_freelist = NULL;
3422248Sraf _lio_head_freelast = NULL;
3432248Sraf lio_head_chunksize = 0;
3442248Sraf _lio_alloc = 0;
3452248Sraf _lio_free = 0;
3462248Sraf (void) mutex_init(&__lio_mutex, USYNC_THREAD, NULL);
3472248Sraf
3482248Sraf (void) mutex_init(&__aio_initlock, USYNC_THREAD, NULL);
3492248Sraf (void) cond_init(&__aio_initcv, USYNC_THREAD, NULL);
3502248Sraf __aio_initbusy = 0;
3512248Sraf
3522248Sraf (void) mutex_init(&__aio_mutex, USYNC_THREAD, NULL);
3532248Sraf (void) cond_init(&_aio_iowait_cv, USYNC_THREAD, NULL);
3542248Sraf (void) cond_init(&_aio_waitn_cv, USYNC_THREAD, NULL);
3552248Sraf
3562248Sraf _kaio_ok = 0;
3572248Sraf __uaio_ok = 0;
3582248Sraf
3592248Sraf _kaiowp = NULL;
3602248Sraf
3612248Sraf __workers_rw = NULL;
3622248Sraf __nextworker_rw = NULL;
3632248Sraf __rw_workerscnt = 0;
3642248Sraf
3652248Sraf __workers_no = NULL;
3662248Sraf __nextworker_no = NULL;
3672248Sraf __no_workerscnt = 0;
3682248Sraf
3692248Sraf _aio_worker_cnt = 0;
3702248Sraf
3712248Sraf _aio_done_head = NULL;
3722248Sraf _aio_done_tail = NULL;
3732248Sraf _aio_donecnt = 0;
3742248Sraf
3752248Sraf _aio_doneq = NULL;
3762248Sraf _aio_doneq_cnt = 0;
3772248Sraf
3782248Sraf _aio_waitncnt = 0;
3792248Sraf _aio_outstand_cnt = 0;
3802248Sraf _kaio_outstand_cnt = 0;
3812248Sraf _aio_req_done_cnt = 0;
3822248Sraf _aio_kernel_suspend = 0;
3832248Sraf _aio_suscv_cnt = 0;
3842248Sraf
3852248Sraf _aiowait_flag = 0;
3862248Sraf _aio_flags = 0;
3872248Sraf }
3882248Sraf
3892248Sraf #define DISPLAY(var) \
3902248Sraf (void) fprintf(stderr, #var "\t= %d\n", var)
3912248Sraf
3922248Sraf static void
_aio_exit_info(void)3932248Sraf _aio_exit_info(void)
3942248Sraf {
3952248Sraf if ((_kaio_ok | __uaio_ok) == 0)
3962248Sraf return;
3972248Sraf (void) fprintf(stderr, "\n");
3982248Sraf DISPLAY(_aio_freelist_cnt);
3992248Sraf DISPLAY(_aio_allocated_cnt);
4002248Sraf DISPLAY(_lio_alloc);
4012248Sraf DISPLAY(_lio_free);
4022248Sraf DISPLAY(__rw_workerscnt);
4032248Sraf DISPLAY(__no_workerscnt);
4042248Sraf DISPLAY(_aio_worker_cnt);
4052248Sraf DISPLAY(_aio_donecnt);
4062248Sraf DISPLAY(_aio_doneq_cnt);
4072248Sraf DISPLAY(_aio_waitncnt);
4082248Sraf DISPLAY(_aio_outstand_cnt);
4092248Sraf DISPLAY(_kaio_outstand_cnt);
4102248Sraf DISPLAY(_aio_req_done_cnt);
4112248Sraf DISPLAY(_aio_kernel_suspend);
4122248Sraf DISPLAY(_aio_suscv_cnt);
4132248Sraf DISPLAY(_aiowait_flag);
4142248Sraf DISPLAY(_aio_flags);
4152248Sraf }
4162248Sraf
4172248Sraf void
init_aio(void)4182248Sraf init_aio(void)
4192248Sraf {
4202248Sraf char *str;
4212248Sraf
4222248Sraf (void) pthread_key_create(&_aio_key, _aio_worker_free);
4232248Sraf if ((str = getenv("_AIO_MIN_WORKERS")) != NULL) {
4242248Sraf if ((_min_workers = atoi(str)) <= 0)
4252248Sraf _min_workers = 4;
4262248Sraf }
4272248Sraf if ((str = getenv("_AIO_MAX_WORKERS")) != NULL) {
4282248Sraf if ((_max_workers = atoi(str)) <= 0)
4292248Sraf _max_workers = 256;
4302248Sraf if (_max_workers < _min_workers + 1)
4312248Sraf _max_workers = _min_workers + 1;
4322248Sraf }
4332248Sraf if ((str = getenv("_AIO_EXIT_INFO")) != NULL && atoi(str) != 0)
4342248Sraf (void) atexit(_aio_exit_info);
4352248Sraf }
436