12248Sraf /*
22248Sraf * CDDL HEADER START
32248Sraf *
42248Sraf * The contents of this file are subject to the terms of the
52248Sraf * Common Development and Distribution License (the "License").
62248Sraf * You may not use this file except in compliance with the License.
72248Sraf *
82248Sraf * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
92248Sraf * or http://www.opensolaris.org/os/licensing.
102248Sraf * See the License for the specific language governing permissions
112248Sraf * and limitations under the License.
122248Sraf *
132248Sraf * When distributing Covered Code, include this CDDL HEADER in each
142248Sraf * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
152248Sraf * If applicable, add the following below this CDDL HEADER, with the
162248Sraf * fields enclosed by brackets "[]" replaced with your own identifying
172248Sraf * information: Portions Copyright [yyyy] [name of copyright owner]
182248Sraf *
192248Sraf * CDDL HEADER END
202248Sraf */
212248Sraf
222248Sraf /*
23*10011SPrakash.Sangappa@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
242248Sraf * Use is subject to license terms.
252248Sraf */
262248Sraf
272248Sraf /*
282248Sraf * posix_aio.c implements the POSIX async. I/O functions.
292248Sraf *
302248Sraf * aio_read
312248Sraf * aio_write
322248Sraf * aio_error
332248Sraf * aio_return
342248Sraf * aio_suspend
352248Sraf * lio_listio
362248Sraf * aio_fsync
372248Sraf * aio_cancel
382248Sraf */
392248Sraf
406812Sraf #include "lint.h"
412248Sraf #include "thr_uberdata.h"
422248Sraf #include "asyncio.h"
432248Sraf #include <atomic.h>
442248Sraf #include <sys/file.h>
452248Sraf #include <sys/port.h>
462248Sraf
extern int __fdsync(int, int);	/* fsync()/fdatasync() syscall wrapper */

cond_t _aio_waitn_cv = DEFAULTCV;	/* wait for end of aio_waitn */

/* validates/copies a timeout and classifies it (see AIO_TIMEOUT_* below) */
static int _aio_check_timeout(const timespec_t *, timespec_t *, int *);

/* defines for timedwait in __aio_waitn() and __aio_suspend() */
#define	AIO_TIMEOUT_INDEF	-1	/* NULL timeout: wait indefinitely */
#define	AIO_TIMEOUT_POLL	0	/* zero timeout: poll, never block */
#define	AIO_TIMEOUT_WAIT	1	/* positive timeout: bounded wait */
#define	AIO_TIMEOUT_UNDEF	2	/* not yet classified */

/*
 * List I/O stuff
 */
static void _lio_list_decr(aio_lio_t *);
static long aio_list_max = 0;	/* cached sysconf(_SC_AIO_LISTIO_MAX) */
642248Sraf
652248Sraf int
aio_read(aiocb_t * aiocbp)662248Sraf aio_read(aiocb_t *aiocbp)
672248Sraf {
682472Sraf if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
692248Sraf errno = EINVAL;
702248Sraf return (-1);
712248Sraf }
722248Sraf if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
732248Sraf errno = EBUSY;
742248Sraf return (-1);
752248Sraf }
762248Sraf if (_aio_sigev_thread(aiocbp) != 0)
772248Sraf return (-1);
782248Sraf aiocbp->aio_lio_opcode = LIO_READ;
792248Sraf return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAREAD,
802248Sraf (AIO_KAIO | AIO_NO_DUPS)));
812248Sraf }
822248Sraf
832248Sraf int
aio_write(aiocb_t * aiocbp)842248Sraf aio_write(aiocb_t *aiocbp)
852248Sraf {
862472Sraf if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
872248Sraf errno = EINVAL;
882248Sraf return (-1);
892248Sraf }
902248Sraf if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
912248Sraf errno = EBUSY;
922248Sraf return (-1);
932248Sraf }
942248Sraf if (_aio_sigev_thread(aiocbp) != 0)
952248Sraf return (-1);
962248Sraf aiocbp->aio_lio_opcode = LIO_WRITE;
972248Sraf return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAWRITE,
982248Sraf (AIO_KAIO | AIO_NO_DUPS)));
992248Sraf }
1002248Sraf
/*
 * __lio_listio() cancellation handler.
 *
 * Installed via pthread_cleanup_push() around the sig_cond_wait() in
 * lio_listio(LIO_WAIT); runs with head->lio_mutex held if the waiting
 * thread is cancelled.  Clears the "someone is waiting" flag and, when
 * every request on the list has already finished, frees the list head
 * (no other thread will do it once the waiter is gone).
 */
/* ARGSUSED */
static void
_lio_listio_cleanup(aio_lio_t *head)
{
	int freeit = 0;	/* nonzero => this thread must free the head */

	ASSERT(MUTEX_HELD(&head->lio_mutex));
	if (head->lio_refcnt == 0) {
		/* all list entries completed while we were cancelled */
		ASSERT(head->lio_nent == 0);
		freeit = 1;
	}
	head->lio_waiting = 0;
	sig_mutex_unlock(&head->lio_mutex);
	if (freeit)
		_aio_lio_free(head);
}
1202248Sraf
1212248Sraf int
lio_listio(int mode,aiocb_t * _RESTRICT_KYWD const * _RESTRICT_KYWD list,int nent,struct sigevent * _RESTRICT_KYWD sigevp)1222248Sraf lio_listio(int mode, aiocb_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
1232248Sraf int nent, struct sigevent *_RESTRICT_KYWD sigevp)
1242248Sraf {
1252248Sraf int aio_ufs = 0;
1262248Sraf int oerrno = 0;
1272248Sraf aio_lio_t *head = NULL;
1282248Sraf aiocb_t *aiocbp;
1292248Sraf int state = 0;
1302248Sraf int EIOflg = 0;
1312248Sraf int rw;
1322248Sraf int do_kaio = 0;
1332248Sraf int error;
1342248Sraf int i;
1352248Sraf
1362248Sraf if (!_kaio_ok)
1372248Sraf _kaio_init();
1382248Sraf
1392248Sraf if (aio_list_max == 0)
1402248Sraf aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);
1412248Sraf
1422248Sraf if (nent <= 0 || nent > aio_list_max) {
1432248Sraf errno = EINVAL;
1442248Sraf return (-1);
1452248Sraf }
1462248Sraf
1472248Sraf switch (mode) {
1482248Sraf case LIO_WAIT:
1492248Sraf state = NOCHECK;
1502248Sraf break;
1512248Sraf case LIO_NOWAIT:
1522248Sraf state = CHECK;
1532248Sraf break;
1542248Sraf default:
1552248Sraf errno = EINVAL;
1562248Sraf return (-1);
1572248Sraf }
1582248Sraf
1592248Sraf for (i = 0; i < nent; i++) {
1602248Sraf if ((aiocbp = list[i]) == NULL)
1612248Sraf continue;
1622248Sraf if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
1632248Sraf errno = EBUSY;
1642248Sraf return (-1);
1652248Sraf }
1662248Sraf if (_aio_sigev_thread(aiocbp) != 0)
1672248Sraf return (-1);
1682248Sraf if (aiocbp->aio_lio_opcode == LIO_NOP)
1692248Sraf aiocbp->aio_state = NOCHECK;
1702248Sraf else {
1712248Sraf aiocbp->aio_state = state;
1722248Sraf if (KAIO_SUPPORTED(aiocbp->aio_fildes))
1732248Sraf do_kaio++;
1742248Sraf else
1752248Sraf aiocbp->aio_resultp.aio_errno = ENOTSUP;
1762248Sraf }
1772248Sraf }
1782248Sraf if (_aio_sigev_thread_init(sigevp) != 0)
1792248Sraf return (-1);
1802248Sraf
1812248Sraf if (do_kaio) {
1822248Sraf error = (int)_kaio(AIOLIO, mode, list, nent, sigevp);
1832248Sraf if (error == 0)
1842248Sraf return (0);
1852248Sraf oerrno = errno;
1862248Sraf } else {
1872248Sraf oerrno = errno = ENOTSUP;
1882248Sraf error = -1;
1892248Sraf }
1902248Sraf
1912248Sraf if (error == -1 && errno == ENOTSUP) {
1922248Sraf error = errno = 0;
1932248Sraf /*
1942248Sraf * If LIO_WAIT, or notification required, allocate a list head.
1952248Sraf */
1962248Sraf if (mode == LIO_WAIT ||
1972248Sraf (sigevp != NULL &&
1982248Sraf (sigevp->sigev_notify == SIGEV_SIGNAL ||
1992248Sraf sigevp->sigev_notify == SIGEV_THREAD ||
2002248Sraf sigevp->sigev_notify == SIGEV_PORT)))
2012248Sraf head = _aio_lio_alloc();
2022248Sraf if (head) {
2032248Sraf sig_mutex_lock(&head->lio_mutex);
2042248Sraf head->lio_mode = mode;
2052248Sraf head->lio_largefile = 0;
2062248Sraf if (mode == LIO_NOWAIT && sigevp != NULL) {
2072248Sraf if (sigevp->sigev_notify == SIGEV_THREAD) {
2082248Sraf head->lio_port = sigevp->sigev_signo;
2092248Sraf head->lio_event = AIOLIO;
2102248Sraf head->lio_sigevent = sigevp;
2112248Sraf head->lio_sigval.sival_ptr =
2122248Sraf sigevp->sigev_value.sival_ptr;
2132248Sraf } else if (sigevp->sigev_notify == SIGEV_PORT) {
2142248Sraf port_notify_t *pn =
2152248Sraf sigevp->sigev_value.sival_ptr;
2162248Sraf head->lio_port = pn->portnfy_port;
2172248Sraf head->lio_event = AIOLIO;
2182248Sraf head->lio_sigevent = sigevp;
2192248Sraf head->lio_sigval.sival_ptr =
2202248Sraf pn->portnfy_user;
2212248Sraf } else { /* SIGEV_SIGNAL */
2222248Sraf head->lio_signo = sigevp->sigev_signo;
2232248Sraf head->lio_sigval.sival_ptr =
2242248Sraf sigevp->sigev_value.sival_ptr;
2252248Sraf }
2262248Sraf }
2272248Sraf head->lio_nent = head->lio_refcnt = nent;
2282248Sraf sig_mutex_unlock(&head->lio_mutex);
2292248Sraf }
2302248Sraf /*
2312248Sraf * find UFS requests, errno == ENOTSUP/EBADFD,
2322248Sraf */
2332248Sraf for (i = 0; i < nent; i++) {
2342248Sraf if ((aiocbp = list[i]) == NULL ||
2352248Sraf aiocbp->aio_lio_opcode == LIO_NOP ||
2362248Sraf (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
2372248Sraf aiocbp->aio_resultp.aio_errno != EBADFD)) {
2382248Sraf if (head)
2392248Sraf _lio_list_decr(head);
2402248Sraf continue;
2412248Sraf }
2422248Sraf if (aiocbp->aio_resultp.aio_errno == EBADFD)
2432248Sraf SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
2442472Sraf if (aiocbp->aio_reqprio != 0) {
2452248Sraf aiocbp->aio_resultp.aio_errno = EINVAL;
2462248Sraf aiocbp->aio_resultp.aio_return = -1;
2472248Sraf EIOflg = 1;
2482248Sraf if (head)
2492248Sraf _lio_list_decr(head);
2502248Sraf continue;
2512248Sraf }
2522248Sraf /*
2532248Sraf * submit an AIO request with flags AIO_NO_KAIO
2542248Sraf * to avoid the kaio() syscall in _aio_rw()
2552248Sraf */
2562248Sraf switch (aiocbp->aio_lio_opcode) {
2572248Sraf case LIO_READ:
2582248Sraf rw = AIOAREAD;
2592248Sraf break;
2602248Sraf case LIO_WRITE:
2612248Sraf rw = AIOAWRITE;
2622248Sraf break;
2632248Sraf }
2642248Sraf error = _aio_rw(aiocbp, head, &__nextworker_rw, rw,
2652248Sraf (AIO_NO_KAIO | AIO_NO_DUPS));
2662248Sraf if (error == 0)
2672248Sraf aio_ufs++;
2682248Sraf else {
2692248Sraf if (head)
2702248Sraf _lio_list_decr(head);
2712248Sraf aiocbp->aio_resultp.aio_errno = error;
2722248Sraf EIOflg = 1;
2732248Sraf }
2742248Sraf }
2752248Sraf }
2762248Sraf if (EIOflg) {
2772248Sraf errno = EIO;
2782248Sraf return (-1);
2792248Sraf }
2802248Sraf if (mode == LIO_WAIT && oerrno == ENOTSUP) {
2812248Sraf /*
2822248Sraf * call kaio(AIOLIOWAIT) to get all outstanding
2832248Sraf * kernel AIO requests
2842248Sraf */
2852248Sraf if ((nent - aio_ufs) > 0)
2862248Sraf (void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
2872248Sraf if (head != NULL && head->lio_nent > 0) {
2882248Sraf sig_mutex_lock(&head->lio_mutex);
2892248Sraf while (head->lio_refcnt > 0) {
2902248Sraf int err;
2912248Sraf head->lio_waiting = 1;
2922248Sraf pthread_cleanup_push(_lio_listio_cleanup, head);
2932248Sraf err = sig_cond_wait(&head->lio_cond_cv,
2942248Sraf &head->lio_mutex);
2952248Sraf pthread_cleanup_pop(0);
2962248Sraf head->lio_waiting = 0;
2972248Sraf if (err && head->lio_nent > 0) {
2982248Sraf sig_mutex_unlock(&head->lio_mutex);
2992248Sraf errno = err;
3002248Sraf return (-1);
3012248Sraf }
3022248Sraf }
3032248Sraf sig_mutex_unlock(&head->lio_mutex);
3042248Sraf ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
3052248Sraf _aio_lio_free(head);
3062248Sraf for (i = 0; i < nent; i++) {
3072248Sraf if ((aiocbp = list[i]) != NULL &&
3082248Sraf aiocbp->aio_resultp.aio_errno) {
3092248Sraf errno = EIO;
3102248Sraf return (-1);
3112248Sraf }
3122248Sraf }
3132248Sraf }
3142248Sraf return (0);
3152248Sraf }
3162248Sraf return (error);
3172248Sraf }
3182248Sraf
3192248Sraf static void
_lio_list_decr(aio_lio_t * head)3202248Sraf _lio_list_decr(aio_lio_t *head)
3212248Sraf {
3222248Sraf sig_mutex_lock(&head->lio_mutex);
3232248Sraf head->lio_nent--;
3242248Sraf head->lio_refcnt--;
3252248Sraf sig_mutex_unlock(&head->lio_mutex);
3262248Sraf }
3272248Sraf
/*
 * __aio_suspend() cancellation handler.
 *
 * Runs with __aio_mutex held (re-acquired by the paired cleanup push)
 * when the suspending thread is cancelled; undoes the counter bump
 * made before blocking and releases the mutex.
 */
/* ARGSUSED */
static void
_aio_suspend_cleanup(int *counter)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));
	(*counter)--;		/* _aio_kernel_suspend or _aio_suscv_cnt */
	sig_mutex_unlock(&__aio_mutex);
}
3392248Sraf
/*
 * Common worker for aio_suspend() and (32-bit only) aio_suspend64().
 *
 * Blocks until at least one request in 'list' (nent entries) has
 * completed, the timeout expires, or an error occurs.  'largefile'
 * selects the aiocb64_t layout of the list entries and is only legal
 * in 32-bit processes.
 *
 * Completion may come from two places: kernel aio (_kaio(AIOSUSPEND))
 * or the user-level worker pool (signalled via _aio_iowait_cv); this
 * routine alternates between them, recomputing the remaining timeout
 * from gethrtime() on each pass.
 *
 * Returns 0 when a completed request was found, else -1 with errno
 * set (EAGAIN on timeout, EINVAL on bad arguments, or a cv error).
 */
static int
__aio_suspend(void **list, int nent, const timespec_t *timo, int largefile)
{
	int cv_err;	/* error code from cond_xxx() */
	int kerr;	/* error code from _kaio(AIOSUSPEND) */
	int i;
	timespec_t twait;	/* copy of timo for internal calculations */
	timespec_t *wait = NULL;
	int timedwait;
	int req_outstanding;
	aiocb_t **listp;
	aiocb_t *aiocbp;
#if !defined(_LP64)
	aiocb64_t **listp64;
	aiocb64_t *aiocbp64;
#endif
	hrtime_t hrtstart;	/* entry time, for deadline computation */
	hrtime_t hrtend;	/* absolute deadline */
	hrtime_t hrtres;	/* time remaining until the deadline */

#if defined(_LP64)
	if (largefile)
		aio_panic("__aio_suspend: largefile set when _LP64 defined");
#endif

	if (nent <= 0) {
		errno = EINVAL;
		return (-1);
	}

	if (timo) {
		/* reject malformed timespec values */
		if (timo->tv_sec < 0 || timo->tv_nsec < 0 ||
		    timo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		/* Initialize start time if time monitoring desired */
		if (timo->tv_sec > 0 || timo->tv_nsec > 0) {
			timedwait = AIO_TIMEOUT_WAIT;
			hrtstart = gethrtime();
		} else {
			/* content of timeout = 0 : polling */
			timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		/* timeout pointer = NULL : wait indefinitely */
		timedwait = AIO_TIMEOUT_INDEF;
	}

	/*
	 * Mark CHECK-state entries CHECKED so aio_error() knows a
	 * suspend is (or was) interested in them.
	 */
#if !defined(_LP64)
	if (largefile) {
		listp64 = (aiocb64_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp64 = listp64[i]) != NULL &&
			    aiocbp64->aio_state == CHECK)
				aiocbp64->aio_state = CHECKED;
		}
	} else
#endif	/* !_LP64 */
	{
		listp = (aiocb_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp = listp[i]) != NULL &&
			    aiocbp->aio_state == CHECK)
				aiocbp->aio_state = CHECKED;
		}
	}

	sig_mutex_lock(&__aio_mutex);

	/*
	 * The next "if -case" is required to accelerate the
	 * access to completed RAW-IO requests.
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		/* Only kernel requests pending */

		/*
		 * _aio_kernel_suspend is used to detect completed non RAW-IO
		 * requests.
		 * As long as this thread resides in the kernel (_kaio) further
		 * asynchronous non RAW-IO requests could be submitted.
		 */
		_aio_kernel_suspend++;

		/*
		 * Always do the kaio() call without using the KAIO_SUPPORTED()
		 * checks because it is not mandatory to have a valid fd
		 * set in the list entries, only the resultp must be set.
		 *
		 * _kaio(AIOSUSPEND ...) return values :
		 * 0: everythink ok, completed request found
		 * -1: error
		 * 1: no error : _aiodone awaked the _kaio(AIOSUSPEND,,)
		 *    system call using  _kaio(AIONOTIFY). It means, that some
		 *    non RAW-IOs completed inbetween.
		 */

		pthread_cleanup_push(_aio_suspend_cleanup,
		    &_aio_kernel_suspend);
		pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
		sig_mutex_unlock(&__aio_mutex);
		_cancel_prologue();
		kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND,
		    list, nent, timo, -1);
		_cancel_epilogue();
		pthread_cleanup_pop(1);	/* sig_mutex_lock(&__aio_mutex) */
		pthread_cleanup_pop(0);

		_aio_kernel_suspend--;

		if (!kerr) {
			sig_mutex_unlock(&__aio_mutex);
			return (0);
		}
	} else {
		kerr = 1;	/* simulation: _kaio detected AIONOTIFY */
	}

	/*
	 * Return kernel error code if no other IOs are outstanding.
	 */
	req_outstanding = _aio_doneq_cnt + _aio_outstand_cnt;

	sig_mutex_unlock(&__aio_mutex);

	if (req_outstanding == 0) {
		/* no IOs outstanding in the thread pool */
		if (kerr == 1)
			/* return "no IOs completed" */
			errno = EAGAIN;
		return (-1);
	}

	/*
	 * IOs using the thread pool are outstanding.
	 */
	if (timedwait == AIO_TIMEOUT_WAIT) {
		/* time monitoring */
		hrtend = hrtstart + (hrtime_t)timo->tv_sec * (hrtime_t)NANOSEC +
		    (hrtime_t)timo->tv_nsec;
		hrtres = hrtend - gethrtime();
		if (hrtres <= 0)
			hrtres = 1;
		twait.tv_sec = hrtres / (hrtime_t)NANOSEC;
		twait.tv_nsec = hrtres % (hrtime_t)NANOSEC;
		wait = &twait;
	} else if (timedwait == AIO_TIMEOUT_POLL) {
		twait = *timo;	/* content of timo = 0 : polling */
		wait = &twait;
	}

	for (;;) {
		int	error;
		int	inprogress;

		/* first scan file system requests */
		inprogress = 0;
		for (i = 0; i < nent; i++) {
#if !defined(_LP64)
			if (largefile) {
				if ((aiocbp64 = listp64[i]) == NULL)
					continue;
				error = aiocbp64->aio_resultp.aio_errno;
			} else
#endif
			{
				if ((aiocbp = listp[i]) == NULL)
					continue;
				error = aiocbp->aio_resultp.aio_errno;
			}
			if (error == EINPROGRESS)
				inprogress = 1;
			else if (error != ECANCELED) {
				/* a request completed (or failed): done */
				errno = 0;
				return (0);
			}
		}

		sig_mutex_lock(&__aio_mutex);

		/*
		 * If there aren't outstanding I/Os in the thread pool then
		 * we have to return here, provided that all kernel RAW-IOs
		 * also completed.
		 * If the kernel was notified to return, then we have to check
		 * possible pending RAW-IOs.
		 */
		if (_aio_outstand_cnt == 0 && inprogress == 0 && kerr != 1) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		/*
		 * There are outstanding IOs in the thread pool or the kernel
		 * was notified to return.
		 * Check pending RAW-IOs first.
		 */
		if (kerr == 1) {
			/*
			 * _aiodone just notified the kernel about
			 * completed non RAW-IOs (AIONOTIFY was detected).
			 */
			if (timedwait == AIO_TIMEOUT_WAIT) {
				/* Update remaining timeout for the kernel */
				hrtres = hrtend - gethrtime();
				if (hrtres <= 0) {
					/* timer expired */
					sig_mutex_unlock(&__aio_mutex);
					errno = EAGAIN;
					break;
				}
				wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
				wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
			}
			_aio_kernel_suspend++;

			pthread_cleanup_push(_aio_suspend_cleanup,
			    &_aio_kernel_suspend);
			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND,
			    list, nent, wait, -1);
			_cancel_epilogue();
			pthread_cleanup_pop(1);
			pthread_cleanup_pop(0);

			_aio_kernel_suspend--;

			if (!kerr) {
				sig_mutex_unlock(&__aio_mutex);
				return (0);
			}
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			/* polling mode: a single pass is enough */
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		if (timedwait == AIO_TIMEOUT_WAIT) {
			/* Update remaining timeout */
			hrtres = hrtend - gethrtime();
			if (hrtres <= 0) {
				/* timer expired */
				sig_mutex_unlock(&__aio_mutex);
				errno = EAGAIN;
				break;
			}
			wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
			wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
		}

		if (_aio_outstand_cnt == 0) {
			/* pool drained while we were checking; rescan */
			sig_mutex_unlock(&__aio_mutex);
			continue;
		}

		_aio_suscv_cnt++;	/* ID for _aiodone (wake up) */

		pthread_cleanup_push(_aio_suspend_cleanup, &_aio_suscv_cnt);
		if (timedwait == AIO_TIMEOUT_WAIT) {
			cv_err = sig_cond_reltimedwait(&_aio_iowait_cv,
			    &__aio_mutex, wait);
			if (cv_err == ETIME)
				cv_err = EAGAIN;
		} else {
			/* wait indefinitely */
			cv_err = sig_cond_wait(&_aio_iowait_cv, &__aio_mutex);
		}
		/* this decrements _aio_suscv_cnt and drops __aio_mutex */
		pthread_cleanup_pop(1);

		if (cv_err) {
			errno = cv_err;
			break;
		}
	}
	return (-1);
}
6232248Sraf
6242248Sraf int
aio_suspend(const aiocb_t * const list[],int nent,const timespec_t * timeout)6252248Sraf aio_suspend(const aiocb_t * const list[], int nent,
6262248Sraf const timespec_t *timeout)
6272248Sraf {
6282248Sraf return (__aio_suspend((void **)list, nent, timeout, 0));
6292248Sraf }
6302248Sraf
/*
 * aio_error(3C): return the error status of the request described by
 * *aiocbp — EINPROGRESS while pending, 0 on success, or the failure
 * errno.  Returns -1 with errno set to EINVAL if the kernel does not
 * recognize the aiocb.
 *
 * For completed user-level (USERAIO) requests this call also retires
 * the internal request structure, so a subsequent aio_error() on the
 * same aiocb no longer finds it in the hash.
 */
int
aio_error(const aiocb_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			/* revert to CHECK so the next poll asks the kernel */
			((aiocb_t *)aiocbp)->aio_state = CHECK;
		}
	} else if (aiocbp->aio_state == USERAIO) {
		/* completed user-level request: retire its aio_req_t */
		sig_mutex_lock(&__aio_mutex);
		if ((reqp = _aio_hash_del((aio_result_t *)resultp)) == NULL) {
			sig_mutex_unlock(&__aio_mutex);
			((aiocb_t *)aiocbp)->aio_state = CHECKED;
		} else {
			((aiocb_t *)aiocbp)->aio_state = NOCHECK;
			ASSERT(reqp->req_head == NULL);
			(void) _aio_req_remove(reqp);
			sig_mutex_unlock(&__aio_mutex);
			_aio_req_free(reqp);
		}
	}
	return (error);
}
6692248Sraf
/*
 * aio_return(3C): return the final return status of a completed
 * request, exactly once.  A second call on the same aiocb (or a call
 * while the request is still in progress) fails with errno set to
 * EINVAL/EINPROGRESS.  Also frees the internal aio_req_t, if any.
 */
ssize_t
aio_return(aiocb_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with an membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourself.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * we use this condition to indicate either that
	 * aio_return() has been called before or should
	 * not have been called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() will return the fact that the result has
	 * already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	/* (-1, EINVAL) is the "already returned" sentinel */
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}
7262248Sraf
7272248Sraf void
_lio_remove(aio_req_t * reqp)7282248Sraf _lio_remove(aio_req_t *reqp)
7292248Sraf {
7302248Sraf aio_lio_t *head;
7312248Sraf int refcnt;
7322248Sraf
7332248Sraf if ((head = reqp->req_head) != NULL) {
7342248Sraf sig_mutex_lock(&head->lio_mutex);
7352248Sraf ASSERT(head->lio_refcnt == head->lio_nent);
7362248Sraf refcnt = --head->lio_nent;
7372248Sraf head->lio_refcnt--;
7382248Sraf sig_mutex_unlock(&head->lio_mutex);
7392248Sraf if (refcnt == 0)
7402248Sraf _aio_lio_free(head);
7412248Sraf reqp->req_head = NULL;
7422248Sraf }
7432248Sraf }
7442248Sraf
7452248Sraf /*
7462248Sraf * This function returns the number of asynchronous I/O requests submitted.
7472248Sraf */
7482248Sraf static int
__aio_fsync_bar(aiocb_t * aiocbp,aio_lio_t * head,aio_worker_t * aiowp,int workerscnt)7492248Sraf __aio_fsync_bar(aiocb_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
7502248Sraf int workerscnt)
7512248Sraf {
7522248Sraf int i;
7532248Sraf int error;
7542248Sraf aio_worker_t *next = aiowp;
7552248Sraf
7562248Sraf for (i = 0; i < workerscnt; i++) {
7572248Sraf error = _aio_rw(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
7582248Sraf if (error != 0) {
7592248Sraf sig_mutex_lock(&head->lio_mutex);
7602248Sraf head->lio_mode = LIO_DESTROY; /* ignore fsync */
7612248Sraf head->lio_nent -= workerscnt - i;
7622248Sraf head->lio_refcnt -= workerscnt - i;
7632248Sraf sig_mutex_unlock(&head->lio_mutex);
7642248Sraf errno = EAGAIN;
7652248Sraf return (i);
7662248Sraf }
7672248Sraf next = next->work_forw;
7682248Sraf }
7692248Sraf return (i);
7702248Sraf }
7712248Sraf
/*
 * aio_fsync(3C): asynchronously force completed I/O for the file
 * referred to by aiocbp->aio_fildes to stable storage.
 * op is O_SYNC (fsync semantics) or O_DSYNC (fdatasync semantics).
 * Returns 0 when the sync was queued (or performed synchronously),
 * -1 with errno set otherwise.
 */
int
aio_fsync(int op, aiocb_t *aiocbp)
{
	aio_lio_t *head;
	struct stat statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	/* the aiocb must not already describe an outstanding request */
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	/* validate the file descriptor */
	if (fstat(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC));
		else
			return (__fdsync(aiocbp->aio_fildes, FSYNC));
	}

	/*
	 * re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 0;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *	 will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}
8582248Sraf
/*
 * aio_cancel(3C): attempt to cancel the request described by *aiocbp,
 * or, when aiocbp is NULL, all outstanding requests on fd.
 * Returns AIO_CANCELED, AIO_NOTCANCELED, AIO_ALLDONE, or -1 with
 * errno set (e.g. EBADF from the fstat() check, EINVAL on fd
 * mismatch).
 */
int
aio_cancel(int fd, aiocb_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat buf;

	/* validate the file descriptor */
	if (fstat(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			/* user-level request: cancel on its worker queue */
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		/* kernel aio request: let the kernel try to cancel it */
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	/* aiocbp == NULL: cancel everything outstanding on fd */
	return (aiocancel_all(fd));
}
9012248Sraf
/*
 * __aio_waitn() cancellation handler.
 *
 * Runs with __aio_mutex held when a thread blocked in __aio_waitn()
 * is cancelled: clears this call's state flags, wakes the next queued
 * aio_waitn() caller (only one may run at a time), and drops the lock.
 */
/* ARGSUSED */
static void
_aio_waitn_cleanup(void *arg)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));

	/* check for pending aio_waitn() calls */
	_aio_flags &= ~(AIO_LIB_WAITN | AIO_WAIT_INPROGRESS | AIO_IO_WAITING);
	if (_aio_flags & AIO_LIB_WAITN_PENDING) {
		_aio_flags &= ~AIO_LIB_WAITN_PENDING;
		(void) cond_signal(&_aio_waitn_cv);
	}

	sig_mutex_unlock(&__aio_mutex);
}
9202248Sraf
9212248Sraf /*
9222248Sraf * aio_waitn can be used to reap the results of several I/O operations that
9232248Sraf * were submitted asynchronously. The submission of I/Os can be done using
9242248Sraf * existing POSIX interfaces: lio_listio, aio_write or aio_read.
9252248Sraf * aio_waitn waits until "nwait" I/Os (supplied as a parameter) have
9262248Sraf * completed and it returns the descriptors for these I/Os in "list". The
9272248Sraf * maximum size of this list is given by "nent" and the actual number of I/Os
9282248Sraf * completed is returned in "nwait". Otherwise aio_waitn might also
9292248Sraf * return if the timeout expires. Additionally, aio_waitn returns 0 if
9302248Sraf * successful or -1 if an error occurred.
9312248Sraf */
static int
__aio_waitn(void **list, uint_t nent, uint_t *nwait, const timespec_t *utimo)
{
	int error = 0;
	uint_t dnwait = 0;	/* amount of requests in the waitn-done list */
	uint_t kwaitcnt;	/* expected "done" requests from kernel */
	uint_t knentcnt;	/* max. expected "done" requests from kernel */
	int uerrno = 0;
	int kerrno = 0;		/* save errno from _kaio() call */
	int timedwait = AIO_TIMEOUT_UNDEF;
	aio_req_t *reqp;
	timespec_t end;
	timespec_t twait;	/* copy of utimo for internal calculations */
	timespec_t *wait = NULL;

	/* caller must ask for at least one and at most nent completions */
	if (nent == 0 || *nwait == 0 || *nwait > nent) {
		errno = EINVAL;
		return (-1);
	}

	/*
	 * Only one running aio_waitn call per process allowed.
	 * Further calls will be blocked here until the running
	 * call finishes.
	 */

	sig_mutex_lock(&__aio_mutex);

	while (_aio_flags & AIO_LIB_WAITN) {
		/* with a zero timeout, do not wait for our turn at all */
		if (utimo && utimo->tv_sec == 0 && utimo->tv_nsec == 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			return (0);
		}
		_aio_flags |= AIO_LIB_WAITN_PENDING;
		pthread_cleanup_push(sig_mutex_unlock, &__aio_mutex);
		error = sig_cond_wait(&_aio_waitn_cv, &__aio_mutex);
		pthread_cleanup_pop(0);
		if (error != 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			errno = error;
			return (-1);
		}
	}

	/*
	 * From here on _aio_waitn_cleanup() restores the flags and
	 * wakes any pending aio_waitn() caller if we are cancelled.
	 */
	pthread_cleanup_push(_aio_waitn_cleanup, NULL);

	_aio_flags |= AIO_LIB_WAITN;

	if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
		error = -1;
		dnwait = 0;
		goto out;
	}
	if (timedwait != AIO_TIMEOUT_INDEF) {
		twait = *utimo;
		wait = &twait;
	}

	/*
	 * If both counters are still set to zero, then only
	 * kernel requests are currently outstanding (raw-I/Os).
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		for (;;) {
			/* never ask the kernel for more than fits in list */
			kwaitcnt = *nwait - dnwait;
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			/* drop the mutex across the blocking syscall */
			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			if (error == 0) {
				dnwait += kwaitcnt;
				if (dnwait >= *nwait ||
				    *nwait < AIO_WAITN_MAXIOCBS)
					break;
				if (timedwait == AIO_TIMEOUT_WAIT) {
					error = _aio_get_timedelta(&end, wait);
					if (error == -1) {
						/* timer expired */
						errno = ETIME;
						break;
					}
				}
				continue;
			}
			if (errno == EAGAIN) {
				/* partial success still counts as success */
				if (dnwait > 0)
					error = 0;
				break;
			}
			if (errno == ETIME || errno == EINTR) {
				dnwait += kwaitcnt;
				break;
			}
			/* fatal error */
			break;
		}

		goto out;
	}

	/* File system I/Os outstanding ... */

	if (timedwait == AIO_TIMEOUT_UNDEF) {
		if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
			error = -1;
			dnwait = 0;
			goto out;
		}
		if (timedwait != AIO_TIMEOUT_INDEF) {
			twait = *utimo;
			wait = &twait;
		}
	}

	for (;;) {
		uint_t sum_reqs;

		/*
		 * Calculate sum of active non RAW-IO requests (sum_reqs).
		 * If the expected amount of completed requests (*nwait) is
		 * greater than the calculated sum (sum_reqs) then
		 * use _kaio to check pending RAW-IO requests.
		 */
		sum_reqs = _aio_doneq_cnt + dnwait + _aio_outstand_cnt;
		kwaitcnt = (*nwait > sum_reqs) ? *nwait - sum_reqs : 0;

		if (kwaitcnt != 0) {
			/* possibly some kernel I/Os outstanding */
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			_aio_flags |= AIO_WAIT_INPROGRESS;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			_aio_flags &= ~AIO_WAIT_INPROGRESS;

			if (error == 0) {
				dnwait += kwaitcnt;
			} else {
				switch (errno) {
				case EINVAL:
				case EAGAIN:
					/* don't wait for kernel I/Os */
					kerrno = 0; /* ignore _kaio() errno */
					*nwait = _aio_doneq_cnt +
					    _aio_outstand_cnt + dnwait;
					error = 0;
					break;
				case EINTR:
				case ETIME:
					/* just scan for completed LIB I/Os */
					dnwait += kwaitcnt;
					timedwait = AIO_TIMEOUT_POLL;
					kerrno = errno;	/* save _kaio() errno */
					error = 0;
					break;
				default:
					kerrno = errno;	/* save _kaio() errno */
					break;
				}
			}
			if (error)
				break;		/* fatal kernel error */
		}

		/* check completed FS requests in the "done" queue */

		while (_aio_doneq_cnt && dnwait < nent) {
			/* get done requests */
			if ((reqp = _aio_req_remove(NULL)) != NULL) {
				(void) _aio_hash_del(reqp->req_resultp);
				list[dnwait++] = reqp->req_aiocbp;
				_aio_req_mark_done(reqp);
				_lio_remove(reqp);
				_aio_req_free(reqp);
			}
		}

		if (dnwait >= *nwait) {
			/* min. requested amount of completed I/Os satisfied */
			break;
		}
		if (timedwait == AIO_TIMEOUT_WAIT &&
		    (error = _aio_get_timedelta(&end, wait)) == -1) {
			/* timer expired */
			uerrno = ETIME;
			break;
		}

		/*
		 * If some I/Os are outstanding and we have to wait for them,
		 * then sleep here.  _aiodone() will call _aio_waitn_wakeup()
		 * to wakeup this thread as soon as the required amount of
		 * completed I/Os is done.
		 */
		if (_aio_outstand_cnt > 0 && timedwait != AIO_TIMEOUT_POLL) {
			/*
			 * _aio_waitn_wakeup() will wake up this thread when:
			 * - _aio_waitncnt requests are completed or
			 * - _aio_outstand_cnt becomes zero.
			 * sig_cond_reltimedwait() could also return with
			 * a timeout error (ETIME).
			 */
			if (*nwait < _aio_outstand_cnt)
				_aio_waitncnt = *nwait;
			else
				_aio_waitncnt = _aio_outstand_cnt;

			_aio_flags |= AIO_IO_WAITING;

			if (wait)
				uerrno = sig_cond_reltimedwait(&_aio_iowait_cv,
				    &__aio_mutex, wait);
			else
				uerrno = sig_cond_wait(&_aio_iowait_cv,
				    &__aio_mutex);

			_aio_flags &= ~AIO_IO_WAITING;

			if (uerrno == ETIME) {
				timedwait = AIO_TIMEOUT_POLL;
				continue;
			}
			if (uerrno != 0)
				timedwait = AIO_TIMEOUT_POLL;
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			/* polling or timer expired */
			break;
		}
	}

	/* user-level errno (if any) takes precedence over the kaio errno */
	errno = uerrno == 0 ? kerrno : uerrno;
	if (errno)
		error = -1;
	else
		error = 0;

out:
	*nwait = dnwait;

	pthread_cleanup_pop(1);	/* drops __aio_mutex */

	return (error);
}
11982248Sraf
11992248Sraf int
aio_waitn(aiocb_t * list[],uint_t nent,uint_t * nwait,const timespec_t * timeout)12002248Sraf aio_waitn(aiocb_t *list[], uint_t nent, uint_t *nwait,
12012248Sraf const timespec_t *timeout)
12022248Sraf {
12032248Sraf return (__aio_waitn((void **)list, nent, nwait, timeout));
12042248Sraf }
12052248Sraf
12062248Sraf void
_aio_waitn_wakeup(void)12072248Sraf _aio_waitn_wakeup(void)
12082248Sraf {
12092248Sraf /*
12102248Sraf * __aio_waitn() sets AIO_IO_WAITING to notify _aiodone() that
12112248Sraf * it is waiting for completed I/Os. The number of required
12122248Sraf * completed I/Os is stored into "_aio_waitncnt".
12132248Sraf * aio_waitn() is woken up when
12142248Sraf * - there are no further outstanding I/Os
12152248Sraf * (_aio_outstand_cnt == 0) or
12162248Sraf * - the expected number of I/Os has completed.
12172248Sraf * Only one __aio_waitn() function waits for completed I/Os at
12182248Sraf * a time.
12192248Sraf *
12202248Sraf * __aio_suspend() increments "_aio_suscv_cnt" to notify
12212248Sraf * _aiodone() that at least one __aio_suspend() call is
12222248Sraf * waiting for completed I/Os.
12232248Sraf * There could be more than one __aio_suspend() function
12242248Sraf * waiting for completed I/Os. Because every function should
12252248Sraf * be waiting for different I/Os, _aiodone() has to wake up all
12262248Sraf * __aio_suspend() functions each time.
12272248Sraf * Every __aio_suspend() function will compare the recently
12282248Sraf * completed I/O with its own list.
12292248Sraf */
12302248Sraf ASSERT(MUTEX_HELD(&__aio_mutex));
12312248Sraf if (_aio_flags & AIO_IO_WAITING) {
12322248Sraf if (_aio_waitncnt > 0)
12332248Sraf _aio_waitncnt--;
12342248Sraf if (_aio_outstand_cnt == 0 || _aio_waitncnt == 0 ||
12352248Sraf _aio_suscv_cnt > 0)
12362248Sraf (void) cond_broadcast(&_aio_iowait_cv);
12372248Sraf } else {
12382248Sraf /* Wake up waiting aio_suspend calls */
12392248Sraf if (_aio_suscv_cnt > 0)
12402248Sraf (void) cond_broadcast(&_aio_iowait_cv);
12412248Sraf }
12422248Sraf }
12432248Sraf
12442248Sraf /*
12452248Sraf * timedwait values :
12462248Sraf * AIO_TIMEOUT_POLL : polling
12472248Sraf * AIO_TIMEOUT_WAIT : timeout
12482248Sraf * AIO_TIMEOUT_INDEF : wait indefinitely
12492248Sraf */
12502248Sraf static int
_aio_check_timeout(const timespec_t * utimo,timespec_t * end,int * timedwait)12512248Sraf _aio_check_timeout(const timespec_t *utimo, timespec_t *end, int *timedwait)
12522248Sraf {
12532248Sraf struct timeval curtime;
12542248Sraf
12552248Sraf if (utimo) {
12562248Sraf if (utimo->tv_sec < 0 || utimo->tv_nsec < 0 ||
12572248Sraf utimo->tv_nsec >= NANOSEC) {
12582248Sraf errno = EINVAL;
12592248Sraf return (-1);
12602248Sraf }
12612248Sraf if (utimo->tv_sec > 0 || utimo->tv_nsec > 0) {
12622248Sraf (void) gettimeofday(&curtime, NULL);
12632248Sraf end->tv_sec = utimo->tv_sec + curtime.tv_sec;
12642248Sraf end->tv_nsec = utimo->tv_nsec + 1000 * curtime.tv_usec;
12652248Sraf if (end->tv_nsec >= NANOSEC) {
12662248Sraf end->tv_nsec -= NANOSEC;
12672248Sraf end->tv_sec += 1;
12682248Sraf }
12692248Sraf *timedwait = AIO_TIMEOUT_WAIT;
12702248Sraf } else {
12712248Sraf /* polling */
12722248Sraf *timedwait = AIO_TIMEOUT_POLL;
12732248Sraf }
12742248Sraf } else {
12752248Sraf *timedwait = AIO_TIMEOUT_INDEF; /* wait indefinitely */
12762248Sraf }
12772248Sraf return (0);
12782248Sraf }
12792248Sraf
12802248Sraf #if !defined(_LP64)
12812248Sraf
12822248Sraf int
aio_read64(aiocb64_t * aiocbp)12832248Sraf aio_read64(aiocb64_t *aiocbp)
12842248Sraf {
12852472Sraf if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
12862248Sraf errno = EINVAL;
12872248Sraf return (-1);
12882248Sraf }
12892248Sraf if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
12902248Sraf errno = EBUSY;
12912248Sraf return (-1);
12922248Sraf }
12932248Sraf if (_aio_sigev_thread64(aiocbp) != 0)
12942248Sraf return (-1);
12952248Sraf aiocbp->aio_lio_opcode = LIO_READ;
12962248Sraf return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAREAD64,
12972248Sraf (AIO_KAIO | AIO_NO_DUPS)));
12982248Sraf }
12992248Sraf
13002248Sraf int
aio_write64(aiocb64_t * aiocbp)13012248Sraf aio_write64(aiocb64_t *aiocbp)
13022248Sraf {
13032472Sraf if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
13042248Sraf errno = EINVAL;
13052248Sraf return (-1);
13062248Sraf }
13072248Sraf if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
13082248Sraf errno = EBUSY;
13092248Sraf return (-1);
13102248Sraf }
13112248Sraf if (_aio_sigev_thread64(aiocbp) != 0)
13122248Sraf return (-1);
13132248Sraf aiocbp->aio_lio_opcode = LIO_WRITE;
13142248Sraf return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAWRITE64,
13152248Sraf (AIO_KAIO | AIO_NO_DUPS)));
13162248Sraf }
13172248Sraf
13182248Sraf int
lio_listio64(int mode,aiocb64_t * _RESTRICT_KYWD const * _RESTRICT_KYWD list,int nent,struct sigevent * _RESTRICT_KYWD sigevp)13192248Sraf lio_listio64(int mode, aiocb64_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
13202248Sraf int nent, struct sigevent *_RESTRICT_KYWD sigevp)
13212248Sraf {
13222248Sraf int aio_ufs = 0;
13232248Sraf int oerrno = 0;
13242248Sraf aio_lio_t *head = NULL;
13252248Sraf aiocb64_t *aiocbp;
13262248Sraf int state = 0;
13272248Sraf int EIOflg = 0;
13282248Sraf int rw;
13292248Sraf int do_kaio = 0;
13302248Sraf int error;
13312248Sraf int i;
13322248Sraf
13332248Sraf if (!_kaio_ok)
13342248Sraf _kaio_init();
13352248Sraf
13362248Sraf if (aio_list_max == 0)
13372248Sraf aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);
13382248Sraf
13392248Sraf if (nent <= 0 || nent > aio_list_max) {
13402248Sraf errno = EINVAL;
13412248Sraf return (-1);
13422248Sraf }
13432248Sraf
13442248Sraf switch (mode) {
13452248Sraf case LIO_WAIT:
13462248Sraf state = NOCHECK;
13472248Sraf break;
13482248Sraf case LIO_NOWAIT:
13492248Sraf state = CHECK;
13502248Sraf break;
13512248Sraf default:
13522248Sraf errno = EINVAL;
13532248Sraf return (-1);
13542248Sraf }
13552248Sraf
13562248Sraf for (i = 0; i < nent; i++) {
13572248Sraf if ((aiocbp = list[i]) == NULL)
13582248Sraf continue;
13592248Sraf if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
13602248Sraf errno = EBUSY;
13612248Sraf return (-1);
13622248Sraf }
13632248Sraf if (_aio_sigev_thread64(aiocbp) != 0)
13642248Sraf return (-1);
13652248Sraf if (aiocbp->aio_lio_opcode == LIO_NOP)
13662248Sraf aiocbp->aio_state = NOCHECK;
13672248Sraf else {
13682248Sraf aiocbp->aio_state = state;
13692248Sraf if (KAIO_SUPPORTED(aiocbp->aio_fildes))
13702248Sraf do_kaio++;
13712248Sraf else
13722248Sraf aiocbp->aio_resultp.aio_errno = ENOTSUP;
13732248Sraf }
13742248Sraf }
13752248Sraf if (_aio_sigev_thread_init(sigevp) != 0)
13762248Sraf return (-1);
13772248Sraf
13782248Sraf if (do_kaio) {
13792248Sraf error = (int)_kaio(AIOLIO64, mode, list, nent, sigevp);
13802248Sraf if (error == 0)
13812248Sraf return (0);
13822248Sraf oerrno = errno;
13832248Sraf } else {
13842248Sraf oerrno = errno = ENOTSUP;
13852248Sraf error = -1;
13862248Sraf }
13872248Sraf
13882248Sraf if (error == -1 && errno == ENOTSUP) {
13892248Sraf error = errno = 0;
13902248Sraf /*
13912248Sraf * If LIO_WAIT, or notification required, allocate a list head.
13922248Sraf */
13932248Sraf if (mode == LIO_WAIT ||
13942248Sraf (sigevp != NULL &&
13952248Sraf (sigevp->sigev_notify == SIGEV_SIGNAL ||
13962248Sraf sigevp->sigev_notify == SIGEV_THREAD ||
13972248Sraf sigevp->sigev_notify == SIGEV_PORT)))
13982248Sraf head = _aio_lio_alloc();
13992248Sraf if (head) {
14002248Sraf sig_mutex_lock(&head->lio_mutex);
14012248Sraf head->lio_mode = mode;
14022248Sraf head->lio_largefile = 1;
14032248Sraf if (mode == LIO_NOWAIT && sigevp != NULL) {
14042248Sraf if (sigevp->sigev_notify == SIGEV_THREAD) {
14052248Sraf head->lio_port = sigevp->sigev_signo;
14062248Sraf head->lio_event = AIOLIO64;
14072248Sraf head->lio_sigevent = sigevp;
14082248Sraf head->lio_sigval.sival_ptr =
14092248Sraf sigevp->sigev_value.sival_ptr;
14102248Sraf } else if (sigevp->sigev_notify == SIGEV_PORT) {
14112248Sraf port_notify_t *pn =
14122248Sraf sigevp->sigev_value.sival_ptr;
14132248Sraf head->lio_port = pn->portnfy_port;
14142248Sraf head->lio_event = AIOLIO64;
14152248Sraf head->lio_sigevent = sigevp;
14162248Sraf head->lio_sigval.sival_ptr =
14172248Sraf pn->portnfy_user;
14182248Sraf } else { /* SIGEV_SIGNAL */
14192248Sraf head->lio_signo = sigevp->sigev_signo;
14202248Sraf head->lio_sigval.sival_ptr =
14212248Sraf sigevp->sigev_value.sival_ptr;
14222248Sraf }
14232248Sraf }
14242248Sraf head->lio_nent = head->lio_refcnt = nent;
14252248Sraf sig_mutex_unlock(&head->lio_mutex);
14262248Sraf }
14272248Sraf /*
14282248Sraf * find UFS requests, errno == ENOTSUP/EBADFD,
14292248Sraf */
14302248Sraf for (i = 0; i < nent; i++) {
14312248Sraf if ((aiocbp = list[i]) == NULL ||
14322248Sraf aiocbp->aio_lio_opcode == LIO_NOP ||
14332248Sraf (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
14342248Sraf aiocbp->aio_resultp.aio_errno != EBADFD)) {
14352248Sraf if (head)
14362248Sraf _lio_list_decr(head);
14372248Sraf continue;
14382248Sraf }
14392248Sraf if (aiocbp->aio_resultp.aio_errno == EBADFD)
14402248Sraf SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
14412472Sraf if (aiocbp->aio_reqprio != 0) {
14422248Sraf aiocbp->aio_resultp.aio_errno = EINVAL;
14432248Sraf aiocbp->aio_resultp.aio_return = -1;
14442248Sraf EIOflg = 1;
14452248Sraf if (head)
14462248Sraf _lio_list_decr(head);
14472248Sraf continue;
14482248Sraf }
14492248Sraf /*
14502248Sraf * submit an AIO request with flags AIO_NO_KAIO
14512248Sraf * to avoid the kaio() syscall in _aio_rw()
14522248Sraf */
14532248Sraf switch (aiocbp->aio_lio_opcode) {
14542248Sraf case LIO_READ:
14552248Sraf rw = AIOAREAD64;
14562248Sraf break;
14572248Sraf case LIO_WRITE:
14582248Sraf rw = AIOAWRITE64;
14592248Sraf break;
14602248Sraf }
14612248Sraf error = _aio_rw64(aiocbp, head, &__nextworker_rw, rw,
14622248Sraf (AIO_NO_KAIO | AIO_NO_DUPS));
14632248Sraf if (error == 0)
14642248Sraf aio_ufs++;
14652248Sraf else {
14662248Sraf if (head)
14672248Sraf _lio_list_decr(head);
14682248Sraf aiocbp->aio_resultp.aio_errno = error;
14692248Sraf EIOflg = 1;
14702248Sraf }
14712248Sraf }
14722248Sraf }
14732248Sraf if (EIOflg) {
14742248Sraf errno = EIO;
14752248Sraf return (-1);
14762248Sraf }
14772248Sraf if (mode == LIO_WAIT && oerrno == ENOTSUP) {
14782248Sraf /*
14792248Sraf * call kaio(AIOLIOWAIT) to get all outstanding
14802248Sraf * kernel AIO requests
14812248Sraf */
14822248Sraf if ((nent - aio_ufs) > 0)
14832248Sraf (void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
14842248Sraf if (head != NULL && head->lio_nent > 0) {
14852248Sraf sig_mutex_lock(&head->lio_mutex);
14862248Sraf while (head->lio_refcnt > 0) {
14872248Sraf int err;
14882248Sraf head->lio_waiting = 1;
14892248Sraf pthread_cleanup_push(_lio_listio_cleanup, head);
14902248Sraf err = sig_cond_wait(&head->lio_cond_cv,
14912248Sraf &head->lio_mutex);
14922248Sraf pthread_cleanup_pop(0);
14932248Sraf head->lio_waiting = 0;
14942248Sraf if (err && head->lio_nent > 0) {
14952248Sraf sig_mutex_unlock(&head->lio_mutex);
14962248Sraf errno = err;
14972248Sraf return (-1);
14982248Sraf }
14992248Sraf }
15002248Sraf sig_mutex_unlock(&head->lio_mutex);
15012248Sraf ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
15022248Sraf _aio_lio_free(head);
15032248Sraf for (i = 0; i < nent; i++) {
15042248Sraf if ((aiocbp = list[i]) != NULL &&
15052248Sraf aiocbp->aio_resultp.aio_errno) {
15062248Sraf errno = EIO;
15072248Sraf return (-1);
15082248Sraf }
15092248Sraf }
15102248Sraf }
15112248Sraf return (0);
15122248Sraf }
15132248Sraf return (error);
15142248Sraf }
15152248Sraf
15162248Sraf int
aio_suspend64(const aiocb64_t * const list[],int nent,const timespec_t * timeout)15172248Sraf aio_suspend64(const aiocb64_t * const list[], int nent,
15182248Sraf const timespec_t *timeout)
15192248Sraf {
15202248Sraf return (__aio_suspend((void **)list, nent, timeout, 1));
15212248Sraf }
15222248Sraf
15232248Sraf int
aio_error64(const aiocb64_t * aiocbp)15242248Sraf aio_error64(const aiocb64_t *aiocbp)
15252248Sraf {
15262248Sraf const aio_result_t *resultp = &aiocbp->aio_resultp;
15272248Sraf int error;
15282248Sraf
15292248Sraf if ((error = resultp->aio_errno) == EINPROGRESS) {
15302248Sraf if (aiocbp->aio_state == CHECK) {
15312248Sraf /*
15322248Sraf * Always do the kaio() call without using the
15332248Sraf * KAIO_SUPPORTED() checks because it is not
15342248Sraf * mandatory to have a valid fd set in the
15352248Sraf * aiocb, only the resultp must be set.
15362248Sraf */
15372248Sraf if ((int)_kaio(AIOERROR64, aiocbp) == EINVAL) {
15382248Sraf errno = EINVAL;
15392248Sraf return (-1);
15402248Sraf }
15412248Sraf error = resultp->aio_errno;
15422248Sraf } else if (aiocbp->aio_state == CHECKED) {
15432248Sraf ((aiocb64_t *)aiocbp)->aio_state = CHECK;
15442248Sraf }
15452248Sraf }
15462248Sraf return (error);
15472248Sraf }
15482248Sraf
/*
 * Large-file variant of aio_return(): consume and return the final
 * result of a completed request.  May be called exactly once per
 * request; subsequent calls fail with EINVAL.  The pair
 * (aio_return == -1, aio_errno == EINVAL) is the in-band sentinel
 * for "already returned / not yet valid".
 */
ssize_t
aio_return64(aiocb64_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with an membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourself.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * we use this condition to indicate either that
	 * aio_return() has been called before or should
	 * not have been called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() will return the fact that the result has
	 * already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		/* library-level request: detach and free its bookkeeping */
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}
16052248Sraf
16062248Sraf static int
__aio_fsync_bar64(aiocb64_t * aiocbp,aio_lio_t * head,aio_worker_t * aiowp,int workerscnt)16072248Sraf __aio_fsync_bar64(aiocb64_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
16082248Sraf int workerscnt)
16092248Sraf {
16102248Sraf int i;
16112248Sraf int error;
16122248Sraf aio_worker_t *next = aiowp;
16132248Sraf
16142248Sraf for (i = 0; i < workerscnt; i++) {
16152248Sraf error = _aio_rw64(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
16162248Sraf if (error != 0) {
16172248Sraf sig_mutex_lock(&head->lio_mutex);
16182248Sraf head->lio_mode = LIO_DESTROY; /* ignore fsync */
16192248Sraf head->lio_nent -= workerscnt - i;
16202248Sraf head->lio_refcnt -= workerscnt - i;
16212248Sraf sig_mutex_unlock(&head->lio_mutex);
16222248Sraf errno = EAGAIN;
16232248Sraf return (i);
16242248Sraf }
16252248Sraf next = next->work_forw;
16262248Sraf }
16272248Sraf return (i);
16282248Sraf }
16292248Sraf
/*
 * Large-file variant of aio_fsync(): asynchronously force the file's
 * data (O_DSYNC) or data+metadata (O_SYNC) to stable storage, with
 * the usual aio completion notification.  Returns 0 if queued,
 * -1 with errno set otherwise.
 */
int
aio_fsync64(int op, aiocb64_t *aiocbp)
{
	aio_lio_t *head;
	struct stat64 statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	/* only O_DSYNC and O_SYNC are valid operations */
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	/* the aiocb must not already be in use by an outstanding request */
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	/* validate the file descriptor */
	if (fstat64(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC));
		else
			return (__fdsync(aiocbp->aio_fildes, FSYNC));
	}

	/*
	 * re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 1;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar64(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *	 will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}
17162248Sraf
17172248Sraf int
aio_cancel64(int fd,aiocb64_t * aiocbp)17182248Sraf aio_cancel64(int fd, aiocb64_t *aiocbp)
17192248Sraf {
17202248Sraf aio_req_t *reqp;
17212248Sraf aio_worker_t *aiowp;
17222248Sraf int done = 0;
17232248Sraf int canceled = 0;
17248220SArindam.Sarkar@Sun.COM struct stat64 buf;
17252248Sraf
17268220SArindam.Sarkar@Sun.COM if (fstat64(fd, &buf) < 0)
17272248Sraf return (-1);
17282248Sraf
17292248Sraf if (aiocbp != NULL) {
17302248Sraf if (fd != aiocbp->aio_fildes) {
17312248Sraf errno = EINVAL;
17322248Sraf return (-1);
17332248Sraf }
17342248Sraf if (aiocbp->aio_state == USERAIO) {
17352248Sraf sig_mutex_lock(&__aio_mutex);
17362248Sraf reqp = _aio_hash_find(&aiocbp->aio_resultp);
17372248Sraf if (reqp == NULL) {
17382248Sraf sig_mutex_unlock(&__aio_mutex);
17392248Sraf return (AIO_ALLDONE);
17402248Sraf }
17412248Sraf aiowp = reqp->req_worker;
17422248Sraf sig_mutex_lock(&aiowp->work_qlock1);
17432248Sraf (void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
17442248Sraf sig_mutex_unlock(&aiowp->work_qlock1);
17452248Sraf sig_mutex_unlock(&__aio_mutex);
17462248Sraf if (done)
17472248Sraf return (AIO_ALLDONE);
17482248Sraf if (canceled)
17492248Sraf return (AIO_CANCELED);
17502248Sraf return (AIO_NOTCANCELED);
17512248Sraf }
17522248Sraf if (aiocbp->aio_state == USERAIO_DONE)
17532248Sraf return (AIO_ALLDONE);
17542248Sraf return ((int)_kaio(AIOCANCEL, fd, aiocbp));
17552248Sraf }
17562248Sraf
17572248Sraf return (aiocancel_all(fd));
17582248Sraf }
17592248Sraf
17602248Sraf int
aio_waitn64(aiocb64_t * list[],uint_t nent,uint_t * nwait,const timespec_t * timeout)17612248Sraf aio_waitn64(aiocb64_t *list[], uint_t nent, uint_t *nwait,
17622248Sraf const timespec_t *timeout)
17632248Sraf {
17642248Sraf return (__aio_waitn((void **)list, nent, nwait, timeout));
17652248Sraf }
17662248Sraf
17672248Sraf #endif /* !defined(_LP64) */
1768