1*34335fd2Sriastradh /* $NetBSD: sys_aio.c,v 1.50 2024/12/07 02:38:51 riastradh Exp $ */ 210c3d35cSrmind 310c3d35cSrmind /* 4b8ea6ca4Srmind * Copyright (c) 2007 Mindaugas Rasiukevicius <rmind at NetBSD org> 5c75dc327Srmind * All rights reserved. 610c3d35cSrmind * 710c3d35cSrmind * Redistribution and use in source and binary forms, with or without 810c3d35cSrmind * modification, are permitted provided that the following conditions 910c3d35cSrmind * are met: 1010c3d35cSrmind * 1. Redistributions of source code must retain the above copyright 1110c3d35cSrmind * notice, this list of conditions and the following disclaimer. 1210c3d35cSrmind * 2. Redistributions in binary form must reproduce the above copyright 1310c3d35cSrmind * notice, this list of conditions and the following disclaimer in the 1410c3d35cSrmind * documentation and/or other materials provided with the distribution. 1510c3d35cSrmind * 1606171502Srmind * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1706171502Srmind * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1806171502Srmind * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1906171502Srmind * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 2006171502Srmind * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2106171502Srmind * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2206171502Srmind * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2306171502Srmind * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2406171502Srmind * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2506171502Srmind * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2606171502Srmind * SUCH DAMAGE. 2710c3d35cSrmind */ 2810c3d35cSrmind 2910c3d35cSrmind /* 3006171502Srmind * Implementation of POSIX asynchronous I/O. 3106171502Srmind * Defined in the Base Definitions volume of IEEE Std 1003.1-2001. 3210c3d35cSrmind */ 3310c3d35cSrmind 3410c3d35cSrmind #include <sys/cdefs.h> 35*34335fd2Sriastradh __KERNEL_RCSID(0, "$NetBSD: sys_aio.c,v 1.50 2024/12/07 02:38:51 riastradh Exp $"); 360a227b19Srmind 37e6a33851Sad #ifdef _KERNEL_OPT 380a227b19Srmind #include "opt_ddb.h" 39e6a33851Sad #endif 4010c3d35cSrmind 4110c3d35cSrmind #include <sys/param.h> 424d07ef19Sriastradh #include <sys/types.h> 434d07ef19Sriastradh 444d07ef19Sriastradh #include <sys/atomic.h> 454d07ef19Sriastradh #include <sys/buf.h> 4610c3d35cSrmind #include <sys/condvar.h> 4710c3d35cSrmind #include <sys/file.h> 4810c3d35cSrmind #include <sys/filedesc.h> 4910c3d35cSrmind #include <sys/kernel.h> 5010c3d35cSrmind #include <sys/kmem.h> 5110c3d35cSrmind #include <sys/lwp.h> 524d07ef19Sriastradh #include <sys/module.h> 5310c3d35cSrmind #include <sys/mutex.h> 5410c3d35cSrmind #include <sys/pool.h> 5510c3d35cSrmind #include <sys/proc.h> 5610c3d35cSrmind #include <sys/queue.h> 57*34335fd2Sriastradh #include <sys/sdt.h> 5810c3d35cSrmind #include <sys/signal.h> 5910c3d35cSrmind #include <sys/signalvar.h> 60e6a33851Sad #include <sys/syscall.h> 6110c3d35cSrmind #include <sys/syscallargs.h> 62e6a33851Sad #include <sys/syscallvar.h> 6310c3d35cSrmind #include <sys/sysctl.h> 6410c3d35cSrmind #include <sys/systm.h> 6510c3d35cSrmind #include <sys/types.h> 6610c3d35cSrmind #include <sys/vnode.h> 6710c3d35cSrmind 6810c3d35cSrmind #include <uvm/uvm_extern.h> 6910c3d35cSrmind 70e6a33851Sad MODULE(MODULE_CLASS_MISC, aio, NULL); 71e6a33851Sad 7210c3d35cSrmind /* 7310c3d35cSrmind * System-wide limits and counter of AIO operations. 7410c3d35cSrmind */ 75461a86f9Schristos u_int aio_listio_max = AIO_LISTIO_MAX; 760a227b19Srmind static u_int aio_max = AIO_MAX; 770a227b19Srmind static u_int aio_jobs_count; 7810c3d35cSrmind 790a227b19Srmind static struct pool aio_job_pool; 800a227b19Srmind static struct pool aio_lio_pool; 81e6a33851Sad static void * aio_ehook; 8210c3d35cSrmind 83b8562be5Syamt static void aio_worker(void *); 8410c3d35cSrmind static void aio_process(struct aio_job *); 8510c3d35cSrmind static void aio_sendsig(struct proc *, struct sigevent *); 8610c3d35cSrmind static int aio_enqueue_job(int, void *, struct lio_req *); 87e6a33851Sad static void aio_exit(proc_t *, void *); 88e6a33851Sad 8972795172Sjruoho static int sysctl_aio_listio_max(SYSCTLFN_PROTO); 9072795172Sjruoho static int sysctl_aio_max(SYSCTLFN_PROTO); 9172795172Sjruoho 92e6a33851Sad static const struct syscall_package aio_syscalls[] = { 93e6a33851Sad { SYS_aio_cancel, 0, (sy_call_t *)sys_aio_cancel }, 94e6a33851Sad { SYS_aio_error, 0, (sy_call_t *)sys_aio_error }, 95e6a33851Sad { SYS_aio_fsync, 0, (sy_call_t *)sys_aio_fsync }, 96e6a33851Sad { SYS_aio_read, 0, (sy_call_t *)sys_aio_read }, 97e6a33851Sad { SYS_aio_return, 0, (sy_call_t *)sys_aio_return }, 98461a86f9Schristos { SYS___aio_suspend50, 0, (sy_call_t *)sys___aio_suspend50 }, 99e6a33851Sad { SYS_aio_write, 0, (sy_call_t *)sys_aio_write }, 100e6a33851Sad { SYS_lio_listio, 0, (sy_call_t *)sys_lio_listio }, 101e6a33851Sad { 0, 0, NULL }, 102e6a33851Sad }; 10310c3d35cSrmind 10410c3d35cSrmind /* 105e6a33851Sad * Tear down all AIO state. 1060a227b19Srmind */ 107e6a33851Sad static int 108e6a33851Sad aio_fini(bool interface) 1090a227b19Srmind { 110e6a33851Sad int error; 111e6a33851Sad proc_t *p; 112e6a33851Sad 113e6a33851Sad if (interface) { 114e6a33851Sad /* Stop syscall activity. */ 115e6a33851Sad error = syscall_disestablish(NULL, aio_syscalls); 116e6a33851Sad if (error != 0) 117e6a33851Sad return error; 118e6a33851Sad /* Abort if any processes are using AIO. */ 1190eaaa024Sad mutex_enter(&proc_lock); 120e6a33851Sad PROCLIST_FOREACH(p, &allproc) { 121e6a33851Sad if (p->p_aio != NULL) 122e6a33851Sad break; 123e6a33851Sad } 1240eaaa024Sad mutex_exit(&proc_lock); 125e6a33851Sad if (p != NULL) { 126e6a33851Sad error = syscall_establish(NULL, aio_syscalls); 127e6a33851Sad KASSERT(error == 0); 128*34335fd2Sriastradh return SET_ERROR(EBUSY); 129e6a33851Sad } 130e6a33851Sad } 13172795172Sjruoho 132e6a33851Sad KASSERT(aio_jobs_count == 0); 133e6a33851Sad exithook_disestablish(aio_ehook); 134e6a33851Sad pool_destroy(&aio_job_pool); 135e6a33851Sad pool_destroy(&aio_lio_pool); 136e6a33851Sad return 0; 137e6a33851Sad } 138e6a33851Sad 139e6a33851Sad /* 140e6a33851Sad * Initialize global AIO state. 141e6a33851Sad */ 142e6a33851Sad static int 143e6a33851Sad aio_init(void) 144e6a33851Sad { 145e6a33851Sad int error; 1460a227b19Srmind 1470a227b19Srmind pool_init(&aio_job_pool, sizeof(struct aio_job), 0, 0, 0, 1480a227b19Srmind "aio_jobs_pool", &pool_allocator_nointr, IPL_NONE); 1490a227b19Srmind pool_init(&aio_lio_pool, sizeof(struct lio_req), 0, 0, 0, 1500a227b19Srmind "aio_lio_pool", &pool_allocator_nointr, IPL_NONE); 151e6a33851Sad aio_ehook = exithook_establish(aio_exit, NULL); 15272795172Sjruoho 153e6a33851Sad error = syscall_establish(NULL, aio_syscalls); 154e6a33851Sad if (error != 0) 15572795172Sjruoho (void)aio_fini(false); 156e6a33851Sad return error; 157e6a33851Sad } 158e6a33851Sad 159e6a33851Sad /* 160e6a33851Sad * Module interface. 161e6a33851Sad */ 162e6a33851Sad static int 163e6a33851Sad aio_modcmd(modcmd_t cmd, void *arg) 164e6a33851Sad { 165e6a33851Sad 166e6a33851Sad switch (cmd) { 167e6a33851Sad case MODULE_CMD_INIT: 168e6a33851Sad return aio_init(); 169e6a33851Sad case MODULE_CMD_FINI: 170e6a33851Sad return aio_fini(true); 171e6a33851Sad default: 172*34335fd2Sriastradh return SET_ERROR(ENOTTY); 173e6a33851Sad } 1740a227b19Srmind } 1750a227b19Srmind 1760a227b19Srmind /* 17710c3d35cSrmind * Initialize Asynchronous I/O data structures for the process. 17810c3d35cSrmind */ 179e6a33851Sad static int 180e6a33851Sad aio_procinit(struct proc *p) 18110c3d35cSrmind { 18210c3d35cSrmind struct aioproc *aio; 18310c3d35cSrmind struct lwp *l; 184d831186dSad int error; 18510c3d35cSrmind vaddr_t uaddr; 18610c3d35cSrmind 18710c3d35cSrmind /* Allocate and initialize AIO structure */ 1886ef6e006Sad aio = kmem_zalloc(sizeof(struct aioproc), KM_SLEEP); 18910c3d35cSrmind 1900a227b19Srmind /* Initialize queue and their synchronization structures */ 19110c3d35cSrmind mutex_init(&aio->aio_mtx, MUTEX_DEFAULT, IPL_NONE); 19210c3d35cSrmind cv_init(&aio->aio_worker_cv, "aiowork"); 19310c3d35cSrmind cv_init(&aio->done_cv, "aiodone"); 19410c3d35cSrmind TAILQ_INIT(&aio->jobs_queue); 19510c3d35cSrmind 19610c3d35cSrmind /* 19710c3d35cSrmind * Create an AIO worker thread. 19810c3d35cSrmind * XXX: Currently, AIO thread is not protected against user's actions. 19910c3d35cSrmind */ 20040cf6f36Srmind uaddr = uvm_uarea_alloc(); 20110c3d35cSrmind if (uaddr == 0) { 20259085afdSrmind aio_exit(p, aio); 203*34335fd2Sriastradh return SET_ERROR(EAGAIN); 20410c3d35cSrmind } 20540cf6f36Srmind error = lwp_create(curlwp, p, uaddr, 0, NULL, 0, aio_worker, 206d7746f2eSchristos NULL, &l, curlwp->l_class, &curlwp->l_sigmask, &curlwp->l_sigstk); 207d831186dSad if (error != 0) { 20840cf6f36Srmind uvm_uarea_free(uaddr); 20959085afdSrmind aio_exit(p, aio); 210d831186dSad return error; 21110c3d35cSrmind } 21210c3d35cSrmind 21359085afdSrmind /* Recheck if we are really first */ 214284c2b9aSad mutex_enter(p->p_lock); 21559085afdSrmind if (p->p_aio) { 216284c2b9aSad mutex_exit(p->p_lock); 21759085afdSrmind aio_exit(p, aio); 21859085afdSrmind lwp_exit(l); 21959085afdSrmind return 0; 22059085afdSrmind } 22159085afdSrmind p->p_aio = aio; 22259085afdSrmind 22310c3d35cSrmind /* Complete the initialization of thread, and run it */ 22410c3d35cSrmind aio->aio_worker = l; 22510c3d35cSrmind lwp_lock(l); 22611ba4e18Sad lwp_changepri(l, MAXPRI_USER); 22711ba4e18Sad setrunnable(l); 22811ba4e18Sad /* LWP now unlocked */ 229284c2b9aSad mutex_exit(p->p_lock); 23010c3d35cSrmind 23110c3d35cSrmind return 0; 23210c3d35cSrmind } 23310c3d35cSrmind 23410c3d35cSrmind /* 23510c3d35cSrmind * Exit of Asynchronous I/O subsystem of process. 23610c3d35cSrmind */ 237e6a33851Sad static void 238e6a33851Sad aio_exit(struct proc *p, void *cookie) 23910c3d35cSrmind { 24010c3d35cSrmind struct aio_job *a_job; 241e6a33851Sad struct aioproc *aio; 24210c3d35cSrmind 243e6a33851Sad if (cookie != NULL) 244e6a33851Sad aio = cookie; 245e6a33851Sad else if ((aio = p->p_aio) == NULL) 24610c3d35cSrmind return; 24710c3d35cSrmind 24810c3d35cSrmind /* Free AIO queue */ 24910c3d35cSrmind while (!TAILQ_EMPTY(&aio->jobs_queue)) { 25010c3d35cSrmind a_job = TAILQ_FIRST(&aio->jobs_queue); 25110c3d35cSrmind TAILQ_REMOVE(&aio->jobs_queue, a_job, list); 2520a227b19Srmind pool_put(&aio_job_pool, a_job); 253a45b048eSad atomic_dec_uint(&aio_jobs_count); 25410c3d35cSrmind } 25510c3d35cSrmind 25610c3d35cSrmind /* Destroy and free the entire AIO data structure */ 25710c3d35cSrmind cv_destroy(&aio->aio_worker_cv); 25810c3d35cSrmind cv_destroy(&aio->done_cv); 25910c3d35cSrmind mutex_destroy(&aio->aio_mtx); 26010c3d35cSrmind kmem_free(aio, sizeof(struct aioproc)); 26110c3d35cSrmind } 26210c3d35cSrmind 26310c3d35cSrmind /* 26410c3d35cSrmind * AIO worker thread and processor. 26510c3d35cSrmind */ 266b8562be5Syamt static void 26710c3d35cSrmind aio_worker(void *arg) 26810c3d35cSrmind { 26910c3d35cSrmind struct proc *p = curlwp->l_proc; 27010c3d35cSrmind struct aioproc *aio = p->p_aio; 27110c3d35cSrmind struct aio_job *a_job; 27210c3d35cSrmind struct lio_req *lio; 27310c3d35cSrmind sigset_t oss, nss; 2747c79fd6cSmartin int error __diagused, refcnt; 27510c3d35cSrmind 27610c3d35cSrmind /* 27710c3d35cSrmind * Make an empty signal mask, so it 27810c3d35cSrmind * handles only SIGKILL and SIGSTOP. 27910c3d35cSrmind */ 28010c3d35cSrmind sigfillset(&nss); 281284c2b9aSad mutex_enter(p->p_lock); 28210c3d35cSrmind error = sigprocmask1(curlwp, SIG_SETMASK, &nss, &oss); 283284c2b9aSad mutex_exit(p->p_lock); 2840a227b19Srmind KASSERT(error == 0); 28510c3d35cSrmind 28610c3d35cSrmind for (;;) { 28710c3d35cSrmind /* 28810c3d35cSrmind * Loop for each job in the queue. If there 2890a227b19Srmind * are no jobs then sleep. 29010c3d35cSrmind */ 29110c3d35cSrmind mutex_enter(&aio->aio_mtx); 29210c3d35cSrmind while ((a_job = TAILQ_FIRST(&aio->jobs_queue)) == NULL) { 29310c3d35cSrmind if (cv_wait_sig(&aio->aio_worker_cv, &aio->aio_mtx)) { 29410c3d35cSrmind /* 2950a227b19Srmind * Thread was interrupted - check for 2960a227b19Srmind * pending exit or suspend. 29710c3d35cSrmind */ 2980a227b19Srmind mutex_exit(&aio->aio_mtx); 2990a227b19Srmind lwp_userret(curlwp); 3000a227b19Srmind mutex_enter(&aio->aio_mtx); 30110c3d35cSrmind } 30210c3d35cSrmind } 30310c3d35cSrmind 30410c3d35cSrmind /* Take the job from the queue */ 30510c3d35cSrmind aio->curjob = a_job; 30610c3d35cSrmind TAILQ_REMOVE(&aio->jobs_queue, a_job, list); 30710c3d35cSrmind 308a45b048eSad atomic_dec_uint(&aio_jobs_count); 30910c3d35cSrmind aio->jobs_count--; 31010c3d35cSrmind 31110c3d35cSrmind mutex_exit(&aio->aio_mtx); 31210c3d35cSrmind 31310c3d35cSrmind /* Process an AIO operation */ 31410c3d35cSrmind aio_process(a_job); 31510c3d35cSrmind 31610c3d35cSrmind /* Copy data structure back to the user-space */ 31710c3d35cSrmind (void)copyout(&a_job->aiocbp, a_job->aiocb_uptr, 31810c3d35cSrmind sizeof(struct aiocb)); 31910c3d35cSrmind 32010c3d35cSrmind mutex_enter(&aio->aio_mtx); 32107928aacSyamt KASSERT(aio->curjob == a_job); 32210c3d35cSrmind aio->curjob = NULL; 3230a227b19Srmind 32410c3d35cSrmind /* Decrease a reference counter, if there is a LIO structure */ 32510c3d35cSrmind lio = a_job->lio; 3260a227b19Srmind refcnt = (lio != NULL ? --lio->refcnt : -1); 3270a227b19Srmind 32810c3d35cSrmind /* Notify all suspenders */ 32910c3d35cSrmind cv_broadcast(&aio->done_cv); 33010c3d35cSrmind mutex_exit(&aio->aio_mtx); 33110c3d35cSrmind 33210c3d35cSrmind /* Send a signal, if any */ 33310c3d35cSrmind aio_sendsig(p, &a_job->aiocbp.aio_sigevent); 33410c3d35cSrmind 33510c3d35cSrmind /* Destroy the LIO structure */ 3360a227b19Srmind if (refcnt == 0) { 33710c3d35cSrmind aio_sendsig(p, &lio->sig); 3380a227b19Srmind pool_put(&aio_lio_pool, lio); 33910c3d35cSrmind } 34010c3d35cSrmind 3410ae57f90Smbalmer /* Destroy the job */ 3420a227b19Srmind pool_put(&aio_job_pool, a_job); 34310c3d35cSrmind } 34410c3d35cSrmind 3450a227b19Srmind /* NOTREACHED */ 34610c3d35cSrmind } 34710c3d35cSrmind 34810c3d35cSrmind static void 34910c3d35cSrmind aio_process(struct aio_job *a_job) 35010c3d35cSrmind { 35110c3d35cSrmind struct proc *p = curlwp->l_proc; 35210c3d35cSrmind struct aiocb *aiocbp = &a_job->aiocbp; 35310c3d35cSrmind struct file *fp; 35410c3d35cSrmind int fd = aiocbp->aio_fildes; 35510c3d35cSrmind int error = 0; 35610c3d35cSrmind 35710c3d35cSrmind KASSERT(a_job->aio_op != 0); 35810c3d35cSrmind 3590a227b19Srmind if ((a_job->aio_op & (AIO_READ | AIO_WRITE)) != 0) { 36010c3d35cSrmind struct iovec aiov; 36110c3d35cSrmind struct uio auio; 36210c3d35cSrmind 36310c3d35cSrmind if (aiocbp->aio_nbytes > SSIZE_MAX) { 364*34335fd2Sriastradh error = SET_ERROR(EINVAL); 36510c3d35cSrmind goto done; 36610c3d35cSrmind } 36710c3d35cSrmind 368a9ca7a37Sad fp = fd_getfile(fd); 36910c3d35cSrmind if (fp == NULL) { 370*34335fd2Sriastradh error = SET_ERROR(EBADF); 37110c3d35cSrmind goto done; 37210c3d35cSrmind } 37310c3d35cSrmind 37410c3d35cSrmind aiov.iov_base = (void *)(uintptr_t)aiocbp->aio_buf; 37510c3d35cSrmind aiov.iov_len = aiocbp->aio_nbytes; 37610c3d35cSrmind auio.uio_iov = &aiov; 37710c3d35cSrmind auio.uio_iovcnt = 1; 37810c3d35cSrmind auio.uio_resid = aiocbp->aio_nbytes; 37910c3d35cSrmind auio.uio_vmspace = p->p_vmspace; 38010c3d35cSrmind 38110c3d35cSrmind if (a_job->aio_op & AIO_READ) { 38210c3d35cSrmind /* 38310c3d35cSrmind * Perform a Read operation 38410c3d35cSrmind */ 38510c3d35cSrmind KASSERT((a_job->aio_op & AIO_WRITE) == 0); 38610c3d35cSrmind 38710c3d35cSrmind if ((fp->f_flag & FREAD) == 0) { 388a9ca7a37Sad fd_putfile(fd); 389*34335fd2Sriastradh error = SET_ERROR(EBADF); 39010c3d35cSrmind goto done; 39110c3d35cSrmind } 39210c3d35cSrmind auio.uio_rw = UIO_READ; 39310c3d35cSrmind error = (*fp->f_ops->fo_read)(fp, &aiocbp->aio_offset, 39410c3d35cSrmind &auio, fp->f_cred, FOF_UPDATE_OFFSET); 39510c3d35cSrmind } else { 39610c3d35cSrmind /* 39710c3d35cSrmind * Perform a Write operation 39810c3d35cSrmind */ 39910c3d35cSrmind KASSERT(a_job->aio_op & AIO_WRITE); 40010c3d35cSrmind 40110c3d35cSrmind if ((fp->f_flag & FWRITE) == 0) { 402a9ca7a37Sad fd_putfile(fd); 403*34335fd2Sriastradh error = SET_ERROR(EBADF); 40410c3d35cSrmind goto done; 40510c3d35cSrmind } 40610c3d35cSrmind auio.uio_rw = UIO_WRITE; 40710c3d35cSrmind error = (*fp->f_ops->fo_write)(fp, &aiocbp->aio_offset, 40810c3d35cSrmind &auio, fp->f_cred, FOF_UPDATE_OFFSET); 40910c3d35cSrmind } 410a9ca7a37Sad fd_putfile(fd); 41110c3d35cSrmind 41210c3d35cSrmind /* Store the result value */ 41310c3d35cSrmind a_job->aiocbp.aio_nbytes -= auio.uio_resid; 41410c3d35cSrmind a_job->aiocbp._retval = (error == 0) ? 41510c3d35cSrmind a_job->aiocbp.aio_nbytes : -1; 41610c3d35cSrmind 4170a227b19Srmind } else if ((a_job->aio_op & (AIO_SYNC | AIO_DSYNC)) != 0) { 41810c3d35cSrmind /* 41910c3d35cSrmind * Perform a file Sync operation 42010c3d35cSrmind */ 42110c3d35cSrmind struct vnode *vp; 42210c3d35cSrmind 423a9ca7a37Sad if ((error = fd_getvnode(fd, &fp)) != 0) 42410c3d35cSrmind goto done; 42510c3d35cSrmind 42610c3d35cSrmind if ((fp->f_flag & FWRITE) == 0) { 427a9ca7a37Sad fd_putfile(fd); 428*34335fd2Sriastradh error = SET_ERROR(EBADF); 42910c3d35cSrmind goto done; 43010c3d35cSrmind } 43110c3d35cSrmind 43245b1ec74Smatt vp = fp->f_vnode; 43310c3d35cSrmind vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 43410c3d35cSrmind if (a_job->aio_op & AIO_DSYNC) { 43510c3d35cSrmind error = VOP_FSYNC(vp, fp->f_cred, 43661e8303eSpooka FSYNC_WAIT | FSYNC_DATAONLY, 0, 0); 43710c3d35cSrmind } else if (a_job->aio_op & AIO_SYNC) { 43810c3d35cSrmind error = VOP_FSYNC(vp, fp->f_cred, 43961e8303eSpooka FSYNC_WAIT, 0, 0); 44010c3d35cSrmind } 4411423e65bShannken VOP_UNLOCK(vp); 442a9ca7a37Sad fd_putfile(fd); 44310c3d35cSrmind 44410c3d35cSrmind /* Store the result value */ 44510c3d35cSrmind a_job->aiocbp._retval = (error == 0) ? 0 : -1; 44610c3d35cSrmind 44710c3d35cSrmind } else 44810c3d35cSrmind panic("aio_process: invalid operation code\n"); 44910c3d35cSrmind 45010c3d35cSrmind done: 45110c3d35cSrmind /* Job is done, set the error, if any */ 45210c3d35cSrmind a_job->aiocbp._errno = error; 45310c3d35cSrmind a_job->aiocbp._state = JOB_DONE; 45410c3d35cSrmind } 45510c3d35cSrmind 45610c3d35cSrmind /* 45710c3d35cSrmind * Send AIO signal. 45810c3d35cSrmind */ 45910c3d35cSrmind static void 46010c3d35cSrmind aio_sendsig(struct proc *p, struct sigevent *sig) 46110c3d35cSrmind { 46210c3d35cSrmind ksiginfo_t ksi; 46310c3d35cSrmind 46410c3d35cSrmind if (sig->sigev_signo == 0 || sig->sigev_notify == SIGEV_NONE) 46510c3d35cSrmind return; 46610c3d35cSrmind 46710c3d35cSrmind KSI_INIT(&ksi); 46810c3d35cSrmind ksi.ksi_signo = sig->sigev_signo; 46910c3d35cSrmind ksi.ksi_code = SI_ASYNCIO; 470c61eed39Schristos ksi.ksi_value = sig->sigev_value; 4710eaaa024Sad mutex_enter(&proc_lock); 47210c3d35cSrmind kpsignal(p, &ksi, NULL); 4730eaaa024Sad mutex_exit(&proc_lock); 47410c3d35cSrmind } 47510c3d35cSrmind 47610c3d35cSrmind /* 47710c3d35cSrmind * Enqueue the job. 47810c3d35cSrmind */ 47910c3d35cSrmind static int 48010c3d35cSrmind aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) 48110c3d35cSrmind { 48210c3d35cSrmind struct proc *p = curlwp->l_proc; 48310c3d35cSrmind struct aioproc *aio; 48410c3d35cSrmind struct aio_job *a_job; 48510c3d35cSrmind struct aiocb aiocbp; 48610c3d35cSrmind struct sigevent *sig; 48710c3d35cSrmind int error; 48810c3d35cSrmind 4890e5c3c74Srmind /* Non-accurate check for the limit */ 4900e5c3c74Srmind if (aio_jobs_count + 1 > aio_max) 491*34335fd2Sriastradh return SET_ERROR(EAGAIN); 49210c3d35cSrmind 49310c3d35cSrmind /* Get the data structure from user-space */ 49410c3d35cSrmind error = copyin(aiocb_uptr, &aiocbp, sizeof(struct aiocb)); 49510c3d35cSrmind if (error) 49610c3d35cSrmind return error; 49710c3d35cSrmind 49810c3d35cSrmind /* Check if signal is set, and validate it */ 49910c3d35cSrmind sig = &aiocbp.aio_sigevent; 50010c3d35cSrmind if (sig->sigev_signo < 0 || sig->sigev_signo >= NSIG || 50110c3d35cSrmind sig->sigev_notify < SIGEV_NONE || sig->sigev_notify > SIGEV_SA) 502*34335fd2Sriastradh return SET_ERROR(EINVAL); 50310c3d35cSrmind 50410c3d35cSrmind /* Buffer and byte count */ 50510c3d35cSrmind if (((AIO_SYNC | AIO_DSYNC) & op) == 0) 50610c3d35cSrmind if (aiocbp.aio_buf == NULL || aiocbp.aio_nbytes > SSIZE_MAX) 507*34335fd2Sriastradh return SET_ERROR(EINVAL); 50810c3d35cSrmind 50910c3d35cSrmind /* Check the opcode, if LIO_NOP - simply ignore */ 51010c3d35cSrmind if (op == AIO_LIO) { 51110c3d35cSrmind KASSERT(lio != NULL); 51210c3d35cSrmind if (aiocbp.aio_lio_opcode == LIO_WRITE) 51310c3d35cSrmind op = AIO_WRITE; 51410c3d35cSrmind else if (aiocbp.aio_lio_opcode == LIO_READ) 51510c3d35cSrmind op = AIO_READ; 51610c3d35cSrmind else 517*34335fd2Sriastradh return (aiocbp.aio_lio_opcode == LIO_NOP) ? 0 : 518*34335fd2Sriastradh SET_ERROR(EINVAL); 51910c3d35cSrmind } else { 52010c3d35cSrmind KASSERT(lio == NULL); 52110c3d35cSrmind } 52210c3d35cSrmind 52310c3d35cSrmind /* 52410c3d35cSrmind * Look for already existing job. If found - the job is in-progress. 52510c3d35cSrmind * According to POSIX this is invalid, so return the error. 52610c3d35cSrmind */ 52710c3d35cSrmind aio = p->p_aio; 52810c3d35cSrmind if (aio) { 52910c3d35cSrmind mutex_enter(&aio->aio_mtx); 53010c3d35cSrmind TAILQ_FOREACH(a_job, &aio->jobs_queue, list) { 53110c3d35cSrmind if (a_job->aiocb_uptr != aiocb_uptr) 53210c3d35cSrmind continue; 53310c3d35cSrmind mutex_exit(&aio->aio_mtx); 534*34335fd2Sriastradh return SET_ERROR(EINVAL); 53510c3d35cSrmind } 53610c3d35cSrmind mutex_exit(&aio->aio_mtx); 53710c3d35cSrmind } 53810c3d35cSrmind 53910c3d35cSrmind /* 54010c3d35cSrmind * Check if AIO structure is initialized, if not - initialize it. 54110c3d35cSrmind * In LIO case, we did that already. We will recheck this with 542e6a33851Sad * the lock in aio_procinit(). 54310c3d35cSrmind */ 54410c3d35cSrmind if (lio == NULL && p->p_aio == NULL) 545e6a33851Sad if (aio_procinit(p)) 546*34335fd2Sriastradh return SET_ERROR(EAGAIN); 54710c3d35cSrmind aio = p->p_aio; 54810c3d35cSrmind 54910c3d35cSrmind /* 55010c3d35cSrmind * Set the state with errno, and copy data 55110c3d35cSrmind * structure back to the user-space. 55210c3d35cSrmind */ 55310c3d35cSrmind aiocbp._state = JOB_WIP; 554*34335fd2Sriastradh aiocbp._errno = SET_ERROR(EINPROGRESS); 55510c3d35cSrmind aiocbp._retval = -1; 55610c3d35cSrmind error = copyout(&aiocbp, aiocb_uptr, sizeof(struct aiocb)); 55710c3d35cSrmind if (error) 55810c3d35cSrmind return error; 55910c3d35cSrmind 56010c3d35cSrmind /* Allocate and initialize a new AIO job */ 5619577643dSchristos a_job = pool_get(&aio_job_pool, PR_WAITOK | PR_ZERO); 56210c3d35cSrmind 56310c3d35cSrmind /* 56410c3d35cSrmind * Set the data. 56510c3d35cSrmind * Store the user-space pointer for searching. Since we 56610c3d35cSrmind * are storing only per proc pointers - it is safe. 56710c3d35cSrmind */ 56810c3d35cSrmind memcpy(&a_job->aiocbp, &aiocbp, sizeof(struct aiocb)); 56910c3d35cSrmind a_job->aiocb_uptr = aiocb_uptr; 57010c3d35cSrmind a_job->aio_op |= op; 57110c3d35cSrmind a_job->lio = lio; 57210c3d35cSrmind 57310c3d35cSrmind /* 57410c3d35cSrmind * Add the job to the queue, update the counters, and 57510c3d35cSrmind * notify the AIO worker thread to handle the job. 57610c3d35cSrmind */ 57710c3d35cSrmind mutex_enter(&aio->aio_mtx); 57810c3d35cSrmind 57910c3d35cSrmind /* Fail, if the limit was reached */ 580cbb39165Srmind if (atomic_inc_uint_nv(&aio_jobs_count) > aio_max || 581cbb39165Srmind aio->jobs_count >= aio_listio_max) { 5820e5c3c74Srmind atomic_dec_uint(&aio_jobs_count); 58310c3d35cSrmind mutex_exit(&aio->aio_mtx); 5840a227b19Srmind pool_put(&aio_job_pool, a_job); 585*34335fd2Sriastradh return SET_ERROR(EAGAIN); 58610c3d35cSrmind } 58710c3d35cSrmind 58810c3d35cSrmind TAILQ_INSERT_TAIL(&aio->jobs_queue, a_job, list); 58910c3d35cSrmind aio->jobs_count++; 59010c3d35cSrmind if (lio) 59110c3d35cSrmind lio->refcnt++; 59210c3d35cSrmind cv_signal(&aio->aio_worker_cv); 59310c3d35cSrmind 59410c3d35cSrmind mutex_exit(&aio->aio_mtx); 59510c3d35cSrmind 59610c3d35cSrmind /* 59710c3d35cSrmind * One would handle the errors only with aio_error() function. 59810c3d35cSrmind * This way is appropriate according to POSIX. 59910c3d35cSrmind */ 60010c3d35cSrmind return 0; 60110c3d35cSrmind } 60210c3d35cSrmind 60310c3d35cSrmind /* 60410c3d35cSrmind * Syscall functions. 60510c3d35cSrmind */ 60610c3d35cSrmind 60710c3d35cSrmind int 60829e552b0Syamt sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap, 60929e552b0Syamt register_t *retval) 61010c3d35cSrmind { 6117e2790cfSdsl /* { 61210c3d35cSrmind syscallarg(int) fildes; 61310c3d35cSrmind syscallarg(struct aiocb *) aiocbp; 6147e2790cfSdsl } */ 61510c3d35cSrmind struct proc *p = l->l_proc; 61610c3d35cSrmind struct aioproc *aio; 61710c3d35cSrmind struct aio_job *a_job; 61810c3d35cSrmind struct aiocb *aiocbp_ptr; 61910c3d35cSrmind struct lio_req *lio; 62010c3d35cSrmind struct filedesc *fdp = p->p_fd; 62110c3d35cSrmind unsigned int cn, errcnt, fildes; 622d991fcb3Sad fdtab_t *dt; 62310c3d35cSrmind 62410c3d35cSrmind TAILQ_HEAD(, aio_job) tmp_jobs_list; 62510c3d35cSrmind 62610c3d35cSrmind /* Check for invalid file descriptor */ 62710c3d35cSrmind fildes = (unsigned int)SCARG(uap, fildes); 6288e6cd4ceSriastradh dt = atomic_load_consume(&fdp->fd_dt); 629d991fcb3Sad if (fildes >= dt->dt_nfiles) 630*34335fd2Sriastradh return SET_ERROR(EBADF); 631d991fcb3Sad if (dt->dt_ff[fildes] == NULL || dt->dt_ff[fildes]->ff_file == NULL) 632*34335fd2Sriastradh return SET_ERROR(EBADF); 63310c3d35cSrmind 63410c3d35cSrmind /* Check if AIO structure is initialized */ 63510c3d35cSrmind if (p->p_aio == NULL) { 63610c3d35cSrmind *retval = AIO_NOTCANCELED; 63710c3d35cSrmind return 0; 63810c3d35cSrmind } 63910c3d35cSrmind 64010c3d35cSrmind aio = p->p_aio; 64110c3d35cSrmind aiocbp_ptr = (struct aiocb *)SCARG(uap, aiocbp); 64210c3d35cSrmind 64310c3d35cSrmind mutex_enter(&aio->aio_mtx); 64410c3d35cSrmind 64510c3d35cSrmind /* Cancel the jobs, and remove them from the queue */ 64610c3d35cSrmind cn = 0; 64710c3d35cSrmind TAILQ_INIT(&tmp_jobs_list); 64810c3d35cSrmind TAILQ_FOREACH(a_job, &aio->jobs_queue, list) { 64910c3d35cSrmind if (aiocbp_ptr) { 65010c3d35cSrmind if (aiocbp_ptr != a_job->aiocb_uptr) 65110c3d35cSrmind continue; 65210c3d35cSrmind if (fildes != a_job->aiocbp.aio_fildes) { 65310c3d35cSrmind mutex_exit(&aio->aio_mtx); 654*34335fd2Sriastradh return SET_ERROR(EBADF); 65510c3d35cSrmind } 65610c3d35cSrmind } else if (a_job->aiocbp.aio_fildes != fildes) 65710c3d35cSrmind continue; 65810c3d35cSrmind 65910c3d35cSrmind TAILQ_REMOVE(&aio->jobs_queue, a_job, list); 66010c3d35cSrmind TAILQ_INSERT_TAIL(&tmp_jobs_list, a_job, list); 66110c3d35cSrmind 66210c3d35cSrmind /* Decrease the counters */ 663a45b048eSad atomic_dec_uint(&aio_jobs_count); 66410c3d35cSrmind aio->jobs_count--; 66510c3d35cSrmind lio = a_job->lio; 6660a227b19Srmind if (lio != NULL && --lio->refcnt != 0) 66710c3d35cSrmind a_job->lio = NULL; 66810c3d35cSrmind 66910c3d35cSrmind cn++; 67010c3d35cSrmind if (aiocbp_ptr) 67110c3d35cSrmind break; 67210c3d35cSrmind } 67310c3d35cSrmind 67410c3d35cSrmind /* There are canceled jobs */ 67510c3d35cSrmind if (cn) 67610c3d35cSrmind *retval = AIO_CANCELED; 67710c3d35cSrmind 67810c3d35cSrmind /* We cannot cancel current job */ 67910c3d35cSrmind a_job = aio->curjob; 68010c3d35cSrmind if (a_job && ((a_job->aiocbp.aio_fildes == fildes) || 68110c3d35cSrmind (a_job->aiocb_uptr == aiocbp_ptr))) 68210c3d35cSrmind *retval = AIO_NOTCANCELED; 68310c3d35cSrmind 68410c3d35cSrmind mutex_exit(&aio->aio_mtx); 68510c3d35cSrmind 68610c3d35cSrmind /* Free the jobs after the lock */ 68710c3d35cSrmind errcnt = 0; 68810c3d35cSrmind while (!TAILQ_EMPTY(&tmp_jobs_list)) { 68910c3d35cSrmind a_job = TAILQ_FIRST(&tmp_jobs_list); 69010c3d35cSrmind TAILQ_REMOVE(&tmp_jobs_list, a_job, list); 69110c3d35cSrmind /* Set the errno and copy structures back to the user-space */ 692*34335fd2Sriastradh a_job->aiocbp._errno = SET_ERROR(ECANCELED); 69310c3d35cSrmind a_job->aiocbp._state = JOB_DONE; 69410c3d35cSrmind if (copyout(&a_job->aiocbp, a_job->aiocb_uptr, 69510c3d35cSrmind sizeof(struct aiocb))) 69610c3d35cSrmind errcnt++; 69710c3d35cSrmind /* Send a signal if any */ 69810c3d35cSrmind aio_sendsig(p, &a_job->aiocbp.aio_sigevent); 6995023159eSrmind if (a_job->lio) { 7005023159eSrmind lio = a_job->lio; 7015023159eSrmind aio_sendsig(p, &lio->sig); 7025023159eSrmind pool_put(&aio_lio_pool, lio); 7035023159eSrmind } 7040a227b19Srmind pool_put(&aio_job_pool, a_job); 70510c3d35cSrmind } 70610c3d35cSrmind 70710c3d35cSrmind if (errcnt) 708*34335fd2Sriastradh return SET_ERROR(EFAULT); 70910c3d35cSrmind 71010c3d35cSrmind /* Set a correct return value */ 71110c3d35cSrmind if (*retval == 0) 71210c3d35cSrmind *retval = AIO_ALLDONE; 71310c3d35cSrmind 71410c3d35cSrmind return 0; 71510c3d35cSrmind } 71610c3d35cSrmind 71710c3d35cSrmind int 71829e552b0Syamt sys_aio_error(struct lwp *l, const struct sys_aio_error_args *uap, 71929e552b0Syamt register_t *retval) 72010c3d35cSrmind { 7217e2790cfSdsl /* { 72210c3d35cSrmind syscallarg(const struct aiocb *) aiocbp; 7237e2790cfSdsl } */ 72410c3d35cSrmind struct proc *p = l->l_proc; 72510c3d35cSrmind struct aioproc *aio = p->p_aio; 72610c3d35cSrmind struct aiocb aiocbp; 72710c3d35cSrmind int error; 72810c3d35cSrmind 72910c3d35cSrmind if (aio == NULL) 730*34335fd2Sriastradh return SET_ERROR(EINVAL); 73110c3d35cSrmind 73210c3d35cSrmind error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb)); 73310c3d35cSrmind if (error) 73410c3d35cSrmind return error; 73510c3d35cSrmind 73610c3d35cSrmind if (aiocbp._state == JOB_NONE) 737*34335fd2Sriastradh return SET_ERROR(EINVAL); 73810c3d35cSrmind 73910c3d35cSrmind *retval = aiocbp._errno; 74010c3d35cSrmind 74110c3d35cSrmind return 0; 74210c3d35cSrmind } 74310c3d35cSrmind 74410c3d35cSrmind int 74529e552b0Syamt sys_aio_fsync(struct lwp *l, const struct sys_aio_fsync_args *uap, 74629e552b0Syamt register_t *retval) 74710c3d35cSrmind { 7487e2790cfSdsl /* { 74910c3d35cSrmind syscallarg(int) op; 75010c3d35cSrmind syscallarg(struct aiocb *) aiocbp; 7517e2790cfSdsl } */ 75210c3d35cSrmind int op = SCARG(uap, op); 75310c3d35cSrmind 75410c3d35cSrmind if ((op != O_DSYNC) && (op != O_SYNC)) 755*34335fd2Sriastradh return SET_ERROR(EINVAL); 75610c3d35cSrmind 75710c3d35cSrmind op = O_DSYNC ? AIO_DSYNC : AIO_SYNC; 75810c3d35cSrmind 75910c3d35cSrmind return aio_enqueue_job(op, SCARG(uap, aiocbp), NULL); 76010c3d35cSrmind } 76110c3d35cSrmind 76210c3d35cSrmind int 76329e552b0Syamt sys_aio_read(struct lwp *l, const struct sys_aio_read_args *uap, 76429e552b0Syamt register_t *retval) 76510c3d35cSrmind { 7667e2790cfSdsl /* { 76710c3d35cSrmind syscallarg(struct aiocb *) aiocbp; 7687e2790cfSdsl } */ 76910c3d35cSrmind 77010c3d35cSrmind return aio_enqueue_job(AIO_READ, SCARG(uap, aiocbp), NULL); 77110c3d35cSrmind } 77210c3d35cSrmind 77310c3d35cSrmind int 77429e552b0Syamt sys_aio_return(struct lwp *l, const struct sys_aio_return_args *uap, 77529e552b0Syamt register_t *retval) 77610c3d35cSrmind { 7777e2790cfSdsl /* { 77810c3d35cSrmind syscallarg(struct aiocb *) aiocbp; 7797e2790cfSdsl } */ 78010c3d35cSrmind struct proc *p = l->l_proc; 78110c3d35cSrmind struct aioproc *aio = p->p_aio; 78210c3d35cSrmind struct aiocb aiocbp; 78310c3d35cSrmind int error; 78410c3d35cSrmind 78510c3d35cSrmind if (aio == NULL) 786*34335fd2Sriastradh return SET_ERROR(EINVAL); 78710c3d35cSrmind 78810c3d35cSrmind error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb)); 78910c3d35cSrmind if (error) 79010c3d35cSrmind return error; 79110c3d35cSrmind 79210c3d35cSrmind if (aiocbp._errno == EINPROGRESS || aiocbp._state != JOB_DONE) 793*34335fd2Sriastradh return SET_ERROR(EINVAL); 79410c3d35cSrmind 79510c3d35cSrmind *retval = aiocbp._retval; 79610c3d35cSrmind 79710c3d35cSrmind /* Reset the internal variables */ 79810c3d35cSrmind aiocbp._errno = 0; 79910c3d35cSrmind aiocbp._retval = -1; 80010c3d35cSrmind aiocbp._state = JOB_NONE; 80110c3d35cSrmind error = copyout(&aiocbp, SCARG(uap, aiocbp), sizeof(struct aiocb)); 80210c3d35cSrmind 80310c3d35cSrmind return error; 80410c3d35cSrmind } 80510c3d35cSrmind 80610c3d35cSrmind int 807461a86f9Schristos sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap, 808461a86f9Schristos register_t *retval) 80910c3d35cSrmind { 8107e2790cfSdsl /* { 81110c3d35cSrmind syscallarg(const struct aiocb *const[]) list; 81210c3d35cSrmind syscallarg(int) nent; 81310c3d35cSrmind syscallarg(const struct timespec *) timeout; 8147e2790cfSdsl } */ 815461a86f9Schristos struct aiocb **list; 81610c3d35cSrmind struct timespec ts; 817461a86f9Schristos int error, nent; 81810c3d35cSrmind 81910c3d35cSrmind nent = SCARG(uap, nent); 82010c3d35cSrmind if (nent <= 0 || nent > aio_listio_max) 821*34335fd2Sriastradh return SET_ERROR(EAGAIN); 82210c3d35cSrmind 82310c3d35cSrmind if (SCARG(uap, timeout)) { 82410c3d35cSrmind /* Convert timespec to ticks */ 82510c3d35cSrmind error = copyin(SCARG(uap, timeout), &ts, 82610c3d35cSrmind sizeof(struct timespec)); 82710c3d35cSrmind if (error) 82810c3d35cSrmind return error; 829461a86f9Schristos } 830b8ea6ca4Srmind 831e8947292Syamt list = kmem_alloc(nent * sizeof(*list), KM_SLEEP); 832e8947292Syamt error = copyin(SCARG(uap, list), list, nent * sizeof(*list)); 833461a86f9Schristos if (error) 834461a86f9Schristos goto out; 835461a86f9Schristos error = aio_suspend1(l, list, nent, SCARG(uap, timeout) ? &ts : NULL); 836461a86f9Schristos out: 837e8947292Syamt kmem_free(list, nent * sizeof(*list)); 838461a86f9Schristos return error; 839461a86f9Schristos } 840461a86f9Schristos 841461a86f9Schristos int 842461a86f9Schristos aio_suspend1(struct lwp *l, struct aiocb **aiocbp_list, int nent, 843461a86f9Schristos struct timespec *ts) 844461a86f9Schristos { 845461a86f9Schristos struct proc *p = l->l_proc; 846461a86f9Schristos struct aioproc *aio; 847461a86f9Schristos struct aio_job *a_job; 848461a86f9Schristos int i, error, timo; 849461a86f9Schristos 850461a86f9Schristos if (p->p_aio == NULL) 851*34335fd2Sriastradh return SET_ERROR(EAGAIN); 852461a86f9Schristos aio = p->p_aio; 853461a86f9Schristos 854461a86f9Schristos if (ts) { 855461a86f9Schristos timo = mstohz((ts->tv_sec * 1000) + (ts->tv_nsec / 1000000)); 856461a86f9Schristos if (timo == 0 && ts->tv_sec == 0 && ts->tv_nsec > 0) 85710c3d35cSrmind timo = 1; 85810c3d35cSrmind if (timo <= 0) 859*34335fd2Sriastradh return SET_ERROR(EAGAIN); 86010c3d35cSrmind } else 86110c3d35cSrmind timo = 0; 86210c3d35cSrmind 86310c3d35cSrmind mutex_enter(&aio->aio_mtx); 86410c3d35cSrmind for (;;) { 86510c3d35cSrmind for (i = 0; i < nent; i++) { 86610c3d35cSrmind 86710c3d35cSrmind /* Skip NULL entries */ 86810c3d35cSrmind if (aiocbp_list[i] == NULL) 86910c3d35cSrmind continue; 87010c3d35cSrmind 87110c3d35cSrmind /* Skip current job */ 87210c3d35cSrmind if (aio->curjob) { 87310c3d35cSrmind a_job = aio->curjob; 87410c3d35cSrmind if (a_job->aiocb_uptr == aiocbp_list[i]) 87510c3d35cSrmind continue; 87610c3d35cSrmind } 87710c3d35cSrmind 87810c3d35cSrmind /* Look for a job in the queue */ 87910c3d35cSrmind TAILQ_FOREACH(a_job, &aio->jobs_queue, list) 88010c3d35cSrmind if (a_job->aiocb_uptr == aiocbp_list[i]) 88110c3d35cSrmind break; 88210c3d35cSrmind 88310c3d35cSrmind if (a_job == NULL) { 88410c3d35cSrmind struct aiocb aiocbp; 88510c3d35cSrmind 88610c3d35cSrmind mutex_exit(&aio->aio_mtx); 88710c3d35cSrmind 888b8ea6ca4Srmind /* Check if the job is done. */ 88910c3d35cSrmind error = copyin(aiocbp_list[i], &aiocbp, 89010c3d35cSrmind sizeof(struct aiocb)); 89110c3d35cSrmind if (error == 0 && aiocbp._state != JOB_DONE) { 89210c3d35cSrmind mutex_enter(&aio->aio_mtx); 89310c3d35cSrmind continue; 89410c3d35cSrmind } 89510c3d35cSrmind return error; 89610c3d35cSrmind } 89710c3d35cSrmind } 89810c3d35cSrmind 89910c3d35cSrmind /* Wait for a signal or when timeout occurs */ 90010c3d35cSrmind error = cv_timedwait_sig(&aio->done_cv, &aio->aio_mtx, timo); 90110c3d35cSrmind if (error) { 90210c3d35cSrmind if (error == EWOULDBLOCK) 903*34335fd2Sriastradh error = SET_ERROR(EAGAIN); 90410c3d35cSrmind break; 90510c3d35cSrmind } 90610c3d35cSrmind } 90710c3d35cSrmind mutex_exit(&aio->aio_mtx); 90810c3d35cSrmind return error; 90910c3d35cSrmind } 91010c3d35cSrmind 91110c3d35cSrmind int 91229e552b0Syamt sys_aio_write(struct lwp *l, const struct sys_aio_write_args *uap, 91329e552b0Syamt register_t *retval) 91410c3d35cSrmind { 9157e2790cfSdsl /* { 91610c3d35cSrmind syscallarg(struct aiocb *) aiocbp; 9177e2790cfSdsl } */ 91810c3d35cSrmind 91910c3d35cSrmind return aio_enqueue_job(AIO_WRITE, SCARG(uap, aiocbp), NULL); 92010c3d35cSrmind } 92110c3d35cSrmind 92210c3d35cSrmind int 92329e552b0Syamt sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, 92429e552b0Syamt register_t *retval) 92510c3d35cSrmind { 9267e2790cfSdsl /* { 92710c3d35cSrmind syscallarg(int) mode; 92810c3d35cSrmind syscallarg(struct aiocb *const[]) list; 92910c3d35cSrmind syscallarg(int) nent; 93010c3d35cSrmind syscallarg(struct sigevent *) sig; 9317e2790cfSdsl } */ 93210c3d35cSrmind struct proc *p = l->l_proc; 93310c3d35cSrmind struct aioproc *aio; 93410c3d35cSrmind struct aiocb **aiocbp_list; 93510c3d35cSrmind struct lio_req *lio; 93610c3d35cSrmind int i, error, errcnt, mode, nent; 93710c3d35cSrmind 93810c3d35cSrmind mode = SCARG(uap, mode); 93910c3d35cSrmind nent = SCARG(uap, nent); 94010c3d35cSrmind 9410e5c3c74Srmind /* Non-accurate checks for the limit and invalid values */ 94210c3d35cSrmind if (nent < 1 || nent > aio_listio_max) 943*34335fd2Sriastradh return SET_ERROR(EINVAL); 9440e5c3c74Srmind if (aio_jobs_count + nent > aio_max) 945*34335fd2Sriastradh return SET_ERROR(EAGAIN); 94610c3d35cSrmind 94710c3d35cSrmind /* Check if AIO structure is initialized, if not - initialize it */ 94810c3d35cSrmind if (p->p_aio == NULL) 949e6a33851Sad if (aio_procinit(p)) 950*34335fd2Sriastradh return SET_ERROR(EAGAIN); 95110c3d35cSrmind aio = p->p_aio; 95210c3d35cSrmind 95310c3d35cSrmind /* Create a LIO structure */ 9540a227b19Srmind lio = pool_get(&aio_lio_pool, PR_WAITOK); 9550a227b19Srmind lio->refcnt = 1; 9560a227b19Srmind error = 0; 9570a227b19Srmind 9580a227b19Srmind switch (mode) { 9590a227b19Srmind case LIO_WAIT: 96010c3d35cSrmind memset(&lio->sig, 0, sizeof(struct sigevent)); 9610a227b19Srmind break; 9620a227b19Srmind case LIO_NOWAIT: 9630a227b19Srmind /* Check for signal, validate it */ 9640a227b19Srmind if (SCARG(uap, sig)) { 9650a227b19Srmind struct sigevent *sig = &lio->sig; 9660a227b19Srmind 9670a227b19Srmind error = copyin(SCARG(uap, sig), &lio->sig, 9680a227b19Srmind sizeof(struct sigevent)); 9690a227b19Srmind if (error == 0 && 9700a227b19Srmind (sig->sigev_signo < 0 || 9710a227b19Srmind sig->sigev_signo >= NSIG || 9720a227b19Srmind sig->sigev_notify < SIGEV_NONE || 9730a227b19Srmind sig->sigev_notify > SIGEV_SA)) 974*34335fd2Sriastradh error = SET_ERROR(EINVAL); 9750a227b19Srmind } else 9760a227b19Srmind memset(&lio->sig, 0, sizeof(struct sigevent)); 9770a227b19Srmind break; 9780a227b19Srmind default: 979*34335fd2Sriastradh error = SET_ERROR(EINVAL); 9800a227b19Srmind break; 9810a227b19Srmind } 9820a227b19Srmind 9830a227b19Srmind if (error != 0) { 9840a227b19Srmind pool_put(&aio_lio_pool, lio); 9850a227b19Srmind return error; 9860a227b19Srmind } 98710c3d35cSrmind 98810c3d35cSrmind /* Get the list from user-space */ 989e8947292Syamt aiocbp_list = kmem_alloc(nent * sizeof(*aiocbp_list), KM_SLEEP); 99010c3d35cSrmind error = copyin(SCARG(uap, list), aiocbp_list, 991e8947292Syamt nent * sizeof(*aiocbp_list)); 9920a227b19Srmind if (error) { 9930a227b19Srmind mutex_enter(&aio->aio_mtx); 99410c3d35cSrmind goto err; 9950a227b19Srmind } 99610c3d35cSrmind 99710c3d35cSrmind /* Enqueue all jobs */ 99810c3d35cSrmind errcnt = 0; 99910c3d35cSrmind for (i = 0; i < nent; i++) { 100010c3d35cSrmind error = aio_enqueue_job(AIO_LIO, aiocbp_list[i], lio); 100110c3d35cSrmind /* 100210c3d35cSrmind * According to POSIX, in such error case it may 100310c3d35cSrmind * fail with other I/O operations initiated. 100410c3d35cSrmind */ 100510c3d35cSrmind if (error) 100610c3d35cSrmind errcnt++; 100710c3d35cSrmind } 100810c3d35cSrmind 10090a227b19Srmind mutex_enter(&aio->aio_mtx); 10100a227b19Srmind 101110c3d35cSrmind /* Return an error, if any */ 101210c3d35cSrmind if (errcnt) { 1013*34335fd2Sriastradh error = SET_ERROR(EIO); 101410c3d35cSrmind goto err; 101510c3d35cSrmind } 101610c3d35cSrmind 101710c3d35cSrmind if (mode == LIO_WAIT) { 101810c3d35cSrmind /* 101910c3d35cSrmind * Wait for AIO completion. In such case, 102010c3d35cSrmind * the LIO structure will be freed here. 102110c3d35cSrmind */ 10220a227b19Srmind while (lio->refcnt > 1 && error == 0) 102310c3d35cSrmind error = cv_wait_sig(&aio->done_cv, &aio->aio_mtx); 102410c3d35cSrmind if (error) 1025*34335fd2Sriastradh error = SET_ERROR(EINTR); 102610c3d35cSrmind } 102710c3d35cSrmind 102810c3d35cSrmind err: 10290a227b19Srmind if (--lio->refcnt != 0) 10300a227b19Srmind lio = NULL; 10310a227b19Srmind mutex_exit(&aio->aio_mtx); 10320a227b19Srmind if (lio != NULL) { 10330a227b19Srmind aio_sendsig(p, &lio->sig); 10340a227b19Srmind pool_put(&aio_lio_pool, lio); 103510c3d35cSrmind } 1036e8947292Syamt kmem_free(aiocbp_list, nent * sizeof(*aiocbp_list)); 103710c3d35cSrmind return error; 103810c3d35cSrmind } 103910c3d35cSrmind 104010c3d35cSrmind /* 104110c3d35cSrmind * SysCtl 104210c3d35cSrmind */ 104310c3d35cSrmind 104410c3d35cSrmind static int 104510c3d35cSrmind sysctl_aio_listio_max(SYSCTLFN_ARGS) 104610c3d35cSrmind { 104710c3d35cSrmind struct sysctlnode node; 104810c3d35cSrmind int error, newsize; 104910c3d35cSrmind 105010c3d35cSrmind node = *rnode; 105110c3d35cSrmind node.sysctl_data = &newsize; 105210c3d35cSrmind 105310c3d35cSrmind newsize = aio_listio_max; 105410c3d35cSrmind error = sysctl_lookup(SYSCTLFN_CALL(&node)); 105510c3d35cSrmind if (error || newp == NULL) 105610c3d35cSrmind return error; 105710c3d35cSrmind 105810c3d35cSrmind if (newsize < 1 || newsize > aio_max) 1059*34335fd2Sriastradh return SET_ERROR(EINVAL); 106010c3d35cSrmind aio_listio_max = newsize; 106110c3d35cSrmind 106210c3d35cSrmind return 0; 106310c3d35cSrmind } 106410c3d35cSrmind 106510c3d35cSrmind static int 106610c3d35cSrmind sysctl_aio_max(SYSCTLFN_ARGS) 106710c3d35cSrmind { 106810c3d35cSrmind struct sysctlnode node; 106910c3d35cSrmind int error, newsize; 107010c3d35cSrmind 107110c3d35cSrmind node = *rnode; 107210c3d35cSrmind node.sysctl_data = &newsize; 107310c3d35cSrmind 107410c3d35cSrmind newsize = aio_max; 107510c3d35cSrmind error = sysctl_lookup(SYSCTLFN_CALL(&node)); 107610c3d35cSrmind if (error || newp == NULL) 107710c3d35cSrmind return error; 107810c3d35cSrmind 107910c3d35cSrmind if (newsize < 1 || newsize < aio_listio_max) 1080*34335fd2Sriastradh return SET_ERROR(EINVAL); 108110c3d35cSrmind aio_max = newsize; 108210c3d35cSrmind 108310c3d35cSrmind return 0; 108410c3d35cSrmind } 108510c3d35cSrmind 10869120d451Spgoyette SYSCTL_SETUP(sysctl_aio_init, "aio sysctl") 108710c3d35cSrmind { 108872795172Sjruoho int rv; 108910c3d35cSrmind 10909120d451Spgoyette rv = sysctl_createv(clog, 0, NULL, NULL, 109110c3d35cSrmind CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, 109210c3d35cSrmind CTLTYPE_INT, "posix_aio", 109310c3d35cSrmind SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " 109410c3d35cSrmind "Asynchronous I/O option to which the " 109510c3d35cSrmind "system attempts to conform"), 109610c3d35cSrmind NULL, _POSIX_ASYNCHRONOUS_IO, NULL, 0, 109710c3d35cSrmind CTL_KERN, CTL_CREATE, CTL_EOL); 109872795172Sjruoho 109972795172Sjruoho if (rv != 0) 11009120d451Spgoyette return; 110172795172Sjruoho 11029120d451Spgoyette rv = sysctl_createv(clog, 0, NULL, NULL, 110310c3d35cSrmind CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 110410c3d35cSrmind CTLTYPE_INT, "aio_listio_max", 110510c3d35cSrmind SYSCTL_DESCR("Maximum number of asynchronous I/O " 110610c3d35cSrmind "operations in a single list I/O call"), 110710c3d35cSrmind sysctl_aio_listio_max, 0, &aio_listio_max, 0, 110810c3d35cSrmind CTL_KERN, CTL_CREATE, CTL_EOL); 110972795172Sjruoho 111072795172Sjruoho if (rv != 0) 11119120d451Spgoyette return; 111272795172Sjruoho 11139120d451Spgoyette rv = sysctl_createv(clog, 0, NULL, NULL, 111410c3d35cSrmind CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 111510c3d35cSrmind CTLTYPE_INT, "aio_max", 111610c3d35cSrmind SYSCTL_DESCR("Maximum number of asynchronous I/O " 111710c3d35cSrmind "operations"), 111810c3d35cSrmind sysctl_aio_max, 0, &aio_max, 0, 111910c3d35cSrmind CTL_KERN, CTL_CREATE, CTL_EOL); 112072795172Sjruoho 11219120d451Spgoyette return; 112210c3d35cSrmind } 112310c3d35cSrmind 112410c3d35cSrmind /* 112510c3d35cSrmind * Debugging 112610c3d35cSrmind */ 112710c3d35cSrmind #if defined(DDB) 112810c3d35cSrmind void 112910c3d35cSrmind aio_print_jobs(void (*pr)(const char *, ...)) 113010c3d35cSrmind { 11314b44bf46Smatt struct proc *p = curlwp->l_proc; 113210c3d35cSrmind struct aioproc *aio; 113310c3d35cSrmind struct aio_job *a_job; 113410c3d35cSrmind struct aiocb *aiocbp; 113510c3d35cSrmind 113610c3d35cSrmind if (p == NULL) { 113710c3d35cSrmind (*pr)("AIO: We are not in the processes right now.\n"); 113810c3d35cSrmind return; 113910c3d35cSrmind } 114010c3d35cSrmind 114110c3d35cSrmind aio = p->p_aio; 114210c3d35cSrmind if (aio == NULL) { 114310c3d35cSrmind (*pr)("AIO data is not initialized (PID = %d).\n", p->p_pid); 114410c3d35cSrmind return; 114510c3d35cSrmind } 114610c3d35cSrmind 114710c3d35cSrmind (*pr)("AIO: PID = %d\n", p->p_pid); 114810c3d35cSrmind (*pr)("AIO: Global count of the jobs = %u\n", aio_jobs_count); 114910c3d35cSrmind (*pr)("AIO: Count of the jobs = %u\n", aio->jobs_count); 115010c3d35cSrmind 115110c3d35cSrmind if (aio->curjob) { 115210c3d35cSrmind a_job = aio->curjob; 115310c3d35cSrmind (*pr)("\nAIO current job:\n"); 115410c3d35cSrmind (*pr)(" opcode = %d, errno = %d, state = %d, aiocb_ptr = %p\n", 115510c3d35cSrmind a_job->aio_op, a_job->aiocbp._errno, 115610c3d35cSrmind a_job->aiocbp._state, a_job->aiocb_uptr); 115710c3d35cSrmind aiocbp = &a_job->aiocbp; 115810c3d35cSrmind (*pr)(" fd = %d, offset = %u, buf = %p, nbytes = %u\n", 115910c3d35cSrmind aiocbp->aio_fildes, aiocbp->aio_offset, 116010c3d35cSrmind aiocbp->aio_buf, aiocbp->aio_nbytes); 116110c3d35cSrmind } 116210c3d35cSrmind 116310c3d35cSrmind (*pr)("\nAIO queue:\n"); 116410c3d35cSrmind TAILQ_FOREACH(a_job, &aio->jobs_queue, list) { 116510c3d35cSrmind (*pr)(" opcode = %d, errno = %d, state = %d, aiocb_ptr = %p\n", 116610c3d35cSrmind a_job->aio_op, a_job->aiocbp._errno, 116710c3d35cSrmind a_job->aiocbp._state, a_job->aiocb_uptr); 116810c3d35cSrmind aiocbp = &a_job->aiocbp; 116910c3d35cSrmind (*pr)(" fd = %d, offset = %u, buf = %p, nbytes = %u\n", 117010c3d35cSrmind aiocbp->aio_fildes, aiocbp->aio_offset, 117110c3d35cSrmind aiocbp->aio_buf, aiocbp->aio_nbytes); 117210c3d35cSrmind } 117310c3d35cSrmind } 117410c3d35cSrmind #endif /* defined(DDB) */ 1175