/*	$NetBSD: sys_aio.c,v 1.43 2017/06/01 02:45:13 chs Exp $	*/

/*
 * Copyright (c) 2007 Mindaugas Rasiukevicius <rmind at NetBSD org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Implementation of POSIX asynchronous I/O.
 * Defined in the Base Definitions volume of IEEE Std 1003.1-2001.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_aio.c,v 1.43 2017/06/01 02:45:13 chs Exp $");

#ifdef _KERNEL_OPT
#include "opt_ddb.h"
#endif

#include <sys/param.h>
#include <sys/condvar.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/lwp.h>
#include <sys/mutex.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
#include <sys/syscallargs.h>
#include <sys/syscallvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/atomic.h>
#include <sys/module.h>
#include <sys/buf.h>

#include <uvm/uvm_extern.h>

MODULE(MODULE_CLASS_MISC, aio, NULL);
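
/*
 * Overview of the implementation below: each process that uses AIO gets
 * a per-process aioproc structure, which holds a queue of aio_job entries
 * and a single kernel worker LWP.  The enqueueing syscalls validate the
 * user's aiocb, append an aio_job to the queue and signal the worker; the
 * worker dequeues jobs one at a time, performs the I/O, copies the result
 * back to user-space and posts the requested sigevent, if any.  The queue,
 * the per-process counters and the current-job pointer are protected by
 * the per-process aio_mtx.
 */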

/*
 * System-wide limits and counter of AIO operations.
 */
u_int			aio_listio_max = AIO_LISTIO_MAX;
static u_int		aio_max = AIO_MAX;
static u_int		aio_jobs_count;

static struct sysctllog	*aio_sysctl;
static struct pool	aio_job_pool;
static struct pool	aio_lio_pool;
static void *		aio_ehook;

static void	aio_worker(void *);
static void	aio_process(struct aio_job *);
static void	aio_sendsig(struct proc *, struct sigevent *);
static int	aio_enqueue_job(int, void *, struct lio_req *);
static void	aio_exit(proc_t *, void *);

static int	sysctl_aio_listio_max(SYSCTLFN_PROTO);
static int	sysctl_aio_max(SYSCTLFN_PROTO);
static int	sysctl_aio_init(void);

static const struct syscall_package aio_syscalls[] = {
	{ SYS_aio_cancel, 0, (sy_call_t *)sys_aio_cancel },
	{ SYS_aio_error, 0, (sy_call_t *)sys_aio_error },
	{ SYS_aio_fsync, 0, (sy_call_t *)sys_aio_fsync },
	{ SYS_aio_read, 0, (sy_call_t *)sys_aio_read },
	{ SYS_aio_return, 0, (sy_call_t *)sys_aio_return },
	{ SYS___aio_suspend50, 0, (sy_call_t *)sys___aio_suspend50 },
	{ SYS_aio_write, 0, (sy_call_t *)sys_aio_write },
	{ SYS_lio_listio, 0, (sy_call_t *)sys_lio_listio },
	{ 0, 0, NULL },
};

/*
 * Tear down all AIO state.
 */
static int
aio_fini(bool interface)
{
	int error;
	proc_t *p;

	if (interface) {
		/* Stop syscall activity. */
		error = syscall_disestablish(NULL, aio_syscalls);
		if (error != 0)
			return error;
		/* Abort if any processes are using AIO. */
		mutex_enter(proc_lock);
		PROCLIST_FOREACH(p, &allproc) {
			if (p->p_aio != NULL)
				break;
		}
		mutex_exit(proc_lock);
		if (p != NULL) {
			error = syscall_establish(NULL, aio_syscalls);
			KASSERT(error == 0);
			return EBUSY;
		}
	}

	if (aio_sysctl != NULL)
		sysctl_teardown(&aio_sysctl);

	KASSERT(aio_jobs_count == 0);
	exithook_disestablish(aio_ehook);
	pool_destroy(&aio_job_pool);
	pool_destroy(&aio_lio_pool);
	return 0;
}

/*
 * Initialize global AIO state.
 */
static int
aio_init(void)
{
	int error;

	pool_init(&aio_job_pool, sizeof(struct aio_job), 0, 0, 0,
	    "aio_jobs_pool", &pool_allocator_nointr, IPL_NONE);
	pool_init(&aio_lio_pool, sizeof(struct lio_req), 0, 0, 0,
	    "aio_lio_pool", &pool_allocator_nointr, IPL_NONE);
	aio_ehook = exithook_establish(aio_exit, NULL);

	error = sysctl_aio_init();
	if (error != 0) {
		(void)aio_fini(false);
		return error;
	}
	error = syscall_establish(NULL, aio_syscalls);
	if (error != 0)
		(void)aio_fini(false);
	return error;
}

/*
 * Module interface.
 */
static int
aio_modcmd(modcmd_t cmd, void *arg)
{

	switch (cmd) {
	case MODULE_CMD_INIT:
		return aio_init();
	case MODULE_CMD_FINI:
		return aio_fini(true);
	default:
		return ENOTTY;
	}
}
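
/*
 * Usage note: being packaged with MODULE(MODULE_CLASS_MISC, ...), this
 * code can be loaded and unloaded as the "aio" module, e.g. with
 * modload(8) and modunload(8); MODULE_CMD_FINI is refused with EBUSY
 * while any process still has p_aio set.
 */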

/*
 * Initialize the Asynchronous I/O data structures for the process.
 */
static int
aio_procinit(struct proc *p)
{
	struct aioproc *aio;
	struct lwp *l;
	int error;
	vaddr_t uaddr;

	/* Allocate and initialize the AIO structure */
	aio = kmem_zalloc(sizeof(struct aioproc), KM_SLEEP);

	/* Initialize the job queue and its synchronization structures */
	mutex_init(&aio->aio_mtx, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&aio->aio_worker_cv, "aiowork");
	cv_init(&aio->done_cv, "aiodone");
	TAILQ_INIT(&aio->jobs_queue);

	/*
	 * Create an AIO worker thread.
	 * XXX: Currently, the AIO thread is not protected against
	 * the user's actions.
	 */
	uaddr = uvm_uarea_alloc();
	if (uaddr == 0) {
		aio_exit(p, aio);
		return EAGAIN;
	}
	error = lwp_create(curlwp, p, uaddr, 0, NULL, 0, aio_worker,
	    NULL, &l, curlwp->l_class, &curlwp->l_sigmask, &curlwp->l_sigstk);
	if (error != 0) {
		uvm_uarea_free(uaddr);
		aio_exit(p, aio);
		return error;
	}

	/* Recheck if we are really first */
	mutex_enter(p->p_lock);
	if (p->p_aio) {
		mutex_exit(p->p_lock);
		aio_exit(p, aio);
		lwp_exit(l);
		return 0;
	}
	p->p_aio = aio;

	/* Complete the initialization of the thread, and run it */
	aio->aio_worker = l;
	lwp_lock(l);
	l->l_stat = LSRUN;
	l->l_priority = MAXPRI_USER;
	sched_enqueue(l, false);
	lwp_unlock(l);
	mutex_exit(p->p_lock);

	return 0;
}

/*
 * Tear down the Asynchronous I/O state of a process on exit.
 */
static void
aio_exit(struct proc *p, void *cookie)
{
	struct aio_job *a_job;
	struct aioproc *aio;

	if (cookie != NULL)
		aio = cookie;
	else if ((aio = p->p_aio) == NULL)
		return;

	/* Free the AIO queue */
	while (!TAILQ_EMPTY(&aio->jobs_queue)) {
		a_job = TAILQ_FIRST(&aio->jobs_queue);
		TAILQ_REMOVE(&aio->jobs_queue, a_job, list);
		pool_put(&aio_job_pool, a_job);
		atomic_dec_uint(&aio_jobs_count);
	}

	/* Destroy and free the entire AIO data structure */
	cv_destroy(&aio->aio_worker_cv);
	cv_destroy(&aio->done_cv);
	mutex_destroy(&aio->aio_mtx);
	kmem_free(aio, sizeof(struct aioproc));
}
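
/*
 * The worker below and aio_enqueue_job() form a producer/consumer pair:
 * jobs are inserted under aio_mtx and announced with cv_signal() on
 * aio_worker_cv; after completing each job the worker cv_broadcast()s
 * done_cv, so that aio_suspend() and LIO_WAIT waiters can re-check
 * their lists.
 */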

/*
 * AIO worker thread and processor.
 */
static void
aio_worker(void *arg)
{
	struct proc *p = curlwp->l_proc;
	struct aioproc *aio = p->p_aio;
	struct aio_job *a_job;
	struct lio_req *lio;
	sigset_t oss, nss;
	int error __diagused, refcnt;

	/*
	 * Block all signals: with a completely filled mask, only
	 * SIGKILL and SIGSTOP can still be delivered to this thread.
	 */
	sigfillset(&nss);
	mutex_enter(p->p_lock);
	error = sigprocmask1(curlwp, SIG_SETMASK, &nss, &oss);
	mutex_exit(p->p_lock);
	KASSERT(error == 0);

	for (;;) {
		/*
		 * Loop over the jobs in the queue.  If there are
		 * no jobs, then sleep.
		 */
		mutex_enter(&aio->aio_mtx);
		while ((a_job = TAILQ_FIRST(&aio->jobs_queue)) == NULL) {
			if (cv_wait_sig(&aio->aio_worker_cv, &aio->aio_mtx)) {
				/*
				 * Thread was interrupted - check for
				 * pending exit or suspend.
				 */
				mutex_exit(&aio->aio_mtx);
				lwp_userret(curlwp);
				mutex_enter(&aio->aio_mtx);
			}
		}

		/* Take the job from the queue */
		aio->curjob = a_job;
		TAILQ_REMOVE(&aio->jobs_queue, a_job, list);

		atomic_dec_uint(&aio_jobs_count);
		aio->jobs_count--;

		mutex_exit(&aio->aio_mtx);

		/* Process an AIO operation */
		aio_process(a_job);

		/* Copy the data structure back to the user-space */
		(void)copyout(&a_job->aiocbp, a_job->aiocb_uptr,
		    sizeof(struct aiocb));

		mutex_enter(&aio->aio_mtx);
		KASSERT(aio->curjob == a_job);
		aio->curjob = NULL;

		/* Decrease the reference counter, if there is a LIO structure */
		lio = a_job->lio;
		refcnt = (lio != NULL ? --lio->refcnt : -1);

		/* Notify all suspenders */
		cv_broadcast(&aio->done_cv);
		mutex_exit(&aio->aio_mtx);

		/* Send a signal, if any */
		aio_sendsig(p, &a_job->aiocbp.aio_sigevent);

		/* Destroy the LIO structure */
		if (refcnt == 0) {
			aio_sendsig(p, &lio->sig);
			pool_put(&aio_lio_pool, lio);
		}

		/* Destroy the job */
		pool_put(&aio_job_pool, a_job);
	}

	/* NOTREACHED */
}

static void
aio_process(struct aio_job *a_job)
{
	struct proc *p = curlwp->l_proc;
	struct aiocb *aiocbp = &a_job->aiocbp;
	struct file *fp;
	int fd = aiocbp->aio_fildes;
	int error = 0;

	KASSERT(a_job->aio_op != 0);

	if ((a_job->aio_op & (AIO_READ | AIO_WRITE)) != 0) {
		struct iovec aiov;
		struct uio auio;

		if (aiocbp->aio_nbytes > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}

		fp = fd_getfile(fd);
		if (fp == NULL) {
			error = EBADF;
			goto done;
		}

		aiov.iov_base = (void *)(uintptr_t)aiocbp->aio_buf;
		aiov.iov_len = aiocbp->aio_nbytes;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_resid = aiocbp->aio_nbytes;
		auio.uio_vmspace = p->p_vmspace;

		if (a_job->aio_op & AIO_READ) {
			/*
			 * Perform a Read operation
			 */
			KASSERT((a_job->aio_op & AIO_WRITE) == 0);

			if ((fp->f_flag & FREAD) == 0) {
				fd_putfile(fd);
				error = EBADF;
				goto done;
			}
			auio.uio_rw = UIO_READ;
			error = (*fp->f_ops->fo_read)(fp, &aiocbp->aio_offset,
			    &auio, fp->f_cred, FOF_UPDATE_OFFSET);
		} else {
			/*
			 * Perform a Write operation
			 */
			KASSERT(a_job->aio_op & AIO_WRITE);

			if ((fp->f_flag & FWRITE) == 0) {
				fd_putfile(fd);
				error = EBADF;
				goto done;
			}
			auio.uio_rw = UIO_WRITE;
			error = (*fp->f_ops->fo_write)(fp, &aiocbp->aio_offset,
			    &auio, fp->f_cred, FOF_UPDATE_OFFSET);
		}
		fd_putfile(fd);

		/* Store the result value */
		a_job->aiocbp.aio_nbytes -= auio.uio_resid;
		a_job->aiocbp._retval = (error == 0) ?
		    a_job->aiocbp.aio_nbytes : -1;

	} else if ((a_job->aio_op & (AIO_SYNC | AIO_DSYNC)) != 0) {
		/*
		 * Perform a file Sync operation
		 */
		struct vnode *vp;

		if ((error = fd_getvnode(fd, &fp)) != 0)
			goto done;

		if ((fp->f_flag & FWRITE) == 0) {
			fd_putfile(fd);
			error = EBADF;
			goto done;
		}

		vp = fp->f_vnode;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (a_job->aio_op & AIO_DSYNC) {
			error = VOP_FSYNC(vp, fp->f_cred,
			    FSYNC_WAIT | FSYNC_DATAONLY, 0, 0);
		} else if (a_job->aio_op & AIO_SYNC) {
			error = VOP_FSYNC(vp, fp->f_cred,
			    FSYNC_WAIT, 0, 0);
		}
		VOP_UNLOCK(vp);
		fd_putfile(fd);

		/* Store the result value */
		a_job->aiocbp._retval = (error == 0) ? 0 : -1;

	} else
		panic("aio_process: invalid operation code");

done:
	/* Job is done, set the error, if any */
	a_job->aiocbp._errno = error;
	a_job->aiocbp._state = JOB_DONE;
}

/*
 * Send an AIO signal.
 */
static void
aio_sendsig(struct proc *p, struct sigevent *sig)
{
	ksiginfo_t ksi;

	if (sig->sigev_signo == 0 || sig->sigev_notify == SIGEV_NONE)
		return;

	KSI_INIT(&ksi);
	ksi.ksi_signo = sig->sigev_signo;
	ksi.ksi_code = SI_ASYNCIO;
	ksi.ksi_value = sig->sigev_value;
	mutex_enter(proc_lock);
	kpsignal(p, &ksi, NULL);
	mutex_exit(proc_lock);
}

/*
 * Enqueue the job.
 */
static int
aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio)
{
	struct proc *p = curlwp->l_proc;
	struct aioproc *aio;
	struct aio_job *a_job;
	struct aiocb aiocbp;
	struct sigevent *sig;
	int error;

	/* Imprecise (unlocked) check against the limit */
	if (aio_jobs_count + 1 > aio_max)
		return EAGAIN;

	/* Get the data structure from the user-space */
	error = copyin(aiocb_uptr, &aiocbp, sizeof(struct aiocb));
	if (error)
		return error;

	/* Check if a signal is set, and validate it */
	sig = &aiocbp.aio_sigevent;
	if (sig->sigev_signo < 0 || sig->sigev_signo >= NSIG ||
	    sig->sigev_notify < SIGEV_NONE || sig->sigev_notify > SIGEV_SA)
		return EINVAL;

	/* Buffer and byte count */
	if (((AIO_SYNC | AIO_DSYNC) & op) == 0)
		if (aiocbp.aio_buf == NULL || aiocbp.aio_nbytes > SSIZE_MAX)
			return EINVAL;

	/* Check the opcode; if LIO_NOP - simply ignore */
	if (op == AIO_LIO) {
		KASSERT(lio != NULL);
		if (aiocbp.aio_lio_opcode == LIO_WRITE)
			op = AIO_WRITE;
		else if (aiocbp.aio_lio_opcode == LIO_READ)
			op = AIO_READ;
		else
			return (aiocbp.aio_lio_opcode == LIO_NOP) ? 0 : EINVAL;
	} else {
		KASSERT(lio == NULL);
	}

	/*
	 * Look for an already existing job.  If found, the job is still
	 * in progress; according to POSIX this is invalid, so return
	 * the error.
	 */
	aio = p->p_aio;
	if (aio) {
		mutex_enter(&aio->aio_mtx);
		TAILQ_FOREACH(a_job, &aio->jobs_queue, list) {
			if (a_job->aiocb_uptr != aiocb_uptr)
				continue;
			mutex_exit(&aio->aio_mtx);
			return EINVAL;
		}
		mutex_exit(&aio->aio_mtx);
	}

	/*
	 * Check if the AIO structure is initialized; if not, initialize it.
	 * In the LIO case, we did that already.  We will recheck this with
	 * the lock held in aio_procinit().
	 */
	if (lio == NULL && p->p_aio == NULL)
		if (aio_procinit(p))
			return EAGAIN;
	aio = p->p_aio;

	/*
	 * Set the state with the errno, and copy the data
	 * structure back to the user-space.
	 */
	aiocbp._state = JOB_WIP;
	aiocbp._errno = EINPROGRESS;
	aiocbp._retval = -1;
	error = copyout(&aiocbp, aiocb_uptr, sizeof(struct aiocb));
	if (error)
		return error;

	/* Allocate and initialize a new AIO job */
	a_job = pool_get(&aio_job_pool, PR_WAITOK);
	memset(a_job, 0, sizeof(struct aio_job));

	/*
	 * Set the data.
	 * Store the user-space pointer for searching.  Since we are
	 * storing only per-process pointers - it is safe.
	 */
	memcpy(&a_job->aiocbp, &aiocbp, sizeof(struct aiocb));
	a_job->aiocb_uptr = aiocb_uptr;
	a_job->aio_op |= op;
	a_job->lio = lio;

	/*
	 * Add the job to the queue, update the counters, and
	 * notify the AIO worker thread to handle the job.
	 */
	mutex_enter(&aio->aio_mtx);

	/* Fail, if the limit was reached */
	if (atomic_inc_uint_nv(&aio_jobs_count) > aio_max ||
	    aio->jobs_count >= aio_listio_max) {
		atomic_dec_uint(&aio_jobs_count);
		mutex_exit(&aio->aio_mtx);
		pool_put(&aio_job_pool, a_job);
		return EAGAIN;
	}

	TAILQ_INSERT_TAIL(&aio->jobs_queue, a_job, list);
	aio->jobs_count++;
	if (lio)
		lio->refcnt++;
	cv_signal(&aio->aio_worker_cv);

	mutex_exit(&aio->aio_mtx);

	/*
	 * Errors are to be retrieved with the aio_error() function only.
	 * Per POSIX, enqueueing succeeds even if the operation itself
	 * later fails.
	 */
	return 0;
}
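
/*
 * Illustrative user-space usage of the interface implemented here (a
 * minimal sketch, not part of the kernel code; fd, buf and bufsize are
 * hypothetical, and <aio.h>, <err.h> and <string.h> are assumed):
 *
 *	struct aiocb cb;
 *	const struct aiocb *list[1];
 *	ssize_t nread;
 *
 *	memset(&cb, 0, sizeof(cb));
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = bufsize;
 *	cb.aio_sigevent.sigev_notify = SIGEV_NONE;
 *	if (aio_read(&cb) == -1)
 *		err(EXIT_FAILURE, "aio_read");
 *	list[0] = &cb;
 *	while (aio_error(&cb) == EINPROGRESS)
 *		(void)aio_suspend(list, 1, NULL);
 *	nread = aio_return(&cb);
 *
 * aio_read() reaches sys_aio_read() -> aio_enqueue_job() above, while
 * aio_error() and aio_return() read back the _errno and _retval that
 * the worker copies out on completion.
 */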

/*
 * Syscall functions.
 */

int
sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) fildes;
		syscallarg(struct aiocb *) aiocbp;
	} */
	struct proc *p = l->l_proc;
	struct aioproc *aio;
	struct aio_job *a_job;
	struct aiocb *aiocbp_ptr;
	struct lio_req *lio;
	struct filedesc *fdp = p->p_fd;
	unsigned int cn, errcnt, fildes;
	fdtab_t *dt;

	TAILQ_HEAD(, aio_job) tmp_jobs_list;

	/* Check for an invalid file descriptor */
	fildes = (unsigned int)SCARG(uap, fildes);
	dt = fdp->fd_dt;
	if (fildes >= dt->dt_nfiles)
		return EBADF;
	if (dt->dt_ff[fildes] == NULL || dt->dt_ff[fildes]->ff_file == NULL)
		return EBADF;

	/* Check if the AIO structure is initialized */
	if (p->p_aio == NULL) {
		*retval = AIO_NOTCANCELED;
		return 0;
	}

	aio = p->p_aio;
	aiocbp_ptr = (struct aiocb *)SCARG(uap, aiocbp);

	mutex_enter(&aio->aio_mtx);

	/* Cancel the jobs, and remove them from the queue */
	cn = 0;
	TAILQ_INIT(&tmp_jobs_list);
	TAILQ_FOREACH(a_job, &aio->jobs_queue, list) {
		if (aiocbp_ptr) {
			if (aiocbp_ptr != a_job->aiocb_uptr)
				continue;
			if (fildes != a_job->aiocbp.aio_fildes) {
				mutex_exit(&aio->aio_mtx);
				return EBADF;
			}
		} else if (a_job->aiocbp.aio_fildes != fildes)
			continue;

		TAILQ_REMOVE(&aio->jobs_queue, a_job, list);
		TAILQ_INSERT_TAIL(&tmp_jobs_list, a_job, list);

		/* Decrease the counters */
		atomic_dec_uint(&aio_jobs_count);
		aio->jobs_count--;
		lio = a_job->lio;
		if (lio != NULL && --lio->refcnt != 0)
			a_job->lio = NULL;

		cn++;
		if (aiocbp_ptr)
			break;
	}

	/* If any jobs were canceled, report it */
	if (cn)
		*retval = AIO_CANCELED;

	/* We cannot cancel the current job */
	a_job = aio->curjob;
	if (a_job && ((a_job->aiocbp.aio_fildes == fildes) ||
	    (a_job->aiocb_uptr == aiocbp_ptr)))
		*retval = AIO_NOTCANCELED;

	mutex_exit(&aio->aio_mtx);

	/* Free the jobs after dropping the lock */
	errcnt = 0;
	while (!TAILQ_EMPTY(&tmp_jobs_list)) {
		a_job = TAILQ_FIRST(&tmp_jobs_list);
		TAILQ_REMOVE(&tmp_jobs_list, a_job, list);
		/* Set the errno and copy structures back to the user-space */
		a_job->aiocbp._errno = ECANCELED;
		a_job->aiocbp._state = JOB_DONE;
		if (copyout(&a_job->aiocbp, a_job->aiocb_uptr,
		    sizeof(struct aiocb)))
			errcnt++;
		/* Send a signal, if any */
		aio_sendsig(p, &a_job->aiocbp.aio_sigevent);
		if (a_job->lio) {
			lio = a_job->lio;
			aio_sendsig(p, &lio->sig);
			pool_put(&aio_lio_pool, lio);
		}
		pool_put(&aio_job_pool, a_job);
	}

	if (errcnt)
		return EFAULT;

	/* Set a correct return value */
	if (*retval == 0)
		*retval = AIO_ALLDONE;

	return 0;
}

int
sys_aio_error(struct lwp *l, const struct sys_aio_error_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const struct aiocb *) aiocbp;
	} */
	struct proc *p = l->l_proc;
	struct aioproc *aio = p->p_aio;
	struct aiocb aiocbp;
	int error;

	if (aio == NULL)
		return EINVAL;

	error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb));
	if (error)
		return error;

	if (aiocbp._state == JOB_NONE)
		return EINVAL;

	*retval = aiocbp._errno;

	return 0;
}
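
/*
 * Cancellation semantics of the syscall above, in user-space terms (an
 * illustrative sketch; fd is hypothetical):
 *
 *	switch (aio_cancel(fd, NULL)) {
 *	case AIO_CANCELED:	-- at least one queued job was removed
 *	case AIO_NOTCANCELED:	-- a matching job is being processed
 *	case AIO_ALLDONE:	-- nothing was pending on fd
 *	}
 *
 * Canceled jobs are completed with _errno = ECANCELED, which user-space
 * observes via aio_error().
 */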
int
sys_aio_fsync(struct lwp *l, const struct sys_aio_fsync_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) op;
		syscallarg(struct aiocb *) aiocbp;
	} */
	int op = SCARG(uap, op);

	if ((op != O_DSYNC) && (op != O_SYNC))
		return EINVAL;

	op = (op == O_DSYNC) ? AIO_DSYNC : AIO_SYNC;

	return aio_enqueue_job(op, SCARG(uap, aiocbp), NULL);
}

int
sys_aio_read(struct lwp *l, const struct sys_aio_read_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(struct aiocb *) aiocbp;
	} */

	return aio_enqueue_job(AIO_READ, SCARG(uap, aiocbp), NULL);
}

int
sys_aio_return(struct lwp *l, const struct sys_aio_return_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(struct aiocb *) aiocbp;
	} */
	struct proc *p = l->l_proc;
	struct aioproc *aio = p->p_aio;
	struct aiocb aiocbp;
	int error;

	if (aio == NULL)
		return EINVAL;

	error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb));
	if (error)
		return error;

	if (aiocbp._errno == EINPROGRESS || aiocbp._state != JOB_DONE)
		return EINVAL;

	*retval = aiocbp._retval;

	/* Reset the internal variables */
	aiocbp._errno = 0;
	aiocbp._retval = -1;
	aiocbp._state = JOB_NONE;
	error = copyout(&aiocbp, SCARG(uap, aiocbp), sizeof(struct aiocb));

	return error;
}

int
sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const struct aiocb *const[]) list;
		syscallarg(int) nent;
		syscallarg(const struct timespec *) timeout;
	} */
	struct aiocb **list;
	struct timespec ts;
	int error, nent;

	nent = SCARG(uap, nent);
	if (nent <= 0 || nent > aio_listio_max)
		return EAGAIN;

	if (SCARG(uap, timeout)) {
		/* Get the timeout from the user-space */
		error = copyin(SCARG(uap, timeout), &ts,
		    sizeof(struct timespec));
		if (error)
			return error;
	}

	list = kmem_alloc(nent * sizeof(*list), KM_SLEEP);
	error = copyin(SCARG(uap, list), list, nent * sizeof(*list));
	if (error)
		goto out;
	error = aio_suspend1(l, list, nent, SCARG(uap, timeout) ? &ts : NULL);
out:
	kmem_free(list, nent * sizeof(*list));
	return error;
}
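
/*
 * Note on the timeout handling below: the timespec is converted to
 * clock ticks from its millisecond total, so the nanosecond part is
 * truncated; a positive request that would round down to zero ticks
 * (e.g. tv_sec = 0, tv_nsec = 500000) is bumped up to one tick instead
 * of being mistaken for "no timeout", and a zero or negative timeout
 * fails with EAGAIN.
 */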
int
aio_suspend1(struct lwp *l, struct aiocb **aiocbp_list, int nent,
    struct timespec *ts)
{
	struct proc *p = l->l_proc;
	struct aioproc *aio;
	struct aio_job *a_job;
	int i, error, timo;

	if (p->p_aio == NULL)
		return EAGAIN;
	aio = p->p_aio;

	if (ts) {
		timo = mstohz((ts->tv_sec * 1000) + (ts->tv_nsec / 1000000));
		if (timo == 0 && ts->tv_sec == 0 && ts->tv_nsec > 0)
			timo = 1;
		if (timo <= 0)
			return EAGAIN;
	} else
		timo = 0;

	mutex_enter(&aio->aio_mtx);
	for (;;) {
		for (i = 0; i < nent; i++) {

			/* Skip NULL entries */
			if (aiocbp_list[i] == NULL)
				continue;

			/* Skip the current job */
			if (aio->curjob) {
				a_job = aio->curjob;
				if (a_job->aiocb_uptr == aiocbp_list[i])
					continue;
			}

			/* Look for a job in the queue */
			TAILQ_FOREACH(a_job, &aio->jobs_queue, list)
				if (a_job->aiocb_uptr == aiocbp_list[i])
					break;

			if (a_job == NULL) {
				struct aiocb aiocbp;

				mutex_exit(&aio->aio_mtx);

				/* Check if the job is done */
				error = copyin(aiocbp_list[i], &aiocbp,
				    sizeof(struct aiocb));
				if (error == 0 && aiocbp._state != JOB_DONE) {
					mutex_enter(&aio->aio_mtx);
					continue;
				}
				return error;
			}
		}

		/* Wait for a completion signal or until the timeout expires */
		error = cv_timedwait_sig(&aio->done_cv, &aio->aio_mtx, timo);
		if (error) {
			if (error == EWOULDBLOCK)
				error = EAGAIN;
			break;
		}
	}
	mutex_exit(&aio->aio_mtx);
	return error;
}

int
sys_aio_write(struct lwp *l, const struct sys_aio_write_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(struct aiocb *) aiocbp;
	} */

	return aio_enqueue_job(AIO_WRITE, SCARG(uap, aiocbp), NULL);
}
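
/*
 * Illustrative user-space usage of lio_listio() (a minimal sketch, not
 * part of the kernel code; fd, buf1 and buf1size are hypothetical):
 *
 *	struct aiocb cb1, cb2;
 *	struct aiocb *list[2] = { &cb1, &cb2 };
 *
 *	memset(&cb1, 0, sizeof(cb1));
 *	cb1.aio_fildes = fd;
 *	cb1.aio_buf = buf1;
 *	cb1.aio_nbytes = buf1size;
 *	cb1.aio_lio_opcode = LIO_READ;
 *	(cb2 is set up the same way, with LIO_WRITE)
 *
 *	if (lio_listio(LIO_WAIT, list, 2, NULL) == -1)
 *		err(EXIT_FAILURE, "lio_listio");
 *
 * With LIO_WAIT, the syscall below sleeps on done_cv until the LIO
 * reference count drains; with LIO_NOWAIT it returns at once and the
 * optional sigevent is posted when all jobs have completed.
 */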
int
sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) mode;
		syscallarg(struct aiocb *const[]) list;
		syscallarg(int) nent;
		syscallarg(struct sigevent *) sig;
	} */
	struct proc *p = l->l_proc;
	struct aioproc *aio;
	struct aiocb **aiocbp_list;
	struct lio_req *lio;
	int i, error, errcnt, mode, nent;

	mode = SCARG(uap, mode);
	nent = SCARG(uap, nent);

	/* Imprecise (unlocked) checks for the limit and invalid values */
	if (nent < 1 || nent > aio_listio_max)
		return EINVAL;
	if (aio_jobs_count + nent > aio_max)
		return EAGAIN;

	/* Check if the AIO structure is initialized; if not, initialize it */
	if (p->p_aio == NULL)
		if (aio_procinit(p))
			return EAGAIN;
	aio = p->p_aio;

	/* Create a LIO structure */
	lio = pool_get(&aio_lio_pool, PR_WAITOK);
	lio->refcnt = 1;
	error = 0;

	switch (mode) {
	case LIO_WAIT:
		memset(&lio->sig, 0, sizeof(struct sigevent));
		break;
	case LIO_NOWAIT:
		/* Check for a signal, and validate it */
		if (SCARG(uap, sig)) {
			struct sigevent *sig = &lio->sig;

			error = copyin(SCARG(uap, sig), &lio->sig,
			    sizeof(struct sigevent));
			if (error == 0 &&
			    (sig->sigev_signo < 0 ||
			    sig->sigev_signo >= NSIG ||
			    sig->sigev_notify < SIGEV_NONE ||
			    sig->sigev_notify > SIGEV_SA))
				error = EINVAL;
		} else
			memset(&lio->sig, 0, sizeof(struct sigevent));
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error != 0) {
		pool_put(&aio_lio_pool, lio);
		return error;
	}

	/* Get the list from the user-space */
	aiocbp_list = kmem_alloc(nent * sizeof(*aiocbp_list), KM_SLEEP);
	error = copyin(SCARG(uap, list), aiocbp_list,
	    nent * sizeof(*aiocbp_list));
	if (error) {
		mutex_enter(&aio->aio_mtx);
		goto err;
	}

	/* Enqueue all jobs */
	errcnt = 0;
	for (i = 0; i < nent; i++) {
		error = aio_enqueue_job(AIO_LIO, aiocbp_list[i], lio);
		/*
		 * POSIX allows the call to fail in this case even
		 * though some of the I/O operations have already
		 * been initiated.
		 */
		if (error)
			errcnt++;
	}

	mutex_enter(&aio->aio_mtx);

	/* Return an error, if any */
	if (errcnt) {
		error = EIO;
		goto err;
	}

	if (mode == LIO_WAIT) {
		/*
		 * Wait for AIO completion.  In this case, the LIO
		 * structure will be freed here.
		 */
		while (lio->refcnt > 1 && error == 0)
			error = cv_wait_sig(&aio->done_cv, &aio->aio_mtx);
		if (error)
			error = EINTR;
	}

err:
	if (--lio->refcnt != 0)
		lio = NULL;
	mutex_exit(&aio->aio_mtx);
	if (lio != NULL) {
		aio_sendsig(p, &lio->sig);
		pool_put(&aio_lio_pool, lio);
	}
	kmem_free(aiocbp_list, nent * sizeof(*aiocbp_list));
	return error;
}

/*
 * SysCtl
 */

static int
sysctl_aio_listio_max(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error, newsize;

	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = aio_listio_max;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < 1 || newsize > aio_max)
		return EINVAL;
	aio_listio_max = newsize;

	return 0;
}

static int
sysctl_aio_max(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error, newsize;

	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = aio_max;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < 1 || newsize < aio_listio_max)
		return EINVAL;
	aio_max = newsize;

	return 0;
}

static int
sysctl_aio_init(void)
{
	int rv;

	aio_sysctl = NULL;

	rv = sysctl_createv(&aio_sysctl, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
	    CTLTYPE_INT, "posix_aio",
	    SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
			 "Asynchronous I/O option to which the "
			 "system attempts to conform"),
	    NULL, _POSIX_ASYNCHRONOUS_IO, NULL, 0,
	    CTL_KERN, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		return rv;

	rv = sysctl_createv(&aio_sysctl, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "aio_listio_max",
	    SYSCTL_DESCR("Maximum number of asynchronous I/O "
			 "operations in a single list I/O call"),
	    sysctl_aio_listio_max, 0, &aio_listio_max, 0,
	    CTL_KERN, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		return rv;

	rv = sysctl_createv(&aio_sysctl, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "aio_max",
	    SYSCTL_DESCR("Maximum number of asynchronous I/O "
			 "operations"),
	    sysctl_aio_max, 0, &aio_max, 0,
	    CTL_KERN, CTL_CREATE, CTL_EOL);

	return rv;
}
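
/*
 * The limits above are runtime-tunable, e.g. (node names as created in
 * sysctl_aio_init() above):
 *
 *	$ sysctl kern.posix_aio
 *	$ sysctl -w kern.aio_max=4096
 *	$ sysctl -w kern.aio_listio_max=512
 *
 * The handlers reject values below 1, an aio_listio_max greater than
 * aio_max, and an aio_max smaller than aio_listio_max.
 */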

/*
 * Debugging
 */
#if defined(DDB)
void
aio_print_jobs(void (*pr)(const char *, ...))
{
	struct proc *p = curlwp->l_proc;
	struct aioproc *aio;
	struct aio_job *a_job;
	struct aiocb *aiocbp;

	if (p == NULL) {
		(*pr)("AIO: not in the context of a process right now.\n");
		return;
	}

	aio = p->p_aio;
	if (aio == NULL) {
		(*pr)("AIO data is not initialized (PID = %d).\n", p->p_pid);
		return;
	}

	(*pr)("AIO: PID = %d\n", p->p_pid);
	(*pr)("AIO: Global count of the jobs = %u\n", aio_jobs_count);
	(*pr)("AIO: Count of the jobs = %u\n", aio->jobs_count);

	if (aio->curjob) {
		a_job = aio->curjob;
		(*pr)("\nAIO current job:\n");
		(*pr)(" opcode = %d, errno = %d, state = %d, aiocb_ptr = %p\n",
		    a_job->aio_op, a_job->aiocbp._errno,
		    a_job->aiocbp._state, a_job->aiocb_uptr);
		aiocbp = &a_job->aiocbp;
		(*pr)(" fd = %d, offset = %jd, buf = %p, nbytes = %zu\n",
		    aiocbp->aio_fildes, (intmax_t)aiocbp->aio_offset,
		    aiocbp->aio_buf, aiocbp->aio_nbytes);
	}

	(*pr)("\nAIO queue:\n");
	TAILQ_FOREACH(a_job, &aio->jobs_queue, list) {
		(*pr)(" opcode = %d, errno = %d, state = %d, aiocb_ptr = %p\n",
		    a_job->aio_op, a_job->aiocbp._errno,
		    a_job->aiocbp._state, a_job->aiocb_uptr);
		aiocbp = &a_job->aiocbp;
		(*pr)(" fd = %d, offset = %jd, buf = %p, nbytes = %zu\n",
		    aiocbp->aio_fildes, (intmax_t)aiocbp->aio_offset,
		    aiocbp->aio_buf, aiocbp->aio_nbytes);
	}
}
#endif /* defined(DDB) */