/*	$NetBSD: sys_aio.c,v 1.34 2010/06/24 13:03:11 hannken Exp $	*/

/*
 * Copyright (c) 2007 Mindaugas Rasiukevicius <rmind at NetBSD org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Implementation of POSIX asynchronous I/O.
 * Defined in the Base Definitions volume of IEEE Std 1003.1-2001.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_aio.c,v 1.34 2010/06/24 13:03:11 hannken Exp $");

#ifdef _KERNEL_OPT
#include "opt_ddb.h"
#endif

#include <sys/param.h>
#include <sys/condvar.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/lwp.h>
#include <sys/mutex.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
#include <sys/syscallargs.h>
#include <sys/syscallvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/atomic.h>
#include <sys/module.h>
#include <sys/buf.h>

#include <uvm/uvm_extern.h>

MODULE(MODULE_CLASS_MISC, aio, NULL);
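
/*
 * Illustrative userland usage (a sketch only, not part of the kernel):
 * a process submits a request with aio_read(2), polls its status with
 * aio_error(2), and collects the result with aio_return(2).  `fd' and
 * `buf' are assumed to be an open descriptor and a suitable buffer.
 *
 *	struct aiocb cb;
 *
 *	memset(&cb, 0, sizeof(cb));
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof(buf);
 *	cb.aio_offset = 0;
 *	if (aio_read(&cb) == -1)
 *		err(EXIT_FAILURE, "aio_read");
 *	while (aio_error(&cb) == EINPROGRESS)
 *		;			// or block in aio_suspend(2)
 *	ssize_t n = aio_return(&cb);
 */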

/*
 * System-wide limits and counter of AIO operations.
 */
u_int			aio_listio_max = AIO_LISTIO_MAX;
static u_int		aio_max = AIO_MAX;
static u_int		aio_jobs_count;

static struct pool	aio_job_pool;
static struct pool	aio_lio_pool;
static void *		aio_ehook;

static void	aio_worker(void *);
static void	aio_process(struct aio_job *);
static void	aio_sendsig(struct proc *, struct sigevent *);
static int	aio_enqueue_job(int, void *, struct lio_req *);
static void	aio_exit(proc_t *, void *);

static const struct syscall_package aio_syscalls[] = {
	{ SYS_aio_cancel, 0, (sy_call_t *)sys_aio_cancel },
	{ SYS_aio_error, 0, (sy_call_t *)sys_aio_error },
	{ SYS_aio_fsync, 0, (sy_call_t *)sys_aio_fsync },
	{ SYS_aio_read, 0, (sy_call_t *)sys_aio_read },
	{ SYS_aio_return, 0, (sy_call_t *)sys_aio_return },
	{ SYS___aio_suspend50, 0, (sy_call_t *)sys___aio_suspend50 },
	{ SYS_aio_write, 0, (sy_call_t *)sys_aio_write },
	{ SYS_lio_listio, 0, (sy_call_t *)sys_lio_listio },
	{ 0, 0, NULL },
};

/*
 * Tear down all AIO state.
 */
static int
aio_fini(bool interface)
{
	int error;
	proc_t *p;

	if (interface) {
		/* Stop syscall activity. */
		error = syscall_disestablish(NULL, aio_syscalls);
		if (error != 0)
			return error;
		/* Abort if any processes are using AIO. */
		mutex_enter(proc_lock);
		PROCLIST_FOREACH(p, &allproc) {
			if (p->p_aio != NULL)
				break;
		}
		mutex_exit(proc_lock);
		if (p != NULL) {
			error = syscall_establish(NULL, aio_syscalls);
			KASSERT(error == 0);
			return EBUSY;
		}
	}
	KASSERT(aio_jobs_count == 0);
	exithook_disestablish(aio_ehook);
	pool_destroy(&aio_job_pool);
	pool_destroy(&aio_lio_pool);
	return 0;
}

/*
 * Initialize global AIO state.
 */
static int
aio_init(void)
{
	int error;

	pool_init(&aio_job_pool, sizeof(struct aio_job), 0, 0, 0,
	    "aio_jobs_pool", &pool_allocator_nointr, IPL_NONE);
	pool_init(&aio_lio_pool, sizeof(struct lio_req), 0, 0, 0,
	    "aio_lio_pool", &pool_allocator_nointr, IPL_NONE);
	aio_ehook = exithook_establish(aio_exit, NULL);
	error = syscall_establish(NULL, aio_syscalls);
	if (error != 0)
		aio_fini(false);
	return error;
}

/*
 * Module interface.
 */
static int
aio_modcmd(modcmd_t cmd, void *arg)
{

	switch (cmd) {
	case MODULE_CMD_INIT:
		return aio_init();
	case MODULE_CMD_FINI:
		return aio_fini(true);
	default:
		return ENOTTY;
	}
}
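
/*
 * Overview of the per-process AIO state (a summary of the code below,
 * not normative documentation): a struct aioproc is created lazily on
 * the first AIO request and destroyed from the process exit hook.
 * aio_mtx protects jobs_queue, jobs_count and curjob; aio_worker_cv
 * wakes the worker thread when a job is enqueued, and done_cv is
 * broadcast on every completion so that aio_suspend() and LIO_WAIT
 * callers can re-check their lists.
 */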

/*
 * Initialize Asynchronous I/O data structures for the process.
 */
static int
aio_procinit(struct proc *p)
{
	struct aioproc *aio;
	struct lwp *l;
	int error;
	vaddr_t uaddr;

	/* Allocate and initialize the AIO structure */
	aio = kmem_zalloc(sizeof(struct aioproc), KM_SLEEP);
	if (aio == NULL)
		return EAGAIN;

	/* Initialize the job queue and its synchronization structures */
	mutex_init(&aio->aio_mtx, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&aio->aio_worker_cv, "aiowork");
	cv_init(&aio->done_cv, "aiodone");
	TAILQ_INIT(&aio->jobs_queue);

	/*
	 * Create an AIO worker thread.
	 * XXX: Currently, the AIO thread is not protected against
	 * the user's actions.
	 */
	uaddr = uvm_uarea_alloc();
	if (uaddr == 0) {
		aio_exit(p, aio);
		return EAGAIN;
	}
	error = lwp_create(curlwp, p, uaddr, 0, NULL, 0, aio_worker,
	    NULL, &l, curlwp->l_class);
	if (error != 0) {
		uvm_uarea_free(uaddr);
		aio_exit(p, aio);
		return error;
	}

	/* Recheck that we really are first */
	mutex_enter(p->p_lock);
	if (p->p_aio) {
		mutex_exit(p->p_lock);
		aio_exit(p, aio);
		lwp_exit(l);
		return 0;
	}
	p->p_aio = aio;

	/* Complete the initialization of the thread, and run it */
	aio->aio_worker = l;
	lwp_lock(l);
	l->l_stat = LSRUN;
	l->l_priority = MAXPRI_USER;
	sched_enqueue(l, false);
	lwp_unlock(l);
	mutex_exit(p->p_lock);

	return 0;
}

/*
 * Tear down the Asynchronous I/O state of a process, either at process
 * exit (via the exit hook) or from an error path, which passes the
 * not-yet-installed state as the cookie.
 */
static void
aio_exit(struct proc *p, void *cookie)
{
	struct aio_job *a_job;
	struct aioproc *aio;

	if (cookie != NULL)
		aio = cookie;
	else if ((aio = p->p_aio) == NULL)
		return;

	/* Free the AIO queue */
	while (!TAILQ_EMPTY(&aio->jobs_queue)) {
		a_job = TAILQ_FIRST(&aio->jobs_queue);
		TAILQ_REMOVE(&aio->jobs_queue, a_job, list);
		pool_put(&aio_job_pool, a_job);
		atomic_dec_uint(&aio_jobs_count);
	}

	/* Destroy and free the entire AIO data structure */
	cv_destroy(&aio->aio_worker_cv);
	cv_destroy(&aio->done_cv);
	mutex_destroy(&aio->aio_mtx);
	kmem_free(aio, sizeof(struct aioproc));
}
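
/*
 * Job state protocol, as implemented below (summary): the _state field
 * of the user-visible aiocb moves JOB_NONE -> JOB_WIP (with _errno set
 * to EINPROGRESS) when the job is enqueued, and JOB_WIP -> JOB_DONE
 * (with the final _errno and _retval) when aio_process() finishes.
 * aio_return() resets the triple back to JOB_NONE, so a given aiocb
 * can be reused for a new request.
 */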

/*
 * AIO worker thread and processor.
 */
static void
aio_worker(void *arg)
{
	struct proc *p = curlwp->l_proc;
	struct aioproc *aio = p->p_aio;
	struct aio_job *a_job;
	struct lio_req *lio;
	sigset_t oss, nss;
	int error, refcnt;

	/*
	 * Block all blockable signals, so the worker reacts
	 * only to SIGKILL and SIGSTOP.
	 */
	sigfillset(&nss);
	mutex_enter(p->p_lock);
	error = sigprocmask1(curlwp, SIG_SETMASK, &nss, &oss);
	mutex_exit(p->p_lock);
	KASSERT(error == 0);

	for (;;) {
		/*
		 * Loop over the jobs in the queue.  If there
		 * are no jobs, sleep.
		 */
		mutex_enter(&aio->aio_mtx);
		while ((a_job = TAILQ_FIRST(&aio->jobs_queue)) == NULL) {
			if (cv_wait_sig(&aio->aio_worker_cv, &aio->aio_mtx)) {
				/*
				 * The thread was interrupted - check for
				 * a pending exit or suspend.
				 */
				mutex_exit(&aio->aio_mtx);
				lwp_userret(curlwp);
				mutex_enter(&aio->aio_mtx);
			}
		}

		/* Take the job from the queue */
		aio->curjob = a_job;
		TAILQ_REMOVE(&aio->jobs_queue, a_job, list);

		atomic_dec_uint(&aio_jobs_count);
		aio->jobs_count--;

		mutex_exit(&aio->aio_mtx);

		/* Process an AIO operation */
		aio_process(a_job);

		/* Copy the data structure back to the user-space */
		(void)copyout(&a_job->aiocbp, a_job->aiocb_uptr,
		    sizeof(struct aiocb));

		mutex_enter(&aio->aio_mtx);
		aio->curjob = NULL;

		/* Drop the LIO reference, if the job belongs to a LIO set */
		lio = a_job->lio;
		refcnt = (lio != NULL ? --lio->refcnt : -1);

		/* Notify all suspenders */
		cv_broadcast(&aio->done_cv);
		mutex_exit(&aio->aio_mtx);

		/* Send a signal, if any */
		aio_sendsig(p, &a_job->aiocbp.aio_sigevent);

		/* Destroy the LIO structure */
		if (refcnt == 0) {
			aio_sendsig(p, &lio->sig);
			pool_put(&aio_lio_pool, lio);
		}

		/* Destroy the job */
		pool_put(&aio_job_pool, a_job);
	}

	/* NOTREACHED */
}

static void
aio_process(struct aio_job *a_job)
{
	struct proc *p = curlwp->l_proc;
	struct aiocb *aiocbp = &a_job->aiocbp;
	struct file *fp;
	int fd = aiocbp->aio_fildes;
	int error = 0;

	KASSERT(a_job->aio_op != 0);

	if ((a_job->aio_op & (AIO_READ | AIO_WRITE)) != 0) {
		struct iovec aiov;
		struct uio auio;

		if (aiocbp->aio_nbytes > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}

		fp = fd_getfile(fd);
		if (fp == NULL) {
			error = EBADF;
			goto done;
		}

		aiov.iov_base = (void *)(uintptr_t)aiocbp->aio_buf;
		aiov.iov_len = aiocbp->aio_nbytes;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_resid = aiocbp->aio_nbytes;
		auio.uio_vmspace = p->p_vmspace;

		if (a_job->aio_op & AIO_READ) {
			/*
			 * Perform a Read operation
			 */
			KASSERT((a_job->aio_op & AIO_WRITE) == 0);

			if ((fp->f_flag & FREAD) == 0) {
				fd_putfile(fd);
				error = EBADF;
				goto done;
			}
			auio.uio_rw = UIO_READ;
			error = (*fp->f_ops->fo_read)(fp, &aiocbp->aio_offset,
			    &auio, fp->f_cred, FOF_UPDATE_OFFSET);
		} else {
			/*
			 * Perform a Write operation
			 */
			KASSERT(a_job->aio_op & AIO_WRITE);

			if ((fp->f_flag & FWRITE) == 0) {
				fd_putfile(fd);
				error = EBADF;
				goto done;
			}
			auio.uio_rw = UIO_WRITE;
			error = (*fp->f_ops->fo_write)(fp, &aiocbp->aio_offset,
			    &auio, fp->f_cred, FOF_UPDATE_OFFSET);
		}
		fd_putfile(fd);

		/* Store the result value */
		a_job->aiocbp.aio_nbytes -= auio.uio_resid;
		a_job->aiocbp._retval = (error == 0) ?
		    a_job->aiocbp.aio_nbytes : -1;

	} else if ((a_job->aio_op & (AIO_SYNC | AIO_DSYNC)) != 0) {
		/*
		 * Perform a file Sync operation
		 */
		struct vnode *vp;

		if ((error = fd_getvnode(fd, &fp)) != 0)
			goto done;

		if ((fp->f_flag & FWRITE) == 0) {
			fd_putfile(fd);
			error = EBADF;
			goto done;
		}

		vp = (struct vnode *)fp->f_data;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (a_job->aio_op & AIO_DSYNC) {
			error = VOP_FSYNC(vp, fp->f_cred,
			    FSYNC_WAIT | FSYNC_DATAONLY, 0, 0);
		} else if (a_job->aio_op & AIO_SYNC) {
			error = VOP_FSYNC(vp, fp->f_cred,
			    FSYNC_WAIT, 0, 0);
		}
		VOP_UNLOCK(vp);
		fd_putfile(fd);

		/* Store the result value */
		a_job->aiocbp._retval = (error == 0) ? 0 : -1;

	} else
		panic("aio_process: invalid operation code\n");

done:
	/* The job is done, set the error, if any */
	a_job->aiocbp._errno = error;
	a_job->aiocbp._state = JOB_DONE;
}

/*
 * Send an AIO signal.
 */
static void
aio_sendsig(struct proc *p, struct sigevent *sig)
{
	ksiginfo_t ksi;

	if (sig->sigev_signo == 0 || sig->sigev_notify == SIGEV_NONE)
		return;

	KSI_INIT(&ksi);
	ksi.ksi_signo = sig->sigev_signo;
	ksi.ksi_code = SI_ASYNCIO;
	ksi.ksi_value = sig->sigev_value;
	mutex_enter(proc_lock);
	kpsignal(p, &ksi, NULL);
	mutex_exit(proc_lock);
}
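
/*
 * Illustrative sigevent setup from userland (a sketch only): request
 * SIGUSR1 with a pointer value on completion.  With sigev_notify set
 * to SIGEV_NONE, or sigev_signo left zero, aio_sendsig() above
 * delivers nothing.
 *
 *	struct aiocb cb;
 *
 *	memset(&cb, 0, sizeof(cb));
 *	cb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
 *	cb.aio_sigevent.sigev_signo = SIGUSR1;
 *	cb.aio_sigevent.sigev_value.sival_ptr = &cb;
 */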

/*
 * Enqueue the job.
 */
static int
aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio)
{
	struct proc *p = curlwp->l_proc;
	struct aioproc *aio;
	struct aio_job *a_job;
	struct aiocb aiocbp;
	struct sigevent *sig;
	int error;

	/* Racy, lock-free check against the limit */
	if (aio_jobs_count + 1 > aio_max)
		return EAGAIN;

	/* Get the data structure from the user-space */
	error = copyin(aiocb_uptr, &aiocbp, sizeof(struct aiocb));
	if (error)
		return error;

	/* Check if a signal is set, and validate it */
	sig = &aiocbp.aio_sigevent;
	if (sig->sigev_signo < 0 || sig->sigev_signo >= NSIG ||
	    sig->sigev_notify < SIGEV_NONE || sig->sigev_notify > SIGEV_SA)
		return EINVAL;

	/* Buffer and byte count */
	if (((AIO_SYNC | AIO_DSYNC) & op) == 0)
		if (aiocbp.aio_buf == NULL || aiocbp.aio_nbytes > SSIZE_MAX)
			return EINVAL;

	/* Check the opcode; LIO_NOP is simply ignored */
	if (op == AIO_LIO) {
		KASSERT(lio != NULL);
		if (aiocbp.aio_lio_opcode == LIO_WRITE)
			op = AIO_WRITE;
		else if (aiocbp.aio_lio_opcode == LIO_READ)
			op = AIO_READ;
		else
			return (aiocbp.aio_lio_opcode == LIO_NOP) ? 0 : EINVAL;
	} else {
		KASSERT(lio == NULL);
	}

	/*
	 * Look for an already existing job.  If one is found, that job
	 * is still in progress.  According to POSIX this is invalid,
	 * so return the error.
	 */
	aio = p->p_aio;
	if (aio) {
		mutex_enter(&aio->aio_mtx);
		TAILQ_FOREACH(a_job, &aio->jobs_queue, list) {
			if (a_job->aiocb_uptr != aiocb_uptr)
				continue;
			mutex_exit(&aio->aio_mtx);
			return EINVAL;
		}
		mutex_exit(&aio->aio_mtx);
	}

	/*
	 * Check if the AIO structure is initialized, and if not,
	 * initialize it.  In the LIO case, this was done already.
	 * This is rechecked with the lock held in aio_procinit().
	 */
	if (lio == NULL && p->p_aio == NULL)
		if (aio_procinit(p))
			return EAGAIN;
	aio = p->p_aio;

	/*
	 * Set the state with the errno, and copy the data
	 * structure back to the user-space.
	 */
	aiocbp._state = JOB_WIP;
	aiocbp._errno = EINPROGRESS;
	aiocbp._retval = -1;
	error = copyout(&aiocbp, aiocb_uptr, sizeof(struct aiocb));
	if (error)
		return error;

	/* Allocate and initialize a new AIO job */
	a_job = pool_get(&aio_job_pool, PR_WAITOK);
	memset(a_job, 0, sizeof(struct aio_job));

	/*
	 * Set the data.
	 * Store the user-space pointer for searching.  Since only
	 * per-process pointers are stored, this is safe.
	 */
	memcpy(&a_job->aiocbp, &aiocbp, sizeof(struct aiocb));
	a_job->aiocb_uptr = aiocb_uptr;
	a_job->aio_op |= op;
	a_job->lio = lio;

	/*
	 * Add the job to the queue, update the counters, and
	 * notify the AIO worker thread to handle the job.
	 */
	mutex_enter(&aio->aio_mtx);

	/* Fail, if the limit was reached */
	if (atomic_inc_uint_nv(&aio_jobs_count) > aio_max ||
	    aio->jobs_count >= aio_listio_max) {
		atomic_dec_uint(&aio_jobs_count);
		mutex_exit(&aio->aio_mtx);
		pool_put(&aio_job_pool, a_job);
		return EAGAIN;
	}

	TAILQ_INSERT_TAIL(&aio->jobs_queue, a_job, list);
	aio->jobs_count++;
	if (lio)
		lio->refcnt++;
	cv_signal(&aio->aio_worker_cv);

	mutex_exit(&aio->aio_mtx);

	/*
	 * Per POSIX, any I/O error is reported asynchronously,
	 * via aio_error() only.
	 */
	return 0;
}
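
/*
 * Note on the limit checks above (a summary of the code, not an API
 * contract): aio_enqueue_job() first performs a racy, lock-free test
 * against aio_max, then atomically increments aio_jobs_count and
 * undoes the increment if either the global (aio_max) or the
 * per-process (aio_listio_max) limit would be exceeded, returning
 * EAGAIN in that case.
 */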

/*
 * Syscall functions.
 */

int
sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) fildes;
		syscallarg(struct aiocb *) aiocbp;
	} */
	struct proc *p = l->l_proc;
	struct aioproc *aio;
	struct aio_job *a_job, *a_next;
	struct aiocb *aiocbp_ptr;
	struct lio_req *lio;
	struct filedesc *fdp = p->p_fd;
	unsigned int cn, errcnt, fildes;
	fdtab_t *dt;

	TAILQ_HEAD(, aio_job) tmp_jobs_list;

	/* Check for an invalid file descriptor */
	fildes = (unsigned int)SCARG(uap, fildes);
	dt = fdp->fd_dt;
	if (fildes >= dt->dt_nfiles)
		return EBADF;
	if (dt->dt_ff[fildes] == NULL || dt->dt_ff[fildes]->ff_file == NULL)
		return EBADF;

	/* Check if the AIO structure is initialized */
	if (p->p_aio == NULL) {
		*retval = AIO_NOTCANCELED;
		return 0;
	}

	aio = p->p_aio;
	aiocbp_ptr = (struct aiocb *)SCARG(uap, aiocbp);

	mutex_enter(&aio->aio_mtx);

	/* Cancel the jobs, and remove them from the queue */
	cn = 0;
	TAILQ_INIT(&tmp_jobs_list);
	TAILQ_FOREACH_SAFE(a_job, &aio->jobs_queue, list, a_next) {
		if (aiocbp_ptr) {
			if (aiocbp_ptr != a_job->aiocb_uptr)
				continue;
			if (fildes != a_job->aiocbp.aio_fildes) {
				mutex_exit(&aio->aio_mtx);
				return EBADF;
			}
		} else if (a_job->aiocbp.aio_fildes != fildes)
			continue;

		TAILQ_REMOVE(&aio->jobs_queue, a_job, list);
		TAILQ_INSERT_TAIL(&tmp_jobs_list, a_job, list);

		/* Decrease the counters */
		atomic_dec_uint(&aio_jobs_count);
		aio->jobs_count--;
		lio = a_job->lio;
		if (lio != NULL && --lio->refcnt != 0)
			a_job->lio = NULL;

		cn++;
		if (aiocbp_ptr)
			break;
	}

	/* If any jobs were canceled, report it */
	if (cn)
		*retval = AIO_CANCELED;

	/* The job currently being processed cannot be canceled */
	a_job = aio->curjob;
	if (a_job && ((a_job->aiocbp.aio_fildes == fildes) ||
	    (a_job->aiocb_uptr == aiocbp_ptr)))
		*retval = AIO_NOTCANCELED;

	mutex_exit(&aio->aio_mtx);

	/* Free the canceled jobs after dropping the lock */
	errcnt = 0;
	while (!TAILQ_EMPTY(&tmp_jobs_list)) {
		a_job = TAILQ_FIRST(&tmp_jobs_list);
		TAILQ_REMOVE(&tmp_jobs_list, a_job, list);
		/* Set the errno and copy the structures back to user-space */
		a_job->aiocbp._errno = ECANCELED;
		a_job->aiocbp._state = JOB_DONE;
		if (copyout(&a_job->aiocbp, a_job->aiocb_uptr,
		    sizeof(struct aiocb)))
			errcnt++;
		/* Send a signal, if any */
		aio_sendsig(p, &a_job->aiocbp.aio_sigevent);
		if (a_job->lio) {
			lio = a_job->lio;
			aio_sendsig(p, &lio->sig);
			pool_put(&aio_lio_pool, lio);
		}
		pool_put(&aio_job_pool, a_job);
	}

	if (errcnt)
		return EFAULT;

	/* Set the correct return value */
	if (*retval == 0)
		*retval = AIO_ALLDONE;

	return 0;
}

int
sys_aio_error(struct lwp *l, const struct sys_aio_error_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const struct aiocb *) aiocbp;
	} */
	struct proc *p = l->l_proc;
	struct aioproc *aio = p->p_aio;
	struct aiocb aiocbp;
	int error;

	if (aio == NULL)
		return EINVAL;

	error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb));
	if (error)
		return error;

	if (aiocbp._state == JOB_NONE)
		return EINVAL;

	*retval = aiocbp._errno;

	return 0;
}
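
/*
 * Illustrative aio_fsync(2) call from userland (a sketch): O_DSYNC
 * requests synchronization of file data only (mapped to FSYNC_DATAONLY
 * below), while O_SYNC also synchronizes metadata.  `fd' is assumed
 * to be open for writing.
 *
 *	struct aiocb cb;
 *
 *	memset(&cb, 0, sizeof(cb));
 *	cb.aio_fildes = fd;
 *	if (aio_fsync(O_DSYNC, &cb) == -1)
 *		err(EXIT_FAILURE, "aio_fsync");
 */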

int
sys_aio_fsync(struct lwp *l, const struct sys_aio_fsync_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) op;
		syscallarg(struct aiocb *) aiocbp;
	} */
	int op = SCARG(uap, op);

	if ((op != O_DSYNC) && (op != O_SYNC))
		return EINVAL;

	op = (op == O_DSYNC) ? AIO_DSYNC : AIO_SYNC;

	return aio_enqueue_job(op, SCARG(uap, aiocbp), NULL);
}

int
sys_aio_read(struct lwp *l, const struct sys_aio_read_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(struct aiocb *) aiocbp;
	} */

	return aio_enqueue_job(AIO_READ, SCARG(uap, aiocbp), NULL);
}

int
sys_aio_return(struct lwp *l, const struct sys_aio_return_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(struct aiocb *) aiocbp;
	} */
	struct proc *p = l->l_proc;
	struct aioproc *aio = p->p_aio;
	struct aiocb aiocbp;
	int error;

	if (aio == NULL)
		return EINVAL;

	error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb));
	if (error)
		return error;

	if (aiocbp._errno == EINPROGRESS || aiocbp._state != JOB_DONE)
		return EINVAL;

	*retval = aiocbp._retval;

	/* Reset the internal variables */
	aiocbp._errno = 0;
	aiocbp._retval = -1;
	aiocbp._state = JOB_NONE;
	error = copyout(&aiocbp, SCARG(uap, aiocbp), sizeof(struct aiocb));

	return error;
}

int
sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const struct aiocb *const[]) list;
		syscallarg(int) nent;
		syscallarg(const struct timespec *) timeout;
	} */
	struct aiocb **list;
	struct timespec ts;
	int error, nent;

	nent = SCARG(uap, nent);
	if (nent <= 0 || nent > aio_listio_max)
		return EAGAIN;

	if (SCARG(uap, timeout)) {
		/* Copy in the timeout; it is converted to ticks later */
		error = copyin(SCARG(uap, timeout), &ts,
		    sizeof(struct timespec));
		if (error)
			return error;
	}

	list = kmem_alloc(nent * sizeof(*list), KM_SLEEP);
	error = copyin(SCARG(uap, list), list, nent * sizeof(*list));
	if (error)
		goto out;
	error = aio_suspend1(l, list, nent, SCARG(uap, timeout) ? &ts : NULL);
out:
	kmem_free(list, nent * sizeof(*list));
	return error;
}
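
/*
 * Illustrative aio_suspend(2) call from userland (a sketch): block
 * until one of the listed requests completes, or give up after half
 * a second.  `cb' is a previously submitted aiocb.
 *
 *	const struct aiocb *list[1] = { &cb };
 *	struct timespec ts = { .tv_sec = 0, .tv_nsec = 500000000 };
 *
 *	if (aio_suspend(list, 1, &ts) == -1 && errno == EAGAIN)
 *		printf("timed out\n");
 */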

int
aio_suspend1(struct lwp *l, struct aiocb **aiocbp_list, int nent,
    struct timespec *ts)
{
	struct proc *p = l->l_proc;
	struct aioproc *aio;
	struct aio_job *a_job;
	int i, error, timo;

	if (p->p_aio == NULL)
		return EAGAIN;
	aio = p->p_aio;

	if (ts) {
		timo = mstohz((ts->tv_sec * 1000) + (ts->tv_nsec / 1000000));
		if (timo == 0 && ts->tv_sec == 0 && ts->tv_nsec > 0)
			timo = 1;
		if (timo <= 0)
			return EAGAIN;
	} else
		timo = 0;

	mutex_enter(&aio->aio_mtx);
	for (;;) {
		for (i = 0; i < nent; i++) {

			/* Skip NULL entries */
			if (aiocbp_list[i] == NULL)
				continue;

			/* Skip the current job */
			if (aio->curjob) {
				a_job = aio->curjob;
				if (a_job->aiocb_uptr == aiocbp_list[i])
					continue;
			}

			/* Look for the job in the queue */
			TAILQ_FOREACH(a_job, &aio->jobs_queue, list)
				if (a_job->aiocb_uptr == aiocbp_list[i])
					break;

			if (a_job == NULL) {
				struct aiocb aiocbp;

				mutex_exit(&aio->aio_mtx);

				/* Check if the job is done */
				error = copyin(aiocbp_list[i], &aiocbp,
				    sizeof(struct aiocb));
				if (error == 0 && aiocbp._state != JOB_DONE) {
					mutex_enter(&aio->aio_mtx);
					continue;
				}
				return error;
			}
		}

		/* Wait for a completion signal or until the timeout expires */
		error = cv_timedwait_sig(&aio->done_cv, &aio->aio_mtx, timo);
		if (error) {
			if (error == EWOULDBLOCK)
				error = EAGAIN;
			break;
		}
	}
	mutex_exit(&aio->aio_mtx);
	return error;
}

int
sys_aio_write(struct lwp *l, const struct sys_aio_write_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(struct aiocb *) aiocbp;
	} */

	return aio_enqueue_job(AIO_WRITE, SCARG(uap, aiocbp), NULL);
}
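
/*
 * Illustrative lio_listio(2) call from userland (a sketch): submit a
 * batch of requests and wait for all of them.  `rd_cb' and `wr_cb' are
 * aiocbs filled in as in the aio_read(2) example near the top of this
 * file.  With LIO_NOWAIT the call returns immediately, and the
 * optional sigevent argument is delivered once the whole batch has
 * completed.
 *
 *	struct aiocb *list[2] = { &rd_cb, &wr_cb };
 *
 *	rd_cb.aio_lio_opcode = LIO_READ;
 *	wr_cb.aio_lio_opcode = LIO_WRITE;
 *	if (lio_listio(LIO_WAIT, list, 2, NULL) == -1)
 *		err(EXIT_FAILURE, "lio_listio");
 */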

int
sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) mode;
		syscallarg(struct aiocb *const[]) list;
		syscallarg(int) nent;
		syscallarg(struct sigevent *) sig;
	} */
	struct proc *p = l->l_proc;
	struct aioproc *aio;
	struct aiocb **aiocbp_list;
	struct lio_req *lio;
	int i, error, errcnt, mode, nent;

	mode = SCARG(uap, mode);
	nent = SCARG(uap, nent);

	/* Racy, lock-free checks for the limits and invalid values */
	if (nent < 1 || nent > aio_listio_max)
		return EINVAL;
	if (aio_jobs_count + nent > aio_max)
		return EAGAIN;

	/* Check if the AIO structure is initialized; if not, initialize it */
	if (p->p_aio == NULL)
		if (aio_procinit(p))
			return EAGAIN;
	aio = p->p_aio;

	/* Create a LIO structure */
	lio = pool_get(&aio_lio_pool, PR_WAITOK);
	lio->refcnt = 1;
	error = 0;

	switch (mode) {
	case LIO_WAIT:
		memset(&lio->sig, 0, sizeof(struct sigevent));
		break;
	case LIO_NOWAIT:
		/* Check for a signal, and validate it */
		if (SCARG(uap, sig)) {
			struct sigevent *sig = &lio->sig;

			error = copyin(SCARG(uap, sig), &lio->sig,
			    sizeof(struct sigevent));
			if (error == 0 &&
			    (sig->sigev_signo < 0 ||
			    sig->sigev_signo >= NSIG ||
			    sig->sigev_notify < SIGEV_NONE ||
			    sig->sigev_notify > SIGEV_SA))
				error = EINVAL;
		} else
			memset(&lio->sig, 0, sizeof(struct sigevent));
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error != 0) {
		pool_put(&aio_lio_pool, lio);
		return error;
	}

	/* Get the list from the user-space */
	aiocbp_list = kmem_alloc(nent * sizeof(*aiocbp_list), KM_SLEEP);
	error = copyin(SCARG(uap, list), aiocbp_list,
	    nent * sizeof(*aiocbp_list));
	if (error) {
		mutex_enter(&aio->aio_mtx);
		goto err;
	}

	/* Enqueue all jobs */
	errcnt = 0;
	for (i = 0; i < nent; i++) {
		error = aio_enqueue_job(AIO_LIO, aiocbp_list[i], lio);
		/*
		 * Per POSIX, the call may fail in this case, even
		 * though some of the I/O operations were initiated.
		 */
		if (error)
			errcnt++;
	}

	mutex_enter(&aio->aio_mtx);

	/* Return an error, if any */
	if (errcnt) {
		error = EIO;
		goto err;
	}

	if (mode == LIO_WAIT) {
		/*
		 * Wait for AIO completion.  In that case,
		 * the LIO structure will be freed here.
		 */
		while (lio->refcnt > 1 && error == 0)
			error = cv_wait_sig(&aio->done_cv, &aio->aio_mtx);
		if (error)
			error = EINTR;
	}

err:
	if (--lio->refcnt != 0)
		lio = NULL;
	mutex_exit(&aio->aio_mtx);
	if (lio != NULL) {
		aio_sendsig(p, &lio->sig);
		pool_put(&aio_lio_pool, lio);
	}
	kmem_free(aiocbp_list, nent * sizeof(*aiocbp_list));
	return error;
}

/*
 * SysCtl
 */

static int
sysctl_aio_listio_max(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error, newsize;

	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = aio_listio_max;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < 1 || newsize > aio_max)
		return EINVAL;
	aio_listio_max = newsize;

	return 0;
}

static int
sysctl_aio_max(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error, newsize;

	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = aio_max;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < 1 || newsize < aio_listio_max)
		return EINVAL;
	aio_max = newsize;

	return 0;
}

SYSCTL_SETUP(sysctl_aio_setup, "sysctl aio setup")
{

	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "kern", NULL,
	    NULL, 0, NULL, 0,
	    CTL_KERN, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
	    CTLTYPE_INT, "posix_aio",
	    SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
			 "Asynchronous I/O option to which the "
			 "system attempts to conform"),
	    NULL, _POSIX_ASYNCHRONOUS_IO, NULL, 0,
	    CTL_KERN, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "aio_listio_max",
	    SYSCTL_DESCR("Maximum number of asynchronous I/O "
			 "operations in a single list I/O call"),
	    sysctl_aio_listio_max, 0, &aio_listio_max, 0,
	    CTL_KERN, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "aio_max",
	    SYSCTL_DESCR("Maximum number of asynchronous I/O "
			 "operations"),
	    sysctl_aio_max, 0, &aio_max, 0,
	    CTL_KERN, CTL_CREATE, CTL_EOL);
}
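
/*
 * The limits above can be tuned at run time from userland, e.g.:
 *
 *	$ sysctl -w kern.aio_max=2048
 *	$ sysctl -w kern.aio_listio_max=1024
 *
 * The handlers keep the invariant 1 <= aio_listio_max <= aio_max.
 */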

/*
 * Debugging
 */
#if defined(DDB)
void
aio_print_jobs(void (*pr)(const char *, ...))
{
	struct proc *p = (curlwp == NULL ? NULL : curlwp->l_proc);
	struct aioproc *aio;
	struct aio_job *a_job;
	struct aiocb *aiocbp;

	if (p == NULL) {
		(*pr)("AIO: no current process.\n");
		return;
	}

	aio = p->p_aio;
	if (aio == NULL) {
		(*pr)("AIO data is not initialized (PID = %d).\n", p->p_pid);
		return;
	}

	(*pr)("AIO: PID = %d\n", p->p_pid);
	(*pr)("AIO: Global count of the jobs = %u\n", aio_jobs_count);
	(*pr)("AIO: Count of the jobs = %u\n", aio->jobs_count);

	if (aio->curjob) {
		a_job = aio->curjob;
		(*pr)("\nAIO current job:\n");
		(*pr)(" opcode = %d, errno = %d, state = %d, aiocb_ptr = %p\n",
		    a_job->aio_op, a_job->aiocbp._errno,
		    a_job->aiocbp._state, a_job->aiocb_uptr);
		aiocbp = &a_job->aiocbp;
		(*pr)("   fd = %d, offset = %jd, buf = %p, nbytes = %zu\n",
		    aiocbp->aio_fildes, (intmax_t)aiocbp->aio_offset,
		    aiocbp->aio_buf, aiocbp->aio_nbytes);
	}

	(*pr)("\nAIO queue:\n");
	TAILQ_FOREACH(a_job, &aio->jobs_queue, list) {
		(*pr)(" opcode = %d, errno = %d, state = %d, aiocb_ptr = %p\n",
		    a_job->aio_op, a_job->aiocbp._errno,
		    a_job->aiocbp._state, a_job->aiocb_uptr);
		aiocbp = &a_job->aiocbp;
		(*pr)("   fd = %d, offset = %jd, buf = %p, nbytes = %zu\n",
		    aiocbp->aio_fildes, (intmax_t)aiocbp->aio_offset,
		    aiocbp->aio_buf, aiocbp->aio_nbytes);
	}
}
#endif /* defined(DDB) */