/*	$NetBSD: sys_aio.c,v 1.31 2010/01/30 21:23:46 rmind Exp $	*/

/*
 * Copyright (c) 2007 Mindaugas Rasiukevicius <rmind at NetBSD org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Implementation of POSIX asynchronous I/O.
 * Defined in the Base Definitions volume of IEEE Std 1003.1-2001.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_aio.c,v 1.31 2010/01/30 21:23:46 rmind Exp $");

#ifdef _KERNEL_OPT
#include "opt_ddb.h"
#endif

#include <sys/param.h>
#include <sys/condvar.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/lwp.h>
#include <sys/mutex.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
#include <sys/syscallargs.h>
#include <sys/syscallvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/atomic.h>
#include <sys/module.h>
#include <sys/buf.h>

#include <uvm/uvm_extern.h>

MODULE(MODULE_CLASS_MISC, aio, NULL);
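
/*
 * For reference, a minimal user-space sketch of the interface
 * implemented below (illustrative only; most error handling omitted):
 *
 *	#include <aio.h>
 *	#include <errno.h>
 *	#include <string.h>
 *
 *	char buf[512];
 *	struct aiocb cb;
 *	ssize_t nread;
 *
 *	memset(&cb, 0, sizeof(cb));
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof(buf);
 *	cb.aio_offset = 0;
 *
 *	if (aio_read(&cb) == -1)
 *		return -1;
 *	while (aio_error(&cb) == EINPROGRESS)
 *		;
 *	nread = aio_return(&cb);
 *
 * Instead of the polling loop, the caller would normally block in
 * aio_suspend() or request a completion signal via aio_sigevent.
 */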
/*
 * System-wide limits and counter of AIO operations.
 */
u_int			aio_listio_max = AIO_LISTIO_MAX;
static u_int		aio_max = AIO_MAX;
static u_int		aio_jobs_count;

static struct pool	aio_job_pool;
static struct pool	aio_lio_pool;
static void *		aio_ehook;

static void	aio_worker(void *);
static void	aio_process(struct aio_job *);
static void	aio_sendsig(struct proc *, struct sigevent *);
static int	aio_enqueue_job(int, void *, struct lio_req *);
static void	aio_exit(proc_t *, void *);

static const struct syscall_package aio_syscalls[] = {
	{ SYS_aio_cancel, 0, (sy_call_t *)sys_aio_cancel },
	{ SYS_aio_error, 0, (sy_call_t *)sys_aio_error },
	{ SYS_aio_fsync, 0, (sy_call_t *)sys_aio_fsync },
	{ SYS_aio_read, 0, (sy_call_t *)sys_aio_read },
	{ SYS_aio_return, 0, (sy_call_t *)sys_aio_return },
	{ SYS___aio_suspend50, 0, (sy_call_t *)sys___aio_suspend50 },
	{ SYS_aio_write, 0, (sy_call_t *)sys_aio_write },
	{ SYS_lio_listio, 0, (sy_call_t *)sys_lio_listio },
	{ 0, 0, NULL },
};

/*
 * Tear down all AIO state.
 */
static int
aio_fini(bool interface)
{
	int error;
	proc_t *p;

	if (interface) {
		/* Stop syscall activity. */
		error = syscall_disestablish(NULL, aio_syscalls);
		if (error != 0)
			return error;
		/* Abort if any processes are using AIO. */
		mutex_enter(proc_lock);
		PROCLIST_FOREACH(p, &allproc) {
			if (p->p_aio != NULL)
				break;
		}
		mutex_exit(proc_lock);
		if (p != NULL) {
			error = syscall_establish(NULL, aio_syscalls);
			KASSERT(error == 0);
			return EBUSY;
		}
	}
	KASSERT(aio_jobs_count == 0);
	exithook_disestablish(aio_ehook);
	pool_destroy(&aio_job_pool);
	pool_destroy(&aio_lio_pool);
	return 0;
}

/*
 * Initialize global AIO state.
 */
static int
aio_init(void)
{
	int error;

	pool_init(&aio_job_pool, sizeof(struct aio_job), 0, 0, 0,
	    "aio_jobs_pool", &pool_allocator_nointr, IPL_NONE);
	pool_init(&aio_lio_pool, sizeof(struct lio_req), 0, 0, 0,
	    "aio_lio_pool", &pool_allocator_nointr, IPL_NONE);
	aio_ehook = exithook_establish(aio_exit, NULL);
	error = syscall_establish(NULL, aio_syscalls);
	if (error != 0)
		aio_fini(false);
	return error;
}

/*
 * Module interface.
 */
static int
aio_modcmd(modcmd_t cmd, void *arg)
{

	switch (cmd) {
	case MODULE_CMD_INIT:
		return aio_init();
	case MODULE_CMD_FINI:
		return aio_fini(true);
	default:
		return ENOTTY;
	}
}
/*
 * Initialize asynchronous I/O data structures for the process.
 */
static int
aio_procinit(struct proc *p)
{
	struct aioproc *aio;
	struct lwp *l;
	int error;
	vaddr_t uaddr;

	/* Allocate and initialize AIO structure */
	aio = kmem_zalloc(sizeof(struct aioproc), KM_SLEEP);
	if (aio == NULL)
		return EAGAIN;

	/* Initialize the job queue and its synchronization structures */
	mutex_init(&aio->aio_mtx, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&aio->aio_worker_cv, "aiowork");
	cv_init(&aio->done_cv, "aiodone");
	TAILQ_INIT(&aio->jobs_queue);

	/*
	 * Create an AIO worker thread.
	 * XXX: Currently, the AIO thread is not protected against
	 * the user's actions.
	 */
	uaddr = uvm_uarea_alloc();
	if (uaddr == 0) {
		aio_exit(p, aio);
		return EAGAIN;
	}
	error = lwp_create(curlwp, p, uaddr, 0, NULL, 0, aio_worker,
	    NULL, &l, curlwp->l_class);
	if (error != 0) {
		uvm_uarea_free(uaddr);
		aio_exit(p, aio);
		return error;
	}

	/* Recheck if we are really first */
	mutex_enter(p->p_lock);
	if (p->p_aio) {
		mutex_exit(p->p_lock);
		aio_exit(p, aio);
		lwp_exit(l);
		return 0;
	}
	p->p_aio = aio;

	/* Complete the initialization of the thread, and run it */
	aio->aio_worker = l;
	p->p_nrlwps++;
	lwp_lock(l);
	l->l_stat = LSRUN;
	l->l_priority = MAXPRI_USER;
	sched_enqueue(l, false);
	lwp_unlock(l);
	mutex_exit(p->p_lock);

	return 0;
}

/*
 * Tear down the asynchronous I/O state of a process.  Called on process
 * exit (via the exit hook), or with an explicit cookie from the
 * initialization error paths.
 */
static void
aio_exit(struct proc *p, void *cookie)
{
	struct aio_job *a_job;
	struct aioproc *aio;

	if (cookie != NULL)
		aio = cookie;
	else if ((aio = p->p_aio) == NULL)
		return;

	/* Free AIO queue */
	while (!TAILQ_EMPTY(&aio->jobs_queue)) {
		a_job = TAILQ_FIRST(&aio->jobs_queue);
		TAILQ_REMOVE(&aio->jobs_queue, a_job, list);
		pool_put(&aio_job_pool, a_job);
		atomic_dec_uint(&aio_jobs_count);
	}

	/* Destroy and free the entire AIO data structure */
	cv_destroy(&aio->aio_worker_cv);
	cv_destroy(&aio->done_cv);
	mutex_destroy(&aio->aio_mtx);
	kmem_free(aio, sizeof(struct aioproc));
}

/*
 * AIO worker thread and processor.
 */
static void
aio_worker(void *arg)
{
	struct proc *p = curlwp->l_proc;
	struct aioproc *aio = p->p_aio;
	struct aio_job *a_job;
	struct lio_req *lio;
	sigset_t oss, nss;
	int error, refcnt;

	/*
	 * Block all signals: with the mask filled, only SIGKILL and
	 * SIGSTOP (which cannot be masked) can interrupt the worker.
	 */
	sigfillset(&nss);
	mutex_enter(p->p_lock);
	error = sigprocmask1(curlwp, SIG_SETMASK, &nss, &oss);
	mutex_exit(p->p_lock);
	KASSERT(error == 0);

	for (;;) {
		/*
		 * Loop for each job in the queue.  If there
		 * are no jobs then sleep.
		 */
		mutex_enter(&aio->aio_mtx);
		while ((a_job = TAILQ_FIRST(&aio->jobs_queue)) == NULL) {
			if (cv_wait_sig(&aio->aio_worker_cv, &aio->aio_mtx)) {
				/*
				 * Thread was interrupted - check for
				 * pending exit or suspend.
				 */
				mutex_exit(&aio->aio_mtx);
				lwp_userret(curlwp);
				mutex_enter(&aio->aio_mtx);
			}
		}

		/* Take the job from the queue */
		aio->curjob = a_job;
		TAILQ_REMOVE(&aio->jobs_queue, a_job, list);

		atomic_dec_uint(&aio_jobs_count);
		aio->jobs_count--;

		mutex_exit(&aio->aio_mtx);

		/* Process an AIO operation */
		aio_process(a_job);

		/* Copy the data structure back to the user-space */
		(void)copyout(&a_job->aiocbp, a_job->aiocb_uptr,
		    sizeof(struct aiocb));

		mutex_enter(&aio->aio_mtx);
		aio->curjob = NULL;

		/* Decrement the reference counter, if there is a LIO structure */
		lio = a_job->lio;
		refcnt = (lio != NULL ? --lio->refcnt : -1);

		/* Notify all suspenders */
		cv_broadcast(&aio->done_cv);
		mutex_exit(&aio->aio_mtx);

		/* Send a signal, if any */
		aio_sendsig(p, &a_job->aiocbp.aio_sigevent);

		/* Destroy the LIO structure */
		if (refcnt == 0) {
			aio_sendsig(p, &lio->sig);
			pool_put(&aio_lio_pool, lio);
		}

		/* Destroy the job */
		pool_put(&aio_job_pool, a_job);
	}

	/* NOTREACHED */
}
static void
aio_process(struct aio_job *a_job)
{
	struct proc *p = curlwp->l_proc;
	struct aiocb *aiocbp = &a_job->aiocbp;
	struct file *fp;
	int fd = aiocbp->aio_fildes;
	int error = 0;

	KASSERT(a_job->aio_op != 0);

	if ((a_job->aio_op & (AIO_READ | AIO_WRITE)) != 0) {
		struct iovec aiov;
		struct uio auio;

		if (aiocbp->aio_nbytes > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}

		fp = fd_getfile(fd);
		if (fp == NULL) {
			error = EBADF;
			goto done;
		}

		aiov.iov_base = (void *)(uintptr_t)aiocbp->aio_buf;
		aiov.iov_len = aiocbp->aio_nbytes;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_resid = aiocbp->aio_nbytes;
		auio.uio_vmspace = p->p_vmspace;

		if (a_job->aio_op & AIO_READ) {
			/*
			 * Perform a Read operation
			 */
			KASSERT((a_job->aio_op & AIO_WRITE) == 0);

			if ((fp->f_flag & FREAD) == 0) {
				fd_putfile(fd);
				error = EBADF;
				goto done;
			}
			auio.uio_rw = UIO_READ;
			error = (*fp->f_ops->fo_read)(fp, &aiocbp->aio_offset,
			    &auio, fp->f_cred, FOF_UPDATE_OFFSET);
		} else {
			/*
			 * Perform a Write operation
			 */
			KASSERT(a_job->aio_op & AIO_WRITE);

			if ((fp->f_flag & FWRITE) == 0) {
				fd_putfile(fd);
				error = EBADF;
				goto done;
			}
			auio.uio_rw = UIO_WRITE;
			error = (*fp->f_ops->fo_write)(fp, &aiocbp->aio_offset,
			    &auio, fp->f_cred, FOF_UPDATE_OFFSET);
		}
		fd_putfile(fd);

		/* Store the result value */
		a_job->aiocbp.aio_nbytes -= auio.uio_resid;
		a_job->aiocbp._retval = (error == 0) ?
		    a_job->aiocbp.aio_nbytes : -1;

	} else if ((a_job->aio_op & (AIO_SYNC | AIO_DSYNC)) != 0) {
		/*
		 * Perform a file Sync operation
		 */
		struct vnode *vp;

		if ((error = fd_getvnode(fd, &fp)) != 0)
			goto done;

		if ((fp->f_flag & FWRITE) == 0) {
			fd_putfile(fd);
			error = EBADF;
			goto done;
		}

		vp = (struct vnode *)fp->f_data;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (a_job->aio_op & AIO_DSYNC) {
			error = VOP_FSYNC(vp, fp->f_cred,
			    FSYNC_WAIT | FSYNC_DATAONLY, 0, 0);
		} else if (a_job->aio_op & AIO_SYNC) {
			error = VOP_FSYNC(vp, fp->f_cred,
			    FSYNC_WAIT, 0, 0);
		}
		VOP_UNLOCK(vp, 0);
		fd_putfile(fd);

		/* Store the result value */
		a_job->aiocbp._retval = (error == 0) ? 0 : -1;

	} else
		panic("aio_process: invalid operation code\n");

done:
	/* Job is done, set the error, if any */
	a_job->aiocbp._errno = error;
	a_job->aiocbp._state = JOB_DONE;
}

/*
 * Send AIO signal.
 */
static void
aio_sendsig(struct proc *p, struct sigevent *sig)
{
	ksiginfo_t ksi;

	if (sig->sigev_signo == 0 || sig->sigev_notify == SIGEV_NONE)
		return;

	KSI_INIT(&ksi);
	ksi.ksi_signo = sig->sigev_signo;
	ksi.ksi_code = SI_ASYNCIO;
	ksi.ksi_value = sig->sigev_value;
	mutex_enter(proc_lock);
	kpsignal(p, &ksi, NULL);
	mutex_exit(proc_lock);
}
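
/*
 * Completion notification, as delivered by aio_sendsig() above, is
 * requested from user-space through the sigevent embedded in the
 * aiocb.  A minimal sketch (illustrative only; SIGUSR1 is an
 * arbitrary choice):
 *
 *	struct aiocb cb;
 *
 *	memset(&cb, 0, sizeof(cb));
 *	cb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
 *	cb.aio_sigevent.sigev_signo = SIGUSR1;
 *	cb.aio_sigevent.sigev_value.sival_ptr = &cb;
 *
 * After the remaining fields are filled in and the request submitted,
 * a handler installed with SA_SIGINFO sees si_code == SI_ASYNCIO and
 * si_value equal to the value set above.
 */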
/*
 * Enqueue the job.
 */
static int
aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio)
{
	struct proc *p = curlwp->l_proc;
	struct aioproc *aio;
	struct aio_job *a_job;
	struct aiocb aiocbp;
	struct sigevent *sig;
	int error;

	/* Racy (unlocked) check against the limit; rechecked under the lock below */
	if (aio_jobs_count + 1 > aio_max)
		return EAGAIN;

	/* Get the data structure from user-space */
	error = copyin(aiocb_uptr, &aiocbp, sizeof(struct aiocb));
	if (error)
		return error;

	/* Check if a signal is set, and validate it */
	sig = &aiocbp.aio_sigevent;
	if (sig->sigev_signo < 0 || sig->sigev_signo >= NSIG ||
	    sig->sigev_notify < SIGEV_NONE || sig->sigev_notify > SIGEV_SA)
		return EINVAL;

	/* Buffer and byte count */
	if (((AIO_SYNC | AIO_DSYNC) & op) == 0)
		if (aiocbp.aio_buf == NULL || aiocbp.aio_nbytes > SSIZE_MAX)
			return EINVAL;

	/* Check the opcode; LIO_NOP is simply ignored */
	if (op == AIO_LIO) {
		KASSERT(lio != NULL);
		if (aiocbp.aio_lio_opcode == LIO_WRITE)
			op = AIO_WRITE;
		else if (aiocbp.aio_lio_opcode == LIO_READ)
			op = AIO_READ;
		else
			return (aiocbp.aio_lio_opcode == LIO_NOP) ? 0 : EINVAL;
	} else {
		KASSERT(lio == NULL);
	}

	/*
	 * Look for an already existing job.  If found, that job is still
	 * in progress, and queuing an aiocb which is already in flight
	 * is invalid, so return an error.
	 */
	aio = p->p_aio;
	if (aio) {
		mutex_enter(&aio->aio_mtx);
		if (aio->curjob) {
			a_job = aio->curjob;
			if (a_job->aiocb_uptr == aiocb_uptr) {
				mutex_exit(&aio->aio_mtx);
				return EINVAL;
			}
		}
		TAILQ_FOREACH(a_job, &aio->jobs_queue, list) {
			if (a_job->aiocb_uptr != aiocb_uptr)
				continue;
			mutex_exit(&aio->aio_mtx);
			return EINVAL;
		}
		mutex_exit(&aio->aio_mtx);
	}

	/*
	 * Check if the AIO structure is initialized; if not, initialize
	 * it.  In the LIO case that was already done.  This is rechecked
	 * with the lock held in aio_procinit().
	 */
	if (lio == NULL && p->p_aio == NULL)
		if (aio_procinit(p))
			return EAGAIN;
	aio = p->p_aio;

	/*
	 * Set the state with errno, and copy the data
	 * structure back to the user-space.
	 */
	aiocbp._state = JOB_WIP;
	aiocbp._errno = EINPROGRESS;
	aiocbp._retval = -1;
	error = copyout(&aiocbp, aiocb_uptr, sizeof(struct aiocb));
	if (error)
		return error;

	/* Allocate and initialize a new AIO job */
	a_job = pool_get(&aio_job_pool, PR_WAITOK);
	memset(a_job, 0, sizeof(struct aio_job));

	/*
	 * Fill in the job.  Storing the user-space pointer for later
	 * look-up is safe, since it is only ever compared against
	 * pointers from the same process.
	 */
	memcpy(&a_job->aiocbp, &aiocbp, sizeof(struct aiocb));
	a_job->aiocb_uptr = aiocb_uptr;
	a_job->aio_op |= op;
	a_job->lio = lio;

	/*
	 * Add the job to the queue, update the counters, and
	 * notify the AIO worker thread to handle the job.
	 */
	mutex_enter(&aio->aio_mtx);

	/* Fail, if the limit was reached */
	if (atomic_inc_uint_nv(&aio_jobs_count) > aio_max ||
	    aio->jobs_count >= aio_listio_max) {
		atomic_dec_uint(&aio_jobs_count);
		mutex_exit(&aio->aio_mtx);
		pool_put(&aio_job_pool, a_job);
		return EAGAIN;
	}

	TAILQ_INSERT_TAIL(&aio->jobs_queue, a_job, list);
	aio->jobs_count++;
	if (lio)
		lio->refcnt++;
	cv_signal(&aio->aio_worker_cv);

	mutex_exit(&aio->aio_mtx);

	/*
	 * Errors, if any, are retrieved by the caller via aio_error(),
	 * which is the behaviour POSIX specifies.
	 */
	return 0;
}
/*
 * Syscall functions.
 */

int
sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) fildes;
		syscallarg(struct aiocb *) aiocbp;
	} */
	struct proc *p = l->l_proc;
	struct aioproc *aio;
	struct aio_job *a_job;
	struct aiocb *aiocbp_ptr;
	struct lio_req *lio;
	struct filedesc *fdp = p->p_fd;
	unsigned int cn, errcnt, fildes;
	fdtab_t *dt;

	TAILQ_HEAD(, aio_job) tmp_jobs_list;

	/* Check for an invalid file descriptor */
	fildes = (unsigned int)SCARG(uap, fildes);
	dt = fdp->fd_dt;
	if (fildes >= dt->dt_nfiles)
		return EBADF;
	if (dt->dt_ff[fildes] == NULL || dt->dt_ff[fildes]->ff_file == NULL)
		return EBADF;

	/* Check if AIO structure is initialized */
	if (p->p_aio == NULL) {
		*retval = AIO_NOTCANCELED;
		return 0;
	}

	aio = p->p_aio;
	aiocbp_ptr = (struct aiocb *)SCARG(uap, aiocbp);

	mutex_enter(&aio->aio_mtx);

	/* Cancel the jobs, and remove them from the queue */
	cn = 0;
	TAILQ_INIT(&tmp_jobs_list);
	TAILQ_FOREACH(a_job, &aio->jobs_queue, list) {
		if (aiocbp_ptr) {
			if (aiocbp_ptr != a_job->aiocb_uptr)
				continue;
			if (fildes != a_job->aiocbp.aio_fildes) {
				mutex_exit(&aio->aio_mtx);
				return EBADF;
			}
		} else if (a_job->aiocbp.aio_fildes != fildes)
			continue;

		TAILQ_REMOVE(&aio->jobs_queue, a_job, list);
		TAILQ_INSERT_TAIL(&tmp_jobs_list, a_job, list);

		/* Decrease the counters */
		atomic_dec_uint(&aio_jobs_count);
		aio->jobs_count--;
		lio = a_job->lio;
		if (lio != NULL && --lio->refcnt != 0)
			a_job->lio = NULL;

		cn++;
		if (aiocbp_ptr)
			break;
	}

	/* If any jobs were cancelled, report it */
	if (cn)
		*retval = AIO_CANCELED;

	/* The job currently being processed cannot be cancelled */
	a_job = aio->curjob;
	if (a_job && ((a_job->aiocbp.aio_fildes == fildes) ||
	    (a_job->aiocb_uptr == aiocbp_ptr)))
		*retval = AIO_NOTCANCELED;

	mutex_exit(&aio->aio_mtx);

	/* Free the jobs after dropping the lock */
	errcnt = 0;
	while (!TAILQ_EMPTY(&tmp_jobs_list)) {
		a_job = TAILQ_FIRST(&tmp_jobs_list);
		TAILQ_REMOVE(&tmp_jobs_list, a_job, list);
		/* Set the errno and copy structures back to the user-space */
		a_job->aiocbp._errno = ECANCELED;
		a_job->aiocbp._state = JOB_DONE;
		if (copyout(&a_job->aiocbp, a_job->aiocb_uptr,
		    sizeof(struct aiocb)))
			errcnt++;
		/* Send a signal, if any */
		aio_sendsig(p, &a_job->aiocbp.aio_sigevent);
		if (a_job->lio) {
			lio = a_job->lio;
			aio_sendsig(p, &lio->sig);
			pool_put(&aio_lio_pool, lio);
		}
		pool_put(&aio_job_pool, a_job);
	}

	if (errcnt)
		return EFAULT;

	/* Set a correct return value */
	if (*retval == 0)
		*retval = AIO_ALLDONE;

	return 0;
}

int
sys_aio_error(struct lwp *l, const struct sys_aio_error_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const struct aiocb *) aiocbp;
	} */
	struct proc *p = l->l_proc;
	struct aioproc *aio = p->p_aio;
	struct aiocb aiocbp;
	int error;

	if (aio == NULL)
		return EINVAL;

	error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb));
	if (error)
		return error;

	if (aiocbp._state == JOB_NONE)
		return EINVAL;

	*retval = aiocbp._errno;

	return 0;
}
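
/*
 * A minimal user-space sketch of cancellation, matching the return
 * values produced above (illustrative only; passing a NULL aiocb
 * pointer requests cancellation of all jobs on the descriptor):
 *
 *	#include <aio.h>
 *
 *	int ret = aio_cancel(fd, &cb);
 *
 * AIO_CANCELED means the request was removed from the queue;
 * AIO_NOTCANCELED means it is in progress (poll with aio_error());
 * AIO_ALLDONE means it had already completed.  A cancelled request
 * finishes with aio_error() == ECANCELED, and its result must still
 * be collected with aio_return().
 */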
int
sys_aio_fsync(struct lwp *l, const struct sys_aio_fsync_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) op;
		syscallarg(struct aiocb *) aiocbp;
	} */
	int op = SCARG(uap, op);

	if ((op != O_DSYNC) && (op != O_SYNC))
		return EINVAL;

	op = (op == O_DSYNC) ? AIO_DSYNC : AIO_SYNC;

	return aio_enqueue_job(op, SCARG(uap, aiocbp), NULL);
}

int
sys_aio_read(struct lwp *l, const struct sys_aio_read_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(struct aiocb *) aiocbp;
	} */

	return aio_enqueue_job(AIO_READ, SCARG(uap, aiocbp), NULL);
}

int
sys_aio_return(struct lwp *l, const struct sys_aio_return_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(struct aiocb *) aiocbp;
	} */
	struct proc *p = l->l_proc;
	struct aioproc *aio = p->p_aio;
	struct aiocb aiocbp;
	int error;

	if (aio == NULL)
		return EINVAL;

	error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb));
	if (error)
		return error;

	if (aiocbp._errno == EINPROGRESS || aiocbp._state != JOB_DONE)
		return EINVAL;

	*retval = aiocbp._retval;

	/* Reset the internal variables */
	aiocbp._errno = 0;
	aiocbp._retval = -1;
	aiocbp._state = JOB_NONE;
	error = copyout(&aiocbp, SCARG(uap, aiocbp), sizeof(struct aiocb));

	return error;
}

int
sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const struct aiocb *const[]) list;
		syscallarg(int) nent;
		syscallarg(const struct timespec *) timeout;
	} */
	struct aiocb **list;
	struct timespec ts;
	int error, nent;

	nent = SCARG(uap, nent);
	if (nent <= 0 || nent > aio_listio_max)
		return EAGAIN;

	if (SCARG(uap, timeout)) {
		/* Copy in the timeout (converted to ticks in aio_suspend1()) */
		error = copyin(SCARG(uap, timeout), &ts,
		    sizeof(struct timespec));
		if (error)
			return error;
	}

	list = kmem_alloc(nent * sizeof(*list), KM_SLEEP);
	error = copyin(SCARG(uap, list), list, nent * sizeof(*list));
	if (error)
		goto out;
	error = aio_suspend1(l, list, nent, SCARG(uap, timeout) ? &ts : NULL);
out:
	kmem_free(list, nent * sizeof(*list));
	return error;
}
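
/*
 * A minimal user-space sketch of waiting for completion with a
 * timeout (illustrative only):
 *
 *	#include <aio.h>
 *	#include <errno.h>
 *	#include <time.h>
 *
 *	const struct aiocb *wait_list[1] = { &cb };
 *	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
 *
 *	if (aio_suspend(wait_list, 1, &ts) == -1)
 *		return -1;
 *
 * EAGAIN indicates the timeout expired with the request still
 * pending; a NULL timeout blocks indefinitely, and EINTR is returned
 * if a signal interrupts the wait.
 */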
int
aio_suspend1(struct lwp *l, struct aiocb **aiocbp_list, int nent,
    struct timespec *ts)
{
	struct proc *p = l->l_proc;
	struct aioproc *aio;
	struct aio_job *a_job;
	int i, error, timo;

	if (p->p_aio == NULL)
		return EAGAIN;
	aio = p->p_aio;

	if (ts) {
		timo = mstohz((ts->tv_sec * 1000) + (ts->tv_nsec / 1000000));
		if (timo == 0 && ts->tv_sec == 0 && ts->tv_nsec > 0)
			timo = 1;
		if (timo <= 0)
			return EAGAIN;
	} else
		timo = 0;

	mutex_enter(&aio->aio_mtx);
	for (;;) {
		for (i = 0; i < nent; i++) {

			/* Skip NULL entries */
			if (aiocbp_list[i] == NULL)
				continue;

			/* Skip the current job */
			if (aio->curjob) {
				a_job = aio->curjob;
				if (a_job->aiocb_uptr == aiocbp_list[i])
					continue;
			}

			/* Look for a job in the queue */
			TAILQ_FOREACH(a_job, &aio->jobs_queue, list)
				if (a_job->aiocb_uptr == aiocbp_list[i])
					break;

			if (a_job == NULL) {
				struct aiocb aiocbp;

				mutex_exit(&aio->aio_mtx);

				/* Check if the job is done */
				error = copyin(aiocbp_list[i], &aiocbp,
				    sizeof(struct aiocb));
				if (error == 0 && aiocbp._state != JOB_DONE) {
					mutex_enter(&aio->aio_mtx);
					continue;
				}
				return error;
			}
		}

		/* Wait until a job completes, a signal arrives, or the timeout expires */
		error = cv_timedwait_sig(&aio->done_cv, &aio->aio_mtx, timo);
		if (error) {
			if (error == EWOULDBLOCK)
				error = EAGAIN;
			break;
		}
	}
	mutex_exit(&aio->aio_mtx);
	return error;
}

int
sys_aio_write(struct lwp *l, const struct sys_aio_write_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(struct aiocb *) aiocbp;
	} */

	return aio_enqueue_job(AIO_WRITE, SCARG(uap, aiocbp), NULL);
}

int
sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) mode;
		syscallarg(struct aiocb *const[]) list;
		syscallarg(int) nent;
		syscallarg(struct sigevent *) sig;
	} */
	struct proc *p = l->l_proc;
	struct aioproc *aio;
	struct aiocb **aiocbp_list;
	struct lio_req *lio;
	int i, error, errcnt, mode, nent;

	mode = SCARG(uap, mode);
	nent = SCARG(uap, nent);

	/* Racy (unlocked) checks for the limit and invalid values */
	if (nent < 1 || nent > aio_listio_max)
		return EINVAL;
	if (aio_jobs_count + nent > aio_max)
		return EAGAIN;

	/* Check if AIO structure is initialized, if not - initialize it */
	if (p->p_aio == NULL)
		if (aio_procinit(p))
			return EAGAIN;
	aio = p->p_aio;

	/* Create a LIO structure */
	lio = pool_get(&aio_lio_pool, PR_WAITOK);
	lio->refcnt = 1;
	error = 0;

	switch (mode) {
	case LIO_WAIT:
		memset(&lio->sig, 0, sizeof(struct sigevent));
		break;
	case LIO_NOWAIT:
		/* Check for a signal, and validate it */
		if (SCARG(uap, sig)) {
			struct sigevent *sig = &lio->sig;

			error = copyin(SCARG(uap, sig), &lio->sig,
			    sizeof(struct sigevent));
			if (error == 0 &&
			    (sig->sigev_signo < 0 ||
			    sig->sigev_signo >= NSIG ||
			    sig->sigev_notify < SIGEV_NONE ||
			    sig->sigev_notify > SIGEV_SA))
				error = EINVAL;
		} else
			memset(&lio->sig, 0, sizeof(struct sigevent));
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error != 0) {
		pool_put(&aio_lio_pool, lio);
		return error;
	}

	/* Get the list from user-space */
	aiocbp_list = kmem_alloc(nent * sizeof(*aiocbp_list), KM_SLEEP);
	error = copyin(SCARG(uap, list), aiocbp_list,
	    nent * sizeof(*aiocbp_list));
	if (error) {
		mutex_enter(&aio->aio_mtx);
		goto err;
	}

	/* Enqueue all jobs */
	errcnt = 0;
	for (i = 0; i < nent; i++) {
		error = aio_enqueue_job(AIO_LIO, aiocbp_list[i], lio);
		/*
		 * POSIX allows the call to fail even though some of
		 * the other I/O operations may have been initiated.
		 */
		if (error)
			errcnt++;
	}

	mutex_enter(&aio->aio_mtx);

	/* Return an error, if any */
	if (errcnt) {
		error = EIO;
		goto err;
	}

	if (mode == LIO_WAIT) {
		/*
		 * Wait for AIO completion.  In this case, the LIO
		 * structure will be freed here.
		 */
		while (lio->refcnt > 1 && error == 0)
			error = cv_wait_sig(&aio->done_cv, &aio->aio_mtx);
		if (error)
			error = EINTR;
	}

err:
	if (--lio->refcnt != 0)
		lio = NULL;
	mutex_exit(&aio->aio_mtx);
	if (lio != NULL) {
		aio_sendsig(p, &lio->sig);
		pool_put(&aio_lio_pool, lio);
	}
	kmem_free(aiocbp_list, nent * sizeof(*aiocbp_list));
	return error;
}
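
/*
 * A minimal user-space sketch of batched submission (illustrative
 * only).  The descriptor, buffer and length fields are filled in as
 * for aio_read()/aio_write(); the opcode selects the operation:
 *
 *	#include <aio.h>
 *
 *	struct aiocb rd, wr;
 *	struct aiocb *ops[2] = { &rd, &wr };
 *
 *	rd.aio_lio_opcode = LIO_READ;
 *	wr.aio_lio_opcode = LIO_WRITE;
 *
 *	if (lio_listio(LIO_WAIT, ops, 2, NULL) == -1)
 *		return -1;
 *
 * LIO_WAIT blocks until every listed operation has completed;
 * LIO_NOWAIT returns immediately and delivers the optional sigevent
 * (the last argument) once the whole list is done.  An EIO error
 * means one or more operations failed: check each aiocb individually
 * with aio_error() and aio_return().
 */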
/*
 * SysCtl
 */

static int
sysctl_aio_listio_max(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error, newsize;

	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = aio_listio_max;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < 1 || newsize > aio_max)
		return EINVAL;
	aio_listio_max = newsize;

	return 0;
}

static int
sysctl_aio_max(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error, newsize;

	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = aio_max;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < 1 || newsize < aio_listio_max)
		return EINVAL;
	aio_max = newsize;

	return 0;
}

SYSCTL_SETUP(sysctl_aio_setup, "sysctl aio setup")
{

	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "kern", NULL,
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "posix_aio",
		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
			     "Asynchronous I/O option to which the "
			     "system attempts to conform"),
		NULL, _POSIX_ASYNCHRONOUS_IO, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "aio_listio_max",
		SYSCTL_DESCR("Maximum number of asynchronous I/O "
			     "operations in a single list I/O call"),
		sysctl_aio_listio_max, 0, &aio_listio_max, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "aio_max",
		SYSCTL_DESCR("Maximum number of asynchronous I/O "
			     "operations"),
		sysctl_aio_max, 0, &aio_max, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);
}
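
/*
 * The limits above are visible and tunable from user-space via
 * sysctl(3), e.g. (illustrative only):
 *
 *	#include <sys/param.h>
 *	#include <sys/sysctl.h>
 *
 *	int max;
 *	size_t len = sizeof(max);
 *
 *	if (sysctlbyname("kern.aio_max", &max, &len, NULL, 0) == -1)
 *		return -1;
 *
 * Both kern.aio_max and kern.aio_listio_max accept writes, subject to
 * the cross-checks in the handlers above.
 */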
/*
 * Debugging
 */
#if defined(DDB)
void
aio_print_jobs(void (*pr)(const char *, ...))
{
	struct proc *p = (curlwp == NULL ? NULL : curlwp->l_proc);
	struct aioproc *aio;
	struct aio_job *a_job;
	struct aiocb *aiocbp;

	if (p == NULL) {
		(*pr)("AIO: not in a process context.\n");
		return;
	}

	aio = p->p_aio;
	if (aio == NULL) {
		(*pr)("AIO data is not initialized (PID = %d).\n", p->p_pid);
		return;
	}

	(*pr)("AIO: PID = %d\n", p->p_pid);
	(*pr)("AIO: Global count of the jobs = %u\n", aio_jobs_count);
	(*pr)("AIO: Count of the jobs = %u\n", aio->jobs_count);

	if (aio->curjob) {
		a_job = aio->curjob;
		(*pr)("\nAIO current job:\n");
		(*pr)(" opcode = %d, errno = %d, state = %d, aiocb_ptr = %p\n",
		    a_job->aio_op, a_job->aiocbp._errno,
		    a_job->aiocbp._state, a_job->aiocb_uptr);
		aiocbp = &a_job->aiocbp;
		(*pr)("   fd = %d, offset = %lld, buf = %p, nbytes = %zu\n",
		    aiocbp->aio_fildes, (long long)aiocbp->aio_offset,
		    aiocbp->aio_buf, aiocbp->aio_nbytes);
	}

	(*pr)("\nAIO queue:\n");
	TAILQ_FOREACH(a_job, &aio->jobs_queue, list) {
		(*pr)(" opcode = %d, errno = %d, state = %d, aiocb_ptr = %p\n",
		    a_job->aio_op, a_job->aiocbp._errno,
		    a_job->aiocbp._state, a_job->aiocb_uptr);
		aiocbp = &a_job->aiocbp;
		(*pr)("   fd = %d, offset = %lld, buf = %p, nbytes = %zu\n",
		    aiocbp->aio_fildes, (long long)aiocbp->aio_offset,
		    aiocbp->aio_buf, aiocbp->aio_nbytes);
	}
}
#endif /* defined(DDB) */