1 /* $NetBSD: sys_generic.c,v 1.123 2009/05/24 21:41:26 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1982, 1986, 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 66 */ 67 68 /* 69 * System calls relating to files. 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.123 2009/05/24 21:41:26 ad Exp $"); 74 75 #include <sys/param.h> 76 #include <sys/systm.h> 77 #include <sys/filedesc.h> 78 #include <sys/ioctl.h> 79 #include <sys/file.h> 80 #include <sys/proc.h> 81 #include <sys/socketvar.h> 82 #include <sys/signalvar.h> 83 #include <sys/uio.h> 84 #include <sys/kernel.h> 85 #include <sys/stat.h> 86 #include <sys/kmem.h> 87 #include <sys/poll.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/syscallargs.h> 91 #include <sys/ktrace.h> 92 #include <sys/atomic.h> 93 94 #include <uvm/uvm_extern.h> 95 96 /* 97 * Read system call. 98 */ 99 /* ARGSUSED */ 100 int 101 sys_read(struct lwp *l, const struct sys_read_args *uap, register_t *retval) 102 { 103 /* { 104 syscallarg(int) fd; 105 syscallarg(void *) buf; 106 syscallarg(size_t) nbyte; 107 } */ 108 file_t *fp; 109 int fd; 110 111 fd = SCARG(uap, fd); 112 113 if ((fp = fd_getfile(fd)) == NULL) 114 return (EBADF); 115 116 if ((fp->f_flag & FREAD) == 0) { 117 fd_putfile(fd); 118 return (EBADF); 119 } 120 121 /* dofileread() will unuse the descriptor for us */ 122 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 123 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 124 } 125 126 int 127 dofileread(int fd, struct file *fp, void *buf, size_t nbyte, 128 off_t *offset, int flags, register_t *retval) 129 { 130 struct iovec aiov; 131 struct uio auio; 132 size_t cnt; 133 int error; 134 lwp_t *l; 135 136 l = curlwp; 137 138 aiov.iov_base = (void *)buf; 139 aiov.iov_len = nbyte; 140 auio.uio_iov = &aiov; 141 auio.uio_iovcnt = 1; 142 auio.uio_resid = nbyte; 143 auio.uio_rw = UIO_READ; 144 auio.uio_vmspace = l->l_proc->p_vmspace; 145 146 /* 147 * Reads return ssize_t because -1 is returned on error. Therefore 148 * we must restrict the length to SSIZE_MAX to avoid garbage return 149 * values. 150 */ 151 if (auio.uio_resid > SSIZE_MAX) { 152 error = EINVAL; 153 goto out; 154 } 155 156 cnt = auio.uio_resid; 157 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 158 if (error) 159 if (auio.uio_resid != cnt && (error == ERESTART || 160 error == EINTR || error == EWOULDBLOCK)) 161 error = 0; 162 cnt -= auio.uio_resid; 163 ktrgenio(fd, UIO_READ, buf, cnt, error); 164 *retval = cnt; 165 out: 166 fd_putfile(fd); 167 return (error); 168 } 169 170 /* 171 * Scatter read system call. 172 */ 173 int 174 sys_readv(struct lwp *l, const struct sys_readv_args *uap, register_t *retval) 175 { 176 /* { 177 syscallarg(int) fd; 178 syscallarg(const struct iovec *) iovp; 179 syscallarg(int) iovcnt; 180 } */ 181 182 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 183 SCARG(uap, iovcnt), NULL, FOF_UPDATE_OFFSET, retval); 184 } 185 186 int 187 do_filereadv(int fd, const struct iovec *iovp, int iovcnt, 188 off_t *offset, int flags, register_t *retval) 189 { 190 struct uio auio; 191 struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV]; 192 int i, error; 193 size_t cnt; 194 u_int iovlen; 195 struct file *fp; 196 struct iovec *ktriov = NULL; 197 198 if (iovcnt == 0) 199 return EINVAL; 200 201 if ((fp = fd_getfile(fd)) == NULL) 202 return EBADF; 203 204 if ((fp->f_flag & FREAD) == 0) { 205 fd_putfile(fd); 206 return EBADF; 207 } 208 209 if (offset == NULL) 210 offset = &fp->f_offset; 211 else { 212 struct vnode *vp = fp->f_data; 213 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 214 error = ESPIPE; 215 goto out; 216 } 217 /* 218 * Test that the device is seekable ? 219 * XXX This works because no file systems actually 220 * XXX take any action on the seek operation. 221 */ 222 error = VOP_SEEK(vp, fp->f_offset, *offset, fp->f_cred); 223 if (error != 0) 224 goto out; 225 } 226 227 iovlen = iovcnt * sizeof(struct iovec); 228 if (flags & FOF_IOV_SYSSPACE) 229 iov = __UNCONST(iovp); 230 else { 231 iov = aiov; 232 if ((u_int)iovcnt > UIO_SMALLIOV) { 233 if ((u_int)iovcnt > IOV_MAX) { 234 error = EINVAL; 235 goto out; 236 } 237 iov = kmem_alloc(iovlen, KM_SLEEP); 238 if (iov == NULL) { 239 error = ENOMEM; 240 goto out; 241 } 242 needfree = iov; 243 } 244 error = copyin(iovp, iov, iovlen); 245 if (error) 246 goto done; 247 } 248 249 auio.uio_iov = iov; 250 auio.uio_iovcnt = iovcnt; 251 auio.uio_rw = UIO_READ; 252 auio.uio_vmspace = curproc->p_vmspace; 253 254 auio.uio_resid = 0; 255 for (i = 0; i < iovcnt; i++, iov++) { 256 auio.uio_resid += iov->iov_len; 257 /* 258 * Reads return ssize_t because -1 is returned on error. 259 * Therefore we must restrict the length to SSIZE_MAX to 260 * avoid garbage return values. 261 */ 262 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 263 error = EINVAL; 264 goto done; 265 } 266 } 267 268 /* 269 * if tracing, save a copy of iovec 270 */ 271 if (ktrpoint(KTR_GENIO)) { 272 ktriov = kmem_alloc(iovlen, KM_SLEEP); 273 if (ktriov != NULL) 274 memcpy(ktriov, auio.uio_iov, iovlen); 275 } 276 277 cnt = auio.uio_resid; 278 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 279 if (error) 280 if (auio.uio_resid != cnt && (error == ERESTART || 281 error == EINTR || error == EWOULDBLOCK)) 282 error = 0; 283 cnt -= auio.uio_resid; 284 *retval = cnt; 285 286 if (ktriov != NULL) { 287 ktrgeniov(fd, UIO_READ, ktriov, cnt, error); 288 kmem_free(ktriov, iovlen); 289 } 290 291 done: 292 if (needfree) 293 kmem_free(needfree, iovlen); 294 out: 295 fd_putfile(fd); 296 return (error); 297 } 298 299 /* 300 * Write system call 301 */ 302 int 303 sys_write(struct lwp *l, const struct sys_write_args *uap, register_t *retval) 304 { 305 /* { 306 syscallarg(int) fd; 307 syscallarg(const void *) buf; 308 syscallarg(size_t) nbyte; 309 } */ 310 file_t *fp; 311 int fd; 312 313 fd = SCARG(uap, fd); 314 315 if ((fp = fd_getfile(fd)) == NULL) 316 return (EBADF); 317 318 if ((fp->f_flag & FWRITE) == 0) { 319 fd_putfile(fd); 320 return (EBADF); 321 } 322 323 /* dofilewrite() will unuse the descriptor for us */ 324 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 325 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 326 } 327 328 int 329 dofilewrite(int fd, struct file *fp, const void *buf, 330 size_t nbyte, off_t *offset, int flags, register_t *retval) 331 { 332 struct iovec aiov; 333 struct uio auio; 334 size_t cnt; 335 int error; 336 337 aiov.iov_base = __UNCONST(buf); /* XXXUNCONST kills const */ 338 aiov.iov_len = nbyte; 339 auio.uio_iov = &aiov; 340 auio.uio_iovcnt = 1; 341 auio.uio_resid = nbyte; 342 auio.uio_rw = UIO_WRITE; 343 auio.uio_vmspace = curproc->p_vmspace; 344 345 /* 346 * Writes return ssize_t because -1 is returned on error. Therefore 347 * we must restrict the length to SSIZE_MAX to avoid garbage return 348 * values. 349 */ 350 if (auio.uio_resid > SSIZE_MAX) { 351 error = EINVAL; 352 goto out; 353 } 354 355 cnt = auio.uio_resid; 356 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 357 if (error) { 358 if (auio.uio_resid != cnt && (error == ERESTART || 359 error == EINTR || error == EWOULDBLOCK)) 360 error = 0; 361 if (error == EPIPE) { 362 mutex_enter(proc_lock); 363 psignal(curproc, SIGPIPE); 364 mutex_exit(proc_lock); 365 } 366 } 367 cnt -= auio.uio_resid; 368 ktrgenio(fd, UIO_WRITE, buf, cnt, error); 369 *retval = cnt; 370 out: 371 fd_putfile(fd); 372 return (error); 373 } 374 375 /* 376 * Gather write system call 377 */ 378 int 379 sys_writev(struct lwp *l, const struct sys_writev_args *uap, register_t *retval) 380 { 381 /* { 382 syscallarg(int) fd; 383 syscallarg(const struct iovec *) iovp; 384 syscallarg(int) iovcnt; 385 } */ 386 387 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 388 SCARG(uap, iovcnt), NULL, FOF_UPDATE_OFFSET, retval); 389 } 390 391 int 392 do_filewritev(int fd, const struct iovec *iovp, int iovcnt, 393 off_t *offset, int flags, register_t *retval) 394 { 395 struct uio auio; 396 struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV]; 397 int i, error; 398 size_t cnt; 399 u_int iovlen; 400 struct file *fp; 401 struct iovec *ktriov = NULL; 402 403 if (iovcnt == 0) 404 return EINVAL; 405 406 if ((fp = fd_getfile(fd)) == NULL) 407 return EBADF; 408 409 if ((fp->f_flag & FWRITE) == 0) { 410 fd_putfile(fd); 411 return EBADF; 412 } 413 414 if (offset == NULL) 415 offset = &fp->f_offset; 416 else { 417 struct vnode *vp = fp->f_data; 418 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 419 error = ESPIPE; 420 goto out; 421 } 422 /* 423 * Test that the device is seekable ? 424 * XXX This works because no file systems actually 425 * XXX take any action on the seek operation. 426 */ 427 error = VOP_SEEK(vp, fp->f_offset, *offset, fp->f_cred); 428 if (error != 0) 429 goto out; 430 } 431 432 iovlen = iovcnt * sizeof(struct iovec); 433 if (flags & FOF_IOV_SYSSPACE) 434 iov = __UNCONST(iovp); 435 else { 436 iov = aiov; 437 if ((u_int)iovcnt > UIO_SMALLIOV) { 438 if ((u_int)iovcnt > IOV_MAX) { 439 error = EINVAL; 440 goto out; 441 } 442 iov = kmem_alloc(iovlen, KM_SLEEP); 443 if (iov == NULL) { 444 error = ENOMEM; 445 goto out; 446 } 447 needfree = iov; 448 } 449 error = copyin(iovp, iov, iovlen); 450 if (error) 451 goto done; 452 } 453 454 auio.uio_iov = iov; 455 auio.uio_iovcnt = iovcnt; 456 auio.uio_rw = UIO_WRITE; 457 auio.uio_vmspace = curproc->p_vmspace; 458 459 auio.uio_resid = 0; 460 for (i = 0; i < iovcnt; i++, iov++) { 461 auio.uio_resid += iov->iov_len; 462 /* 463 * Writes return ssize_t because -1 is returned on error. 464 * Therefore we must restrict the length to SSIZE_MAX to 465 * avoid garbage return values. 466 */ 467 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 468 error = EINVAL; 469 goto done; 470 } 471 } 472 473 /* 474 * if tracing, save a copy of iovec 475 */ 476 if (ktrpoint(KTR_GENIO)) { 477 ktriov = kmem_alloc(iovlen, KM_SLEEP); 478 if (ktriov != NULL) 479 memcpy(ktriov, auio.uio_iov, iovlen); 480 } 481 482 cnt = auio.uio_resid; 483 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 484 if (error) { 485 if (auio.uio_resid != cnt && (error == ERESTART || 486 error == EINTR || error == EWOULDBLOCK)) 487 error = 0; 488 if (error == EPIPE) { 489 mutex_enter(proc_lock); 490 psignal(curproc, SIGPIPE); 491 mutex_exit(proc_lock); 492 } 493 } 494 cnt -= auio.uio_resid; 495 *retval = cnt; 496 497 if (ktriov != NULL) { 498 ktrgeniov(fd, UIO_WRITE, ktriov, cnt, error); 499 kmem_free(ktriov, iovlen); 500 } 501 502 done: 503 if (needfree) 504 kmem_free(needfree, iovlen); 505 out: 506 fd_putfile(fd); 507 return (error); 508 } 509 510 /* 511 * Ioctl system call 512 */ 513 /* ARGSUSED */ 514 int 515 sys_ioctl(struct lwp *l, const struct sys_ioctl_args *uap, register_t *retval) 516 { 517 /* { 518 syscallarg(int) fd; 519 syscallarg(u_long) com; 520 syscallarg(void *) data; 521 } */ 522 struct file *fp; 523 proc_t *p; 524 struct filedesc *fdp; 525 u_long com; 526 int error; 527 u_int size; 528 void *data, *memp; 529 #define STK_PARAMS 128 530 u_long stkbuf[STK_PARAMS/sizeof(u_long)]; 531 fdfile_t *ff; 532 533 error = 0; 534 p = l->l_proc; 535 fdp = p->p_fd; 536 537 if ((fp = fd_getfile(SCARG(uap, fd))) == NULL) 538 return (EBADF); 539 540 if ((fp->f_flag & (FREAD | FWRITE)) == 0) { 541 error = EBADF; 542 com = 0; 543 goto out; 544 } 545 546 ff = fdp->fd_dt->dt_ff[SCARG(uap, fd)]; 547 switch (com = SCARG(uap, com)) { 548 case FIONCLEX: 549 ff->ff_exclose = false; 550 goto out; 551 552 case FIOCLEX: 553 ff->ff_exclose = true; 554 fdp->fd_exclose = true; 555 goto out; 556 } 557 558 /* 559 * Interpret high order word to find amount of data to be 560 * copied to/from the user's address space. 561 */ 562 size = IOCPARM_LEN(com); 563 if (size > IOCPARM_MAX) { 564 error = ENOTTY; 565 goto out; 566 } 567 memp = NULL; 568 if (size > sizeof(stkbuf)) { 569 memp = kmem_alloc(size, KM_SLEEP); 570 data = memp; 571 } else 572 data = (void *)stkbuf; 573 if (com&IOC_IN) { 574 if (size) { 575 error = copyin(SCARG(uap, data), data, size); 576 if (error) { 577 if (memp) 578 kmem_free(memp, size); 579 goto out; 580 } 581 ktrgenio(SCARG(uap, fd), UIO_WRITE, SCARG(uap, data), 582 size, 0); 583 } else 584 *(void **)data = SCARG(uap, data); 585 } else if ((com&IOC_OUT) && size) 586 /* 587 * Zero the buffer so the user always 588 * gets back something deterministic. 589 */ 590 memset(data, 0, size); 591 else if (com&IOC_VOID) 592 *(void **)data = SCARG(uap, data); 593 594 switch (com) { 595 596 case FIONBIO: 597 /* XXX Code block is not atomic */ 598 if (*(int *)data != 0) 599 atomic_or_uint(&fp->f_flag, FNONBLOCK); 600 else 601 atomic_and_uint(&fp->f_flag, ~FNONBLOCK); 602 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data); 603 break; 604 605 case FIOASYNC: 606 /* XXX Code block is not atomic */ 607 if (*(int *)data != 0) 608 atomic_or_uint(&fp->f_flag, FASYNC); 609 else 610 atomic_and_uint(&fp->f_flag, ~FASYNC); 611 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data); 612 break; 613 614 default: 615 error = (*fp->f_ops->fo_ioctl)(fp, com, data); 616 /* 617 * Copy any data to user, size was 618 * already set and checked above. 619 */ 620 if (error == 0 && (com&IOC_OUT) && size) { 621 error = copyout(data, SCARG(uap, data), size); 622 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, data), 623 size, error); 624 } 625 break; 626 } 627 if (memp) 628 kmem_free(memp, size); 629 out: 630 fd_putfile(SCARG(uap, fd)); 631 switch (error) { 632 case -1: 633 printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: " 634 "pid=%d comm=%s\n", 635 (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "", 636 (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com), 637 p->p_pid, p->p_comm); 638 /* FALLTHROUGH */ 639 case EPASSTHROUGH: 640 error = ENOTTY; 641 /* FALLTHROUGH */ 642 default: 643 return (error); 644 } 645 } 646