1 /* $NetBSD: sys_generic.c,v 1.117 2008/03/23 14:02:49 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2007, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * Copyright (c) 1982, 1986, 1989, 1993 41 * The Regents of the University of California. All rights reserved. 42 * (c) UNIX System Laboratories, Inc. 43 * All or some portions of this file are derived from material licensed 44 * to the University of California by American Telephone and Telegraph 45 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 46 * the permission of UNIX System Laboratories, Inc. 47 * 48 * Redistribution and use in source and binary forms, with or without 49 * modification, are permitted provided that the following conditions 50 * are met: 51 * 1. Redistributions of source code must retain the above copyright 52 * notice, this list of conditions and the following disclaimer. 53 * 2. Redistributions in binary form must reproduce the above copyright 54 * notice, this list of conditions and the following disclaimer in the 55 * documentation and/or other materials provided with the distribution. 56 * 3. Neither the name of the University nor the names of its contributors 57 * may be used to endorse or promote products derived from this software 58 * without specific prior written permission. 59 * 60 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 61 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 62 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 63 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 64 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 65 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 66 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 68 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 69 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 70 * SUCH DAMAGE. 71 * 72 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 73 */ 74 75 /* 76 * System calls relating to files. 77 */ 78 79 #include <sys/cdefs.h> 80 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.117 2008/03/23 14:02:49 ad Exp $"); 81 82 #include <sys/param.h> 83 #include <sys/systm.h> 84 #include <sys/filedesc.h> 85 #include <sys/ioctl.h> 86 #include <sys/file.h> 87 #include <sys/proc.h> 88 #include <sys/socketvar.h> 89 #include <sys/signalvar.h> 90 #include <sys/uio.h> 91 #include <sys/kernel.h> 92 #include <sys/stat.h> 93 #include <sys/kmem.h> 94 #include <sys/poll.h> 95 #include <sys/vnode.h> 96 #include <sys/mount.h> 97 #include <sys/syscallargs.h> 98 #include <sys/ktrace.h> 99 100 #include <uvm/uvm_extern.h> 101 102 /* 103 * Read system call. 104 */ 105 /* ARGSUSED */ 106 int 107 sys_read(struct lwp *l, const struct sys_read_args *uap, register_t *retval) 108 { 109 /* { 110 syscallarg(int) fd; 111 syscallarg(void *) buf; 112 syscallarg(size_t) nbyte; 113 } */ 114 file_t *fp; 115 int fd; 116 117 fd = SCARG(uap, fd); 118 119 if ((fp = fd_getfile(fd)) == NULL) 120 return (EBADF); 121 122 if ((fp->f_flag & FREAD) == 0) { 123 fd_putfile(fd); 124 return (EBADF); 125 } 126 127 /* dofileread() will unuse the descriptor for us */ 128 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 129 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 130 } 131 132 int 133 dofileread(int fd, struct file *fp, void *buf, size_t nbyte, 134 off_t *offset, int flags, register_t *retval) 135 { 136 struct iovec aiov; 137 struct uio auio; 138 size_t cnt; 139 int error; 140 lwp_t *l; 141 142 l = curlwp; 143 144 aiov.iov_base = (void *)buf; 145 aiov.iov_len = nbyte; 146 auio.uio_iov = &aiov; 147 auio.uio_iovcnt = 1; 148 auio.uio_resid = nbyte; 149 auio.uio_rw = UIO_READ; 150 auio.uio_vmspace = l->l_proc->p_vmspace; 151 152 /* 153 * Reads return ssize_t because -1 is returned on error. Therefore 154 * we must restrict the length to SSIZE_MAX to avoid garbage return 155 * values. 156 */ 157 if (auio.uio_resid > SSIZE_MAX) { 158 error = EINVAL; 159 goto out; 160 } 161 162 cnt = auio.uio_resid; 163 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 164 if (error) 165 if (auio.uio_resid != cnt && (error == ERESTART || 166 error == EINTR || error == EWOULDBLOCK)) 167 error = 0; 168 cnt -= auio.uio_resid; 169 ktrgenio(fd, UIO_READ, buf, cnt, error); 170 *retval = cnt; 171 out: 172 fd_putfile(fd); 173 return (error); 174 } 175 176 /* 177 * Scatter read system call. 178 */ 179 int 180 sys_readv(struct lwp *l, const struct sys_readv_args *uap, register_t *retval) 181 { 182 /* { 183 syscallarg(int) fd; 184 syscallarg(const struct iovec *) iovp; 185 syscallarg(int) iovcnt; 186 } */ 187 188 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 189 SCARG(uap, iovcnt), NULL, FOF_UPDATE_OFFSET, retval); 190 } 191 192 int 193 do_filereadv(int fd, const struct iovec *iovp, int iovcnt, 194 off_t *offset, int flags, register_t *retval) 195 { 196 struct uio auio; 197 struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV]; 198 int i, error; 199 size_t cnt; 200 u_int iovlen; 201 struct file *fp; 202 struct iovec *ktriov = NULL; 203 204 if (iovcnt == 0) 205 return EINVAL; 206 207 if ((fp = fd_getfile(fd)) == NULL) 208 return EBADF; 209 210 if ((fp->f_flag & FREAD) == 0) { 211 fd_putfile(fd); 212 return EBADF; 213 } 214 215 if (offset == NULL) 216 offset = &fp->f_offset; 217 else { 218 struct vnode *vp = fp->f_data; 219 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 220 error = ESPIPE; 221 goto out; 222 } 223 /* 224 * Test that the device is seekable ? 225 * XXX This works because no file systems actually 226 * XXX take any action on the seek operation. 227 */ 228 error = VOP_SEEK(vp, fp->f_offset, *offset, fp->f_cred); 229 if (error != 0) 230 goto out; 231 } 232 233 iovlen = iovcnt * sizeof(struct iovec); 234 if (flags & FOF_IOV_SYSSPACE) 235 iov = __UNCONST(iovp); 236 else { 237 iov = aiov; 238 if ((u_int)iovcnt > UIO_SMALLIOV) { 239 if ((u_int)iovcnt > IOV_MAX) { 240 error = EINVAL; 241 goto out; 242 } 243 iov = kmem_alloc(iovlen, KM_SLEEP); 244 if (iov == NULL) { 245 error = ENOMEM; 246 goto out; 247 } 248 needfree = iov; 249 } 250 error = copyin(iovp, iov, iovlen); 251 if (error) 252 goto done; 253 } 254 255 auio.uio_iov = iov; 256 auio.uio_iovcnt = iovcnt; 257 auio.uio_rw = UIO_READ; 258 auio.uio_vmspace = curproc->p_vmspace; 259 260 auio.uio_resid = 0; 261 for (i = 0; i < iovcnt; i++, iov++) { 262 auio.uio_resid += iov->iov_len; 263 /* 264 * Reads return ssize_t because -1 is returned on error. 265 * Therefore we must restrict the length to SSIZE_MAX to 266 * avoid garbage return values. 267 */ 268 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 269 error = EINVAL; 270 goto done; 271 } 272 } 273 274 /* 275 * if tracing, save a copy of iovec 276 */ 277 if (ktrpoint(KTR_GENIO)) { 278 ktriov = kmem_alloc(iovlen, KM_SLEEP); 279 if (ktriov != NULL) 280 memcpy(ktriov, auio.uio_iov, iovlen); 281 } 282 283 cnt = auio.uio_resid; 284 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 285 if (error) 286 if (auio.uio_resid != cnt && (error == ERESTART || 287 error == EINTR || error == EWOULDBLOCK)) 288 error = 0; 289 cnt -= auio.uio_resid; 290 *retval = cnt; 291 292 if (ktriov != NULL) { 293 ktrgeniov(fd, UIO_READ, ktriov, cnt, error); 294 kmem_free(ktriov, iovlen); 295 } 296 297 done: 298 if (needfree) 299 kmem_free(needfree, iovlen); 300 out: 301 fd_putfile(fd); 302 return (error); 303 } 304 305 /* 306 * Write system call 307 */ 308 int 309 sys_write(struct lwp *l, const struct sys_write_args *uap, register_t *retval) 310 { 311 /* { 312 syscallarg(int) fd; 313 syscallarg(const void *) buf; 314 syscallarg(size_t) nbyte; 315 } */ 316 file_t *fp; 317 int fd; 318 319 fd = SCARG(uap, fd); 320 321 if ((fp = fd_getfile(fd)) == NULL) 322 return (EBADF); 323 324 if ((fp->f_flag & FWRITE) == 0) { 325 fd_putfile(fd); 326 return (EBADF); 327 } 328 329 /* dofilewrite() will unuse the descriptor for us */ 330 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 331 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 332 } 333 334 int 335 dofilewrite(int fd, struct file *fp, const void *buf, 336 size_t nbyte, off_t *offset, int flags, register_t *retval) 337 { 338 struct iovec aiov; 339 struct uio auio; 340 size_t cnt; 341 int error; 342 343 aiov.iov_base = __UNCONST(buf); /* XXXUNCONST kills const */ 344 aiov.iov_len = nbyte; 345 auio.uio_iov = &aiov; 346 auio.uio_iovcnt = 1; 347 auio.uio_resid = nbyte; 348 auio.uio_rw = UIO_WRITE; 349 auio.uio_vmspace = curproc->p_vmspace; 350 351 /* 352 * Writes return ssize_t because -1 is returned on error. Therefore 353 * we must restrict the length to SSIZE_MAX to avoid garbage return 354 * values. 355 */ 356 if (auio.uio_resid > SSIZE_MAX) { 357 error = EINVAL; 358 goto out; 359 } 360 361 cnt = auio.uio_resid; 362 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 363 if (error) { 364 if (auio.uio_resid != cnt && (error == ERESTART || 365 error == EINTR || error == EWOULDBLOCK)) 366 error = 0; 367 if (error == EPIPE) { 368 mutex_enter(&proclist_mutex); 369 psignal(curproc, SIGPIPE); 370 mutex_exit(&proclist_mutex); 371 } 372 } 373 cnt -= auio.uio_resid; 374 ktrgenio(fd, UIO_WRITE, buf, cnt, error); 375 *retval = cnt; 376 out: 377 fd_putfile(fd); 378 return (error); 379 } 380 381 /* 382 * Gather write system call 383 */ 384 int 385 sys_writev(struct lwp *l, const struct sys_writev_args *uap, register_t *retval) 386 { 387 /* { 388 syscallarg(int) fd; 389 syscallarg(const struct iovec *) iovp; 390 syscallarg(int) iovcnt; 391 } */ 392 393 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 394 SCARG(uap, iovcnt), NULL, FOF_UPDATE_OFFSET, retval); 395 } 396 397 int 398 do_filewritev(int fd, const struct iovec *iovp, int iovcnt, 399 off_t *offset, int flags, register_t *retval) 400 { 401 struct uio auio; 402 struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV]; 403 int i, error; 404 size_t cnt; 405 u_int iovlen; 406 struct file *fp; 407 struct iovec *ktriov = NULL; 408 409 if (iovcnt == 0) 410 return EINVAL; 411 412 if ((fp = fd_getfile(fd)) == NULL) 413 return EBADF; 414 415 if ((fp->f_flag & FWRITE) == 0) { 416 fd_putfile(fd); 417 return EBADF; 418 } 419 420 if (offset == NULL) 421 offset = &fp->f_offset; 422 else { 423 struct vnode *vp = fp->f_data; 424 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 425 error = ESPIPE; 426 goto out; 427 } 428 /* 429 * Test that the device is seekable ? 430 * XXX This works because no file systems actually 431 * XXX take any action on the seek operation. 432 */ 433 error = VOP_SEEK(vp, fp->f_offset, *offset, fp->f_cred); 434 if (error != 0) 435 goto out; 436 } 437 438 iovlen = iovcnt * sizeof(struct iovec); 439 if (flags & FOF_IOV_SYSSPACE) 440 iov = __UNCONST(iovp); 441 else { 442 iov = aiov; 443 if ((u_int)iovcnt > UIO_SMALLIOV) { 444 if ((u_int)iovcnt > IOV_MAX) { 445 error = EINVAL; 446 goto out; 447 } 448 iov = kmem_alloc(iovlen, KM_SLEEP); 449 if (iov == NULL) { 450 error = ENOMEM; 451 goto out; 452 } 453 needfree = iov; 454 } 455 error = copyin(iovp, iov, iovlen); 456 if (error) 457 goto done; 458 } 459 460 auio.uio_iov = iov; 461 auio.uio_iovcnt = iovcnt; 462 auio.uio_rw = UIO_WRITE; 463 auio.uio_vmspace = curproc->p_vmspace; 464 465 auio.uio_resid = 0; 466 for (i = 0; i < iovcnt; i++, iov++) { 467 auio.uio_resid += iov->iov_len; 468 /* 469 * Writes return ssize_t because -1 is returned on error. 470 * Therefore we must restrict the length to SSIZE_MAX to 471 * avoid garbage return values. 472 */ 473 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 474 error = EINVAL; 475 goto done; 476 } 477 } 478 479 /* 480 * if tracing, save a copy of iovec 481 */ 482 if (ktrpoint(KTR_GENIO)) { 483 ktriov = kmem_alloc(iovlen, KM_SLEEP); 484 if (ktriov != NULL) 485 memcpy(ktriov, auio.uio_iov, iovlen); 486 } 487 488 cnt = auio.uio_resid; 489 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 490 if (error) { 491 if (auio.uio_resid != cnt && (error == ERESTART || 492 error == EINTR || error == EWOULDBLOCK)) 493 error = 0; 494 if (error == EPIPE) { 495 mutex_enter(&proclist_mutex); 496 psignal(curproc, SIGPIPE); 497 mutex_exit(&proclist_mutex); 498 } 499 } 500 cnt -= auio.uio_resid; 501 *retval = cnt; 502 503 if (ktriov != NULL) { 504 ktrgeniov(fd, UIO_WRITE, ktriov, cnt, error); 505 kmem_free(ktriov, iovlen); 506 } 507 508 done: 509 if (needfree) 510 kmem_free(needfree, iovlen); 511 out: 512 fd_putfile(fd); 513 return (error); 514 } 515 516 /* 517 * Ioctl system call 518 */ 519 /* ARGSUSED */ 520 int 521 sys_ioctl(struct lwp *l, const struct sys_ioctl_args *uap, register_t *retval) 522 { 523 /* { 524 syscallarg(int) fd; 525 syscallarg(u_long) com; 526 syscallarg(void *) data; 527 } */ 528 struct file *fp; 529 proc_t *p; 530 struct filedesc *fdp; 531 u_long com; 532 int error; 533 u_int size; 534 void *data, *memp; 535 #define STK_PARAMS 128 536 u_long stkbuf[STK_PARAMS/sizeof(u_long)]; 537 fdfile_t *ff; 538 539 error = 0; 540 p = l->l_proc; 541 fdp = p->p_fd; 542 543 if ((fp = fd_getfile(SCARG(uap, fd))) == NULL) 544 return (EBADF); 545 546 if ((fp->f_flag & (FREAD | FWRITE)) == 0) { 547 error = EBADF; 548 com = 0; 549 goto out; 550 } 551 552 ff = fdp->fd_ofiles[SCARG(uap, fd)]; 553 switch (com = SCARG(uap, com)) { 554 case FIONCLEX: 555 ff->ff_exclose = 0; 556 goto out; 557 558 case FIOCLEX: 559 ff->ff_exclose = 1; 560 fdp->fd_exclose = 1; 561 goto out; 562 } 563 564 /* 565 * Interpret high order word to find amount of data to be 566 * copied to/from the user's address space. 567 */ 568 size = IOCPARM_LEN(com); 569 if (size > IOCPARM_MAX) { 570 error = ENOTTY; 571 goto out; 572 } 573 memp = NULL; 574 if (size > sizeof(stkbuf)) { 575 memp = kmem_alloc(size, KM_SLEEP); 576 data = memp; 577 } else 578 data = (void *)stkbuf; 579 if (com&IOC_IN) { 580 if (size) { 581 error = copyin(SCARG(uap, data), data, size); 582 if (error) { 583 if (memp) 584 kmem_free(memp, size); 585 goto out; 586 } 587 ktrgenio(SCARG(uap, fd), UIO_WRITE, SCARG(uap, data), 588 size, 0); 589 } else 590 *(void **)data = SCARG(uap, data); 591 } else if ((com&IOC_OUT) && size) 592 /* 593 * Zero the buffer so the user always 594 * gets back something deterministic. 595 */ 596 memset(data, 0, size); 597 else if (com&IOC_VOID) 598 *(void **)data = SCARG(uap, data); 599 600 switch (com) { 601 602 case FIONBIO: 603 FILE_LOCK(fp); 604 if (*(int *)data != 0) 605 fp->f_flag |= FNONBLOCK; 606 else 607 fp->f_flag &= ~FNONBLOCK; 608 FILE_UNLOCK(fp); 609 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data); 610 break; 611 612 case FIOASYNC: 613 FILE_LOCK(fp); 614 if (*(int *)data != 0) 615 fp->f_flag |= FASYNC; 616 else 617 fp->f_flag &= ~FASYNC; 618 FILE_UNLOCK(fp); 619 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data); 620 break; 621 622 default: 623 error = (*fp->f_ops->fo_ioctl)(fp, com, data); 624 /* 625 * Copy any data to user, size was 626 * already set and checked above. 627 */ 628 if (error == 0 && (com&IOC_OUT) && size) { 629 error = copyout(data, SCARG(uap, data), size); 630 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, data), 631 size, error); 632 } 633 break; 634 } 635 if (memp) 636 kmem_free(memp, size); 637 out: 638 fd_putfile(SCARG(uap, fd)); 639 switch (error) { 640 case -1: 641 printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: " 642 "pid=%d comm=%s\n", 643 (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "", 644 (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com), 645 p->p_pid, p->p_comm); 646 /* FALLTHROUGH */ 647 case EPASSTHROUGH: 648 error = ENOTTY; 649 /* FALLTHROUGH */ 650 default: 651 return (error); 652 } 653 } 654