1 /* $NetBSD: sys_descrip.c,v 1.17 2009/10/28 18:24:44 njoly Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright (c) 1982, 1986, 1989, 1991, 1993 31 * The Regents of the University of California. All rights reserved. 32 * (c) UNIX System Laboratories, Inc. 33 * All or some portions of this file are derived from material licensed 34 * to the University of California by American Telephone and Telegraph 35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 36 * the permission of UNIX System Laboratories, Inc. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 63 */ 64 65 /* 66 * System calls on descriptors. 67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.17 2009/10/28 18:24:44 njoly Exp $"); 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/filedesc.h> 75 #include <sys/kernel.h> 76 #include <sys/vnode.h> 77 #include <sys/proc.h> 78 #include <sys/file.h> 79 #include <sys/namei.h> 80 #include <sys/socket.h> 81 #include <sys/socketvar.h> 82 #include <sys/stat.h> 83 #include <sys/ioctl.h> 84 #include <sys/fcntl.h> 85 #include <sys/kmem.h> 86 #include <sys/pool.h> 87 #include <sys/syslog.h> 88 #include <sys/unistd.h> 89 #include <sys/resourcevar.h> 90 #include <sys/conf.h> 91 #include <sys/event.h> 92 #include <sys/kauth.h> 93 #include <sys/atomic.h> 94 #include <sys/mount.h> 95 #include <sys/syscallargs.h> 96 97 #include <uvm/uvm_readahead.h> 98 99 /* 100 * Duplicate a file descriptor. 101 */ 102 int 103 sys_dup(struct lwp *l, const struct sys_dup_args *uap, register_t *retval) 104 { 105 /* { 106 syscallarg(int) fd; 107 } */ 108 int new, error, old; 109 file_t *fp; 110 111 old = SCARG(uap, fd); 112 113 if ((fp = fd_getfile(old)) == NULL) { 114 return EBADF; 115 } 116 error = fd_dup(fp, 0, &new, false); 117 fd_putfile(old); 118 *retval = new; 119 return error; 120 } 121 122 /* 123 * Duplicate a file descriptor to a particular value. 124 */ 125 int 126 sys_dup2(struct lwp *l, const struct sys_dup2_args *uap, register_t *retval) 127 { 128 /* { 129 syscallarg(int) from; 130 syscallarg(int) to; 131 } */ 132 int old, new, error; 133 file_t *fp; 134 135 old = SCARG(uap, from); 136 new = SCARG(uap, to); 137 138 if ((fp = fd_getfile(old)) == NULL) { 139 return EBADF; 140 } 141 mutex_enter(&fp->f_lock); 142 fp->f_count++; 143 mutex_exit(&fp->f_lock); 144 fd_putfile(old); 145 146 if ((u_int)new >= curproc->p_rlimit[RLIMIT_NOFILE].rlim_cur || 147 (u_int)new >= maxfiles) { 148 error = EBADF; 149 } else if (old == new) { 150 error = 0; 151 } else { 152 error = fd_dup2(fp, new); 153 } 154 closef(fp); 155 *retval = new; 156 157 return error; 158 } 159 160 /* 161 * fcntl call which is being passed to the file's fs. 162 */ 163 static int 164 fcntl_forfs(int fd, file_t *fp, int cmd, void *arg) 165 { 166 int error; 167 u_int size; 168 void *data, *memp; 169 #define STK_PARAMS 128 170 char stkbuf[STK_PARAMS]; 171 172 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 173 return (EBADF); 174 175 /* 176 * Interpret high order word to find amount of data to be 177 * copied to/from the user's address space. 178 */ 179 size = (size_t)F_PARAM_LEN(cmd); 180 if (size > F_PARAM_MAX) 181 return (EINVAL); 182 memp = NULL; 183 if (size > sizeof(stkbuf)) { 184 memp = kmem_alloc(size, KM_SLEEP); 185 data = memp; 186 } else 187 data = stkbuf; 188 if (cmd & F_FSIN) { 189 if (size) { 190 error = copyin(arg, data, size); 191 if (error) { 192 if (memp) 193 kmem_free(memp, size); 194 return (error); 195 } 196 } else 197 *(void **)data = arg; 198 } else if ((cmd & F_FSOUT) != 0 && size != 0) { 199 /* 200 * Zero the buffer so the user always 201 * gets back something deterministic. 202 */ 203 memset(data, 0, size); 204 } else if (cmd & F_FSVOID) 205 *(void **)data = arg; 206 207 208 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data); 209 210 /* 211 * Copy any data to user, size was 212 * already set and checked above. 213 */ 214 if (error == 0 && (cmd & F_FSOUT) && size) 215 error = copyout(data, arg, size); 216 if (memp) 217 kmem_free(memp, size); 218 return (error); 219 } 220 221 int 222 do_fcntl_lock(int fd, int cmd, struct flock *fl) 223 { 224 file_t *fp; 225 vnode_t *vp; 226 proc_t *p; 227 int error, flg; 228 229 if ((fp = fd_getfile(fd)) == NULL) 230 return EBADF; 231 if (fp->f_type != DTYPE_VNODE) { 232 fd_putfile(fd); 233 return EINVAL; 234 } 235 vp = fp->f_data; 236 if (fl->l_whence == SEEK_CUR) 237 fl->l_start += fp->f_offset; 238 239 flg = F_POSIX; 240 p = curproc; 241 242 switch (cmd) { 243 case F_SETLKW: 244 flg |= F_WAIT; 245 /* Fall into F_SETLK */ 246 247 case F_SETLK: 248 switch (fl->l_type) { 249 case F_RDLCK: 250 if ((fp->f_flag & FREAD) == 0) { 251 error = EBADF; 252 break; 253 } 254 if ((p->p_flag & PK_ADVLOCK) == 0) { 255 mutex_enter(p->p_lock); 256 p->p_flag |= PK_ADVLOCK; 257 mutex_exit(p->p_lock); 258 } 259 error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg); 260 break; 261 262 case F_WRLCK: 263 if ((fp->f_flag & FWRITE) == 0) { 264 error = EBADF; 265 break; 266 } 267 if ((p->p_flag & PK_ADVLOCK) == 0) { 268 mutex_enter(p->p_lock); 269 p->p_flag |= PK_ADVLOCK; 270 mutex_exit(p->p_lock); 271 } 272 error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg); 273 break; 274 275 case F_UNLCK: 276 error = VOP_ADVLOCK(vp, p, F_UNLCK, fl, F_POSIX); 277 break; 278 279 default: 280 error = EINVAL; 281 break; 282 } 283 break; 284 285 case F_GETLK: 286 if (fl->l_type != F_RDLCK && 287 fl->l_type != F_WRLCK && 288 fl->l_type != F_UNLCK) { 289 error = EINVAL; 290 break; 291 } 292 error = VOP_ADVLOCK(vp, p, F_GETLK, fl, F_POSIX); 293 break; 294 295 default: 296 error = EINVAL; 297 break; 298 } 299 300 fd_putfile(fd); 301 return error; 302 } 303 304 /* 305 * The file control system call. 306 */ 307 int 308 sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval) 309 { 310 /* { 311 syscallarg(int) fd; 312 syscallarg(int) cmd; 313 syscallarg(void *) arg; 314 } */ 315 int fd, i, tmp, error, cmd, newmin; 316 filedesc_t *fdp; 317 file_t *fp; 318 fdfile_t *ff; 319 struct flock fl; 320 321 fd = SCARG(uap, fd); 322 cmd = SCARG(uap, cmd); 323 fdp = l->l_fd; 324 error = 0; 325 326 switch (cmd) { 327 case F_CLOSEM: 328 if (fd < 0) 329 return EBADF; 330 while ((i = fdp->fd_lastfile) >= fd) { 331 if (fd_getfile(i) == NULL) { 332 /* Another thread has updated. */ 333 continue; 334 } 335 fd_close(i); 336 } 337 return 0; 338 339 case F_MAXFD: 340 *retval = fdp->fd_lastfile; 341 return 0; 342 343 case F_SETLKW: 344 case F_SETLK: 345 case F_GETLK: 346 error = copyin(SCARG(uap, arg), &fl, sizeof(fl)); 347 if (error) 348 return error; 349 error = do_fcntl_lock(fd, cmd, &fl); 350 if (cmd == F_GETLK && error == 0) 351 error = copyout(&fl, SCARG(uap, arg), sizeof(fl)); 352 return error; 353 354 default: 355 /* Handled below */ 356 break; 357 } 358 359 if ((fp = fd_getfile(fd)) == NULL) 360 return (EBADF); 361 ff = fdp->fd_dt->dt_ff[fd]; 362 363 if ((cmd & F_FSCTL)) { 364 error = fcntl_forfs(fd, fp, cmd, SCARG(uap, arg)); 365 fd_putfile(fd); 366 return error; 367 } 368 369 switch (cmd) { 370 case F_DUPFD: 371 newmin = (long)SCARG(uap, arg); 372 if ((u_int)newmin >= 373 l->l_proc->p_rlimit[RLIMIT_NOFILE].rlim_cur || 374 (u_int)newmin >= maxfiles) { 375 fd_putfile(fd); 376 return EINVAL; 377 } 378 error = fd_dup(fp, newmin, &i, false); 379 *retval = i; 380 break; 381 382 case F_GETFD: 383 *retval = ff->ff_exclose; 384 break; 385 386 case F_SETFD: 387 if ((long)SCARG(uap, arg) & FD_CLOEXEC) { 388 ff->ff_exclose = true; 389 fdp->fd_exclose = true; 390 } else { 391 ff->ff_exclose = false; 392 } 393 break; 394 395 case F_GETFL: 396 *retval = OFLAGS(fp->f_flag); 397 break; 398 399 case F_SETFL: 400 /* XXX not guaranteed to be atomic. */ 401 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 402 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp); 403 if (error) 404 break; 405 i = tmp ^ fp->f_flag; 406 if (i & FNONBLOCK) { 407 int flgs = tmp & FNONBLOCK; 408 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs); 409 if (error) { 410 (*fp->f_ops->fo_fcntl)(fp, F_SETFL, 411 &fp->f_flag); 412 break; 413 } 414 } 415 if (i & FASYNC) { 416 int flgs = tmp & FASYNC; 417 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs); 418 if (error) { 419 if (i & FNONBLOCK) { 420 tmp = fp->f_flag & FNONBLOCK; 421 (void)(*fp->f_ops->fo_ioctl)(fp, 422 FIONBIO, &tmp); 423 } 424 (*fp->f_ops->fo_fcntl)(fp, F_SETFL, 425 &fp->f_flag); 426 break; 427 } 428 } 429 fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp; 430 break; 431 432 case F_GETOWN: 433 error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp); 434 *retval = tmp; 435 break; 436 437 case F_SETOWN: 438 tmp = (int)(uintptr_t) SCARG(uap, arg); 439 error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp); 440 break; 441 442 default: 443 error = EINVAL; 444 } 445 446 fd_putfile(fd); 447 return (error); 448 } 449 450 /* 451 * Close a file descriptor. 452 */ 453 int 454 sys_close(struct lwp *l, const struct sys_close_args *uap, register_t *retval) 455 { 456 /* { 457 syscallarg(int) fd; 458 } */ 459 460 if (fd_getfile(SCARG(uap, fd)) == NULL) { 461 return EBADF; 462 } 463 return fd_close(SCARG(uap, fd)); 464 } 465 466 /* 467 * Return status information about a file descriptor. 468 * Common function for compat code. 469 */ 470 int 471 do_sys_fstat(int fd, struct stat *sb) 472 { 473 file_t *fp; 474 int error; 475 476 if ((fp = fd_getfile(fd)) == NULL) { 477 return EBADF; 478 } 479 error = (*fp->f_ops->fo_stat)(fp, sb); 480 fd_putfile(fd); 481 482 return error; 483 } 484 485 /* 486 * Return status information about a file descriptor. 487 */ 488 int 489 sys___fstat50(struct lwp *l, const struct sys___fstat50_args *uap, 490 register_t *retval) 491 { 492 /* { 493 syscallarg(int) fd; 494 syscallarg(struct stat *) sb; 495 } */ 496 struct stat sb; 497 int error; 498 499 error = do_sys_fstat(SCARG(uap, fd), &sb); 500 if (error == 0) { 501 error = copyout(&sb, SCARG(uap, sb), sizeof(sb)); 502 } 503 return error; 504 } 505 506 /* 507 * Return pathconf information about a file descriptor. 508 */ 509 int 510 sys_fpathconf(struct lwp *l, const struct sys_fpathconf_args *uap, 511 register_t *retval) 512 { 513 /* { 514 syscallarg(int) fd; 515 syscallarg(int) name; 516 } */ 517 int fd, error; 518 file_t *fp; 519 520 fd = SCARG(uap, fd); 521 error = 0; 522 523 if ((fp = fd_getfile(fd)) == NULL) { 524 return (EBADF); 525 } 526 switch (fp->f_type) { 527 case DTYPE_SOCKET: 528 case DTYPE_PIPE: 529 if (SCARG(uap, name) != _PC_PIPE_BUF) 530 error = EINVAL; 531 else 532 *retval = PIPE_BUF; 533 break; 534 535 case DTYPE_VNODE: 536 error = VOP_PATHCONF(fp->f_data, SCARG(uap, name), retval); 537 break; 538 539 case DTYPE_KQUEUE: 540 error = EINVAL; 541 break; 542 543 default: 544 error = EOPNOTSUPP; 545 break; 546 } 547 548 fd_putfile(fd); 549 return (error); 550 } 551 552 /* 553 * Apply an advisory lock on a file descriptor. 554 * 555 * Just attempt to get a record lock of the requested type on 556 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 557 */ 558 /* ARGSUSED */ 559 int 560 sys_flock(struct lwp *l, const struct sys_flock_args *uap, register_t *retval) 561 { 562 /* { 563 syscallarg(int) fd; 564 syscallarg(int) how; 565 } */ 566 int fd, how, error; 567 file_t *fp; 568 vnode_t *vp; 569 struct flock lf; 570 proc_t *p; 571 572 fd = SCARG(uap, fd); 573 how = SCARG(uap, how); 574 error = 0; 575 576 if ((fp = fd_getfile(fd)) == NULL) { 577 return EBADF; 578 } 579 if (fp->f_type != DTYPE_VNODE) { 580 fd_putfile(fd); 581 return EOPNOTSUPP; 582 } 583 584 vp = fp->f_data; 585 lf.l_whence = SEEK_SET; 586 lf.l_start = 0; 587 lf.l_len = 0; 588 589 switch (how & ~LOCK_NB) { 590 case LOCK_UN: 591 lf.l_type = F_UNLCK; 592 atomic_and_uint(&fp->f_flag, ~FHASLOCK); 593 error = VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK); 594 fd_putfile(fd); 595 return error; 596 case LOCK_EX: 597 lf.l_type = F_WRLCK; 598 break; 599 case LOCK_SH: 600 lf.l_type = F_RDLCK; 601 break; 602 default: 603 fd_putfile(fd); 604 return EINVAL; 605 } 606 607 atomic_or_uint(&fp->f_flag, FHASLOCK); 608 p = curproc; 609 if (how & LOCK_NB) { 610 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK); 611 } else { 612 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK|F_WAIT); 613 } 614 fd_putfile(fd); 615 return error; 616 } 617 618 int 619 do_posix_fadvise(int fd, off_t offset, off_t len, int advice) 620 { 621 file_t *fp; 622 vnode_t *vp; 623 off_t endoffset; 624 int error; 625 CTASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL); 626 CTASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM); 627 CTASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL); 628 629 if (len == 0) { 630 endoffset = INT64_MAX; 631 } else if (INT64_MAX - offset >= len) { 632 endoffset = offset + len; 633 } else { 634 return EINVAL; 635 } 636 if ((fp = fd_getfile(fd)) == NULL) { 637 return EBADF; 638 } 639 if (fp->f_type != DTYPE_VNODE) { 640 if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { 641 error = ESPIPE; 642 } else { 643 error = EOPNOTSUPP; 644 } 645 fd_putfile(fd); 646 return error; 647 } 648 649 switch (advice) { 650 case POSIX_FADV_WILLNEED: 651 case POSIX_FADV_DONTNEED: 652 vp = fp->f_data; 653 if (vp->v_type != VREG && vp->v_type != VBLK) { 654 fd_putfile(fd); 655 return 0; 656 } 657 break; 658 } 659 660 switch (advice) { 661 case POSIX_FADV_NORMAL: 662 case POSIX_FADV_RANDOM: 663 case POSIX_FADV_SEQUENTIAL: 664 665 /* 666 * We ignore offset and size. must lock the file to 667 * do this, as f_advice is sub-word sized. 668 */ 669 mutex_enter(&fp->f_lock); 670 fp->f_advice = (u_char)advice; 671 mutex_exit(&fp->f_lock); 672 error = 0; 673 break; 674 675 case POSIX_FADV_WILLNEED: 676 vp = fp->f_data; 677 error = uvm_readahead(&vp->v_uobj, offset, endoffset - offset); 678 break; 679 680 case POSIX_FADV_DONTNEED: 681 vp = fp->f_data; 682 mutex_enter(&vp->v_interlock); 683 error = VOP_PUTPAGES(vp, round_page(offset), 684 trunc_page(endoffset), PGO_DEACTIVATE | PGO_CLEANIT); 685 break; 686 687 case POSIX_FADV_NOREUSE: 688 /* Not implemented yet. */ 689 error = 0; 690 break; 691 default: 692 error = EINVAL; 693 break; 694 } 695 696 fd_putfile(fd); 697 return error; 698 } 699 700 int 701 sys___posix_fadvise50(struct lwp *l, 702 const struct sys___posix_fadvise50_args *uap, 703 register_t *retval) 704 { 705 /* { 706 syscallarg(int) fd; 707 syscallarg(int) pad; 708 syscallarg(off_t) offset; 709 syscallarg(off_t) len; 710 syscallarg(int) advice; 711 } */ 712 713 *retval = do_posix_fadvise(SCARG(uap, fd), SCARG(uap, offset), 714 SCARG(uap, len), SCARG(uap, advice)); 715 716 return 0; 717 } 718