1 /* $NetBSD: sys_descrip.c,v 1.9 2009/01/11 02:45:52 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright (c) 1982, 1986, 1989, 1991, 1993 31 * The Regents of the University of California. All rights reserved. 32 * (c) UNIX System Laboratories, Inc. 33 * All or some portions of this file are derived from material licensed 34 * to the University of California by American Telephone and Telegraph 35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 36 * the permission of UNIX System Laboratories, Inc. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 63 */ 64 65 /* 66 * System calls on descriptors. 67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.9 2009/01/11 02:45:52 christos Exp $"); 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/filedesc.h> 75 #include <sys/kernel.h> 76 #include <sys/vnode.h> 77 #include <sys/proc.h> 78 #include <sys/file.h> 79 #include <sys/namei.h> 80 #include <sys/socket.h> 81 #include <sys/socketvar.h> 82 #include <sys/stat.h> 83 #include <sys/ioctl.h> 84 #include <sys/fcntl.h> 85 #include <sys/malloc.h> 86 #include <sys/pool.h> 87 #include <sys/syslog.h> 88 #include <sys/unistd.h> 89 #include <sys/resourcevar.h> 90 #include <sys/conf.h> 91 #include <sys/event.h> 92 #include <sys/kauth.h> 93 #include <sys/atomic.h> 94 #include <sys/mount.h> 95 #include <sys/syscallargs.h> 96 97 /* 98 * Duplicate a file descriptor. 99 */ 100 int 101 sys_dup(struct lwp *l, const struct sys_dup_args *uap, register_t *retval) 102 { 103 /* { 104 syscallarg(int) fd; 105 } */ 106 int new, error, old; 107 file_t *fp; 108 109 old = SCARG(uap, fd); 110 111 if ((fp = fd_getfile(old)) == NULL) { 112 return EBADF; 113 } 114 error = fd_dup(fp, 0, &new, false); 115 fd_putfile(old); 116 *retval = new; 117 return error; 118 } 119 120 /* 121 * Duplicate a file descriptor to a particular value. 122 */ 123 int 124 sys_dup2(struct lwp *l, const struct sys_dup2_args *uap, register_t *retval) 125 { 126 /* { 127 syscallarg(int) from; 128 syscallarg(int) to; 129 } */ 130 int old, new, error; 131 file_t *fp; 132 133 old = SCARG(uap, from); 134 new = SCARG(uap, to); 135 136 if ((fp = fd_getfile(old)) == NULL) { 137 return EBADF; 138 } 139 mutex_enter(&fp->f_lock); 140 fp->f_count++; 141 mutex_exit(&fp->f_lock); 142 fd_putfile(old); 143 144 if ((u_int)new >= curproc->p_rlimit[RLIMIT_NOFILE].rlim_cur || 145 (u_int)new >= maxfiles) { 146 error = EBADF; 147 } else if (old == new) { 148 error = 0; 149 } else { 150 error = fd_dup2(fp, new); 151 } 152 closef(fp); 153 *retval = new; 154 155 return error; 156 } 157 158 /* 159 * fcntl call which is being passed to the file's fs. 160 */ 161 static int 162 fcntl_forfs(int fd, file_t *fp, int cmd, void *arg) 163 { 164 int error; 165 u_int size; 166 void *data, *memp; 167 #define STK_PARAMS 128 168 char stkbuf[STK_PARAMS]; 169 170 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 171 return (EBADF); 172 173 /* 174 * Interpret high order word to find amount of data to be 175 * copied to/from the user's address space. 176 */ 177 size = (size_t)F_PARAM_LEN(cmd); 178 if (size > F_PARAM_MAX) 179 return (EINVAL); 180 memp = NULL; 181 if (size > sizeof(stkbuf)) { 182 memp = kmem_alloc(size, KM_SLEEP); 183 data = memp; 184 } else 185 data = stkbuf; 186 if (cmd & F_FSIN) { 187 if (size) { 188 error = copyin(arg, data, size); 189 if (error) { 190 if (memp) 191 kmem_free(memp, size); 192 return (error); 193 } 194 } else 195 *(void **)data = arg; 196 } else if ((cmd & F_FSOUT) != 0 && size != 0) { 197 /* 198 * Zero the buffer so the user always 199 * gets back something deterministic. 200 */ 201 memset(data, 0, size); 202 } else if (cmd & F_FSVOID) 203 *(void **)data = arg; 204 205 206 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data); 207 208 /* 209 * Copy any data to user, size was 210 * already set and checked above. 211 */ 212 if (error == 0 && (cmd & F_FSOUT) && size) 213 error = copyout(data, arg, size); 214 if (memp) 215 kmem_free(memp, size); 216 return (error); 217 } 218 219 int 220 do_fcntl_lock(int fd, int cmd, struct flock *fl) 221 { 222 file_t *fp; 223 vnode_t *vp; 224 proc_t *p; 225 int error, flg; 226 227 if ((fp = fd_getfile(fd)) == NULL) 228 return EBADF; 229 if (fp->f_type != DTYPE_VNODE) { 230 fd_putfile(fd); 231 return EINVAL; 232 } 233 vp = fp->f_data; 234 if (fl->l_whence == SEEK_CUR) 235 fl->l_start += fp->f_offset; 236 237 flg = F_POSIX; 238 p = curproc; 239 240 switch (cmd) { 241 case F_SETLKW: 242 flg |= F_WAIT; 243 /* Fall into F_SETLK */ 244 245 case F_SETLK: 246 switch (fl->l_type) { 247 case F_RDLCK: 248 if ((fp->f_flag & FREAD) == 0) { 249 error = EBADF; 250 break; 251 } 252 if ((p->p_flag & PK_ADVLOCK) == 0) { 253 mutex_enter(p->p_lock); 254 p->p_flag |= PK_ADVLOCK; 255 mutex_exit(p->p_lock); 256 } 257 error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg); 258 break; 259 260 case F_WRLCK: 261 if ((fp->f_flag & FWRITE) == 0) { 262 error = EBADF; 263 break; 264 } 265 if ((p->p_flag & PK_ADVLOCK) == 0) { 266 mutex_enter(p->p_lock); 267 p->p_flag |= PK_ADVLOCK; 268 mutex_exit(p->p_lock); 269 } 270 error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg); 271 break; 272 273 case F_UNLCK: 274 error = VOP_ADVLOCK(vp, p, F_UNLCK, fl, F_POSIX); 275 break; 276 277 default: 278 error = EINVAL; 279 break; 280 } 281 break; 282 283 case F_GETLK: 284 if (fl->l_type != F_RDLCK && 285 fl->l_type != F_WRLCK && 286 fl->l_type != F_UNLCK) { 287 error = EINVAL; 288 break; 289 } 290 error = VOP_ADVLOCK(vp, p, F_GETLK, fl, F_POSIX); 291 break; 292 293 default: 294 error = EINVAL; 295 break; 296 } 297 298 fd_putfile(fd); 299 return error; 300 } 301 302 /* 303 * The file control system call. 304 */ 305 int 306 sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval) 307 { 308 /* { 309 syscallarg(int) fd; 310 syscallarg(int) cmd; 311 syscallarg(void *) arg; 312 } */ 313 int fd, i, tmp, error, cmd, newmin; 314 filedesc_t *fdp; 315 file_t *fp; 316 fdfile_t *ff; 317 struct flock fl; 318 319 fd = SCARG(uap, fd); 320 cmd = SCARG(uap, cmd); 321 fdp = l->l_fd; 322 error = 0; 323 324 switch (cmd) { 325 case F_CLOSEM: 326 if (fd < 0) 327 return EBADF; 328 while ((i = fdp->fd_lastfile) >= fd) { 329 if (fd_getfile(i) == NULL) { 330 /* Another thread has updated. */ 331 continue; 332 } 333 fd_close(i); 334 } 335 return 0; 336 337 case F_MAXFD: 338 *retval = fdp->fd_lastfile; 339 return 0; 340 341 case F_SETLKW: 342 case F_SETLK: 343 case F_GETLK: 344 error = copyin(SCARG(uap, arg), &fl, sizeof(fl)); 345 if (error) 346 return error; 347 error = do_fcntl_lock(fd, cmd, &fl); 348 if (cmd == F_GETLK && error == 0) 349 error = copyout(&fl, SCARG(uap, arg), sizeof(fl)); 350 return error; 351 352 default: 353 /* Handled below */ 354 break; 355 } 356 357 if ((fp = fd_getfile(fd)) == NULL) 358 return (EBADF); 359 ff = fdp->fd_ofiles[fd]; 360 361 if ((cmd & F_FSCTL)) { 362 error = fcntl_forfs(fd, fp, cmd, SCARG(uap, arg)); 363 fd_putfile(fd); 364 return error; 365 } 366 367 switch (cmd) { 368 case F_DUPFD: 369 newmin = (long)SCARG(uap, arg); 370 if ((u_int)newmin >= 371 l->l_proc->p_rlimit[RLIMIT_NOFILE].rlim_cur || 372 (u_int)newmin >= maxfiles) { 373 fd_putfile(fd); 374 return EINVAL; 375 } 376 error = fd_dup(fp, newmin, &i, false); 377 *retval = i; 378 break; 379 380 case F_GETFD: 381 *retval = ff->ff_exclose; 382 break; 383 384 case F_SETFD: 385 if ((long)SCARG(uap, arg) & 1) { 386 ff->ff_exclose = true; 387 fdp->fd_exclose = true; 388 } else { 389 ff->ff_exclose = false; 390 } 391 break; 392 393 case F_GETFL: 394 *retval = OFLAGS(fp->f_flag); 395 break; 396 397 case F_SETFL: 398 /* XXX not guaranteed to be atomic. */ 399 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 400 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp); 401 if (error) 402 break; 403 i = tmp ^ fp->f_flag; 404 if (i & FNONBLOCK) { 405 int flgs = tmp & FNONBLOCK; 406 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs); 407 if (error) { 408 (*fp->f_ops->fo_fcntl)(fp, F_SETFL, 409 &fp->f_flag); 410 break; 411 } 412 } 413 if (i & FASYNC) { 414 int flgs = tmp & FASYNC; 415 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs); 416 if (error) { 417 if (i & FNONBLOCK) { 418 tmp = fp->f_flag & FNONBLOCK; 419 (void)(*fp->f_ops->fo_ioctl)(fp, 420 FIONBIO, &tmp); 421 } 422 (*fp->f_ops->fo_fcntl)(fp, F_SETFL, 423 &fp->f_flag); 424 break; 425 } 426 } 427 fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp; 428 break; 429 430 case F_GETOWN: 431 error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp); 432 *retval = tmp; 433 break; 434 435 case F_SETOWN: 436 tmp = (int)(uintptr_t) SCARG(uap, arg); 437 error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp); 438 break; 439 440 default: 441 error = EINVAL; 442 } 443 444 fd_putfile(fd); 445 return (error); 446 } 447 448 /* 449 * Close a file descriptor. 450 */ 451 int 452 sys_close(struct lwp *l, const struct sys_close_args *uap, register_t *retval) 453 { 454 /* { 455 syscallarg(int) fd; 456 } */ 457 458 if (fd_getfile(SCARG(uap, fd)) == NULL) { 459 return EBADF; 460 } 461 return fd_close(SCARG(uap, fd)); 462 } 463 464 /* 465 * Return status information about a file descriptor. 466 * Common function for compat code. 467 */ 468 int 469 do_sys_fstat(int fd, struct stat *sb) 470 { 471 file_t *fp; 472 int error; 473 474 if ((fp = fd_getfile(fd)) == NULL) { 475 return EBADF; 476 } 477 error = (*fp->f_ops->fo_stat)(fp, sb); 478 fd_putfile(fd); 479 480 return error; 481 } 482 483 /* 484 * Return status information about a file descriptor. 485 */ 486 int 487 sys___fstat50(struct lwp *l, const struct sys___fstat50_args *uap, 488 register_t *retval) 489 { 490 /* { 491 syscallarg(int) fd; 492 syscallarg(struct stat *) sb; 493 } */ 494 struct stat sb; 495 int error; 496 497 error = do_sys_fstat(SCARG(uap, fd), &sb); 498 if (error == 0) { 499 error = copyout(&sb, SCARG(uap, sb), sizeof(sb)); 500 } 501 return error; 502 } 503 504 /* 505 * Return pathconf information about a file descriptor. 506 */ 507 int 508 sys_fpathconf(struct lwp *l, const struct sys_fpathconf_args *uap, 509 register_t *retval) 510 { 511 /* { 512 syscallarg(int) fd; 513 syscallarg(int) name; 514 } */ 515 int fd, error; 516 file_t *fp; 517 518 fd = SCARG(uap, fd); 519 error = 0; 520 521 if ((fp = fd_getfile(fd)) == NULL) { 522 return (EBADF); 523 } 524 switch (fp->f_type) { 525 case DTYPE_SOCKET: 526 case DTYPE_PIPE: 527 if (SCARG(uap, name) != _PC_PIPE_BUF) 528 error = EINVAL; 529 else 530 *retval = PIPE_BUF; 531 break; 532 533 case DTYPE_VNODE: 534 error = VOP_PATHCONF(fp->f_data, SCARG(uap, name), retval); 535 break; 536 537 case DTYPE_KQUEUE: 538 error = EINVAL; 539 break; 540 541 default: 542 error = EOPNOTSUPP; 543 break; 544 } 545 546 fd_putfile(fd); 547 return (error); 548 } 549 550 /* 551 * Apply an advisory lock on a file descriptor. 552 * 553 * Just attempt to get a record lock of the requested type on 554 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 555 */ 556 /* ARGSUSED */ 557 int 558 sys_flock(struct lwp *l, const struct sys_flock_args *uap, register_t *retval) 559 { 560 /* { 561 syscallarg(int) fd; 562 syscallarg(int) how; 563 } */ 564 int fd, how, error; 565 file_t *fp; 566 vnode_t *vp; 567 struct flock lf; 568 proc_t *p; 569 570 fd = SCARG(uap, fd); 571 how = SCARG(uap, how); 572 error = 0; 573 574 if ((fp = fd_getfile(fd)) == NULL) { 575 return EBADF; 576 } 577 if (fp->f_type != DTYPE_VNODE) { 578 fd_putfile(fd); 579 return EOPNOTSUPP; 580 } 581 582 vp = fp->f_data; 583 lf.l_whence = SEEK_SET; 584 lf.l_start = 0; 585 lf.l_len = 0; 586 if (how & LOCK_UN) { 587 lf.l_type = F_UNLCK; 588 atomic_and_uint(&fp->f_flag, ~FHASLOCK); 589 error = VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK); 590 fd_putfile(fd); 591 return error; 592 } 593 if (how & LOCK_EX) { 594 lf.l_type = F_WRLCK; 595 } else if (how & LOCK_SH) { 596 lf.l_type = F_RDLCK; 597 } else { 598 fd_putfile(fd); 599 return EINVAL; 600 } 601 atomic_or_uint(&fp->f_flag, FHASLOCK); 602 p = curproc; 603 if (how & LOCK_NB) { 604 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK); 605 } else { 606 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK|F_WAIT); 607 } 608 fd_putfile(fd); 609 return error; 610 } 611 612 int 613 do_posix_fadvise(int fd, off_t offset, off_t len, int advice) 614 { 615 file_t *fp; 616 int error; 617 618 if ((fp = fd_getfile(fd)) == NULL) { 619 return EBADF; 620 } 621 if (fp->f_type != DTYPE_VNODE) { 622 if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { 623 error = ESPIPE; 624 } else { 625 error = EOPNOTSUPP; 626 } 627 fd_putfile(fd); 628 return error; 629 } 630 631 switch (advice) { 632 case POSIX_FADV_NORMAL: 633 case POSIX_FADV_RANDOM: 634 case POSIX_FADV_SEQUENTIAL: 635 KASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL); 636 KASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM); 637 KASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL); 638 639 /* 640 * We ignore offset and size. must lock the file to 641 * do this, as f_advice is sub-word sized. 642 */ 643 mutex_enter(&fp->f_lock); 644 fp->f_advice = (u_char)advice; 645 mutex_exit(&fp->f_lock); 646 error = 0; 647 break; 648 649 case POSIX_FADV_WILLNEED: 650 case POSIX_FADV_DONTNEED: 651 case POSIX_FADV_NOREUSE: 652 /* Not implemented yet. */ 653 error = 0; 654 break; 655 default: 656 error = EINVAL; 657 break; 658 } 659 660 fd_putfile(fd); 661 return error; 662 } 663 664 int 665 sys___posix_fadvise50(struct lwp *l, 666 const struct sys___posix_fadvise50_args *uap, 667 register_t *retval) 668 { 669 /* { 670 syscallarg(int) fd; 671 syscallarg(int) pad; 672 syscallarg(off_t) offset; 673 syscallarg(off_t) len; 674 syscallarg(int) advice; 675 } */ 676 677 return do_posix_fadvise(SCARG(uap, fd), SCARG(uap, offset), 678 SCARG(uap, len), SCARG(uap, advice)); 679 } 680