1 /* $NetBSD: sys_descrip.c,v 1.3 2008/04/28 20:24:04 martin Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright (c) 1982, 1986, 1989, 1991, 1993 31 * The Regents of the University of California. All rights reserved. 32 * (c) UNIX System Laboratories, Inc. 33 * All or some portions of this file are derived from material licensed 34 * to the University of California by American Telephone and Telegraph 35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 36 * the permission of UNIX System Laboratories, Inc. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 63 */ 64 65 /* 66 * System calls on descriptors. 67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.3 2008/04/28 20:24:04 martin Exp $"); 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/filedesc.h> 75 #include <sys/kernel.h> 76 #include <sys/vnode.h> 77 #include <sys/proc.h> 78 #include <sys/file.h> 79 #include <sys/namei.h> 80 #include <sys/socket.h> 81 #include <sys/socketvar.h> 82 #include <sys/stat.h> 83 #include <sys/ioctl.h> 84 #include <sys/fcntl.h> 85 #include <sys/malloc.h> 86 #include <sys/pool.h> 87 #include <sys/syslog.h> 88 #include <sys/unistd.h> 89 #include <sys/resourcevar.h> 90 #include <sys/conf.h> 91 #include <sys/event.h> 92 #include <sys/kauth.h> 93 #include <sys/atomic.h> 94 #include <sys/mount.h> 95 #include <sys/syscallargs.h> 96 97 /* 98 * Duplicate a file descriptor. 99 */ 100 int 101 sys_dup(struct lwp *l, const struct sys_dup_args *uap, register_t *retval) 102 { 103 /* { 104 syscallarg(int) fd; 105 } */ 106 int new, error, old; 107 file_t *fp; 108 109 old = SCARG(uap, fd); 110 111 if ((fp = fd_getfile(old)) == NULL) { 112 return EBADF; 113 } 114 error = fd_dup(fp, 0, &new, 0); 115 fd_putfile(old); 116 *retval = new; 117 return error; 118 } 119 120 /* 121 * Duplicate a file descriptor to a particular value. 122 */ 123 int 124 sys_dup2(struct lwp *l, const struct sys_dup2_args *uap, register_t *retval) 125 { 126 /* { 127 syscallarg(int) from; 128 syscallarg(int) to; 129 } */ 130 int old, new, error; 131 file_t *fp; 132 133 old = SCARG(uap, from); 134 new = SCARG(uap, to); 135 136 if ((fp = fd_getfile(old)) == NULL) { 137 return EBADF; 138 } 139 if ((u_int)new >= curproc->p_rlimit[RLIMIT_NOFILE].rlim_cur || 140 (u_int)new >= maxfiles) { 141 error = EBADF; 142 } else if (old == new) { 143 error = 0; 144 } else { 145 error = fd_dup2(fp, new); 146 } 147 fd_putfile(old); 148 *retval = new; 149 150 return 0; 151 } 152 153 /* 154 * fcntl call which is being passed to the file's fs. 155 */ 156 static int 157 fcntl_forfs(int fd, file_t *fp, int cmd, void *arg) 158 { 159 int error; 160 u_int size; 161 void *data, *memp; 162 #define STK_PARAMS 128 163 char stkbuf[STK_PARAMS]; 164 165 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 166 return (EBADF); 167 168 /* 169 * Interpret high order word to find amount of data to be 170 * copied to/from the user's address space. 171 */ 172 size = (size_t)F_PARAM_LEN(cmd); 173 if (size > F_PARAM_MAX) 174 return (EINVAL); 175 memp = NULL; 176 if (size > sizeof(stkbuf)) { 177 memp = kmem_alloc(size, KM_SLEEP); 178 data = memp; 179 } else 180 data = stkbuf; 181 if (cmd & F_FSIN) { 182 if (size) { 183 error = copyin(arg, data, size); 184 if (error) { 185 if (memp) 186 kmem_free(memp, size); 187 return (error); 188 } 189 } else 190 *(void **)data = arg; 191 } else if ((cmd & F_FSOUT) != 0 && size != 0) { 192 /* 193 * Zero the buffer so the user always 194 * gets back something deterministic. 195 */ 196 memset(data, 0, size); 197 } else if (cmd & F_FSVOID) 198 *(void **)data = arg; 199 200 201 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data); 202 203 /* 204 * Copy any data to user, size was 205 * already set and checked above. 206 */ 207 if (error == 0 && (cmd & F_FSOUT) && size) 208 error = copyout(data, arg, size); 209 if (memp) 210 kmem_free(memp, size); 211 return (error); 212 } 213 214 int 215 do_fcntl_lock(int fd, int cmd, struct flock *fl) 216 { 217 file_t *fp; 218 vnode_t *vp; 219 proc_t *p; 220 int error, flg; 221 222 if ((fp = fd_getfile(fd)) == NULL) 223 return EBADF; 224 if (fp->f_type != DTYPE_VNODE) { 225 fd_putfile(fd); 226 return EINVAL; 227 } 228 vp = fp->f_data; 229 if (fl->l_whence == SEEK_CUR) 230 fl->l_start += fp->f_offset; 231 232 flg = F_POSIX; 233 p = curproc; 234 235 switch (cmd) { 236 case F_SETLKW: 237 flg |= F_WAIT; 238 /* Fall into F_SETLK */ 239 240 case F_SETLK: 241 switch (fl->l_type) { 242 case F_RDLCK: 243 if ((fp->f_flag & FREAD) == 0) { 244 error = EBADF; 245 break; 246 } 247 if ((p->p_flag & PK_ADVLOCK) == 0) { 248 mutex_enter(p->p_lock); 249 p->p_flag |= PK_ADVLOCK; 250 mutex_exit(p->p_lock); 251 } 252 error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg); 253 break; 254 255 case F_WRLCK: 256 if ((fp->f_flag & FWRITE) == 0) { 257 error = EBADF; 258 break; 259 } 260 if ((p->p_flag & PK_ADVLOCK) == 0) { 261 mutex_enter(p->p_lock); 262 p->p_flag |= PK_ADVLOCK; 263 mutex_exit(p->p_lock); 264 } 265 error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg); 266 break; 267 268 case F_UNLCK: 269 error = VOP_ADVLOCK(vp, p, F_UNLCK, fl, F_POSIX); 270 break; 271 272 default: 273 error = EINVAL; 274 break; 275 } 276 break; 277 278 case F_GETLK: 279 if (fl->l_type != F_RDLCK && 280 fl->l_type != F_WRLCK && 281 fl->l_type != F_UNLCK) { 282 error = EINVAL; 283 break; 284 } 285 error = VOP_ADVLOCK(vp, p, F_GETLK, fl, F_POSIX); 286 break; 287 288 default: 289 error = EINVAL; 290 break; 291 } 292 293 fd_putfile(fd); 294 return error; 295 } 296 297 /* 298 * The file control system call. 299 */ 300 int 301 sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval) 302 { 303 /* { 304 syscallarg(int) fd; 305 syscallarg(int) cmd; 306 syscallarg(void *) arg; 307 } */ 308 int fd, i, tmp, error, cmd, newmin; 309 filedesc_t *fdp; 310 file_t *fp; 311 fdfile_t *ff; 312 proc_t *p; 313 struct flock fl; 314 315 p = l->l_proc; 316 fd = SCARG(uap, fd); 317 cmd = SCARG(uap, cmd); 318 fdp = p->p_fd; 319 error = 0; 320 321 switch (cmd) { 322 case F_CLOSEM: 323 if (fd < 0) 324 return EBADF; 325 while ((i = fdp->fd_lastfile) >= fd) { 326 if (fd_getfile(i) == NULL) { 327 /* Another thread has updated. */ 328 continue; 329 } 330 fd_close(i); 331 } 332 return 0; 333 334 case F_MAXFD: 335 *retval = fdp->fd_lastfile; 336 return 0; 337 338 case F_SETLKW: 339 case F_SETLK: 340 case F_GETLK: 341 error = copyin(SCARG(uap, arg), &fl, sizeof(fl)); 342 if (error) 343 return error; 344 error = do_fcntl_lock(fd, cmd, &fl); 345 if (cmd == F_GETLK && error == 0) 346 error = copyout(&fl, SCARG(uap, arg), sizeof(fl)); 347 return error; 348 349 default: 350 /* Handled below */ 351 break; 352 } 353 354 if ((fp = fd_getfile(fd)) == NULL) 355 return (EBADF); 356 ff = fdp->fd_ofiles[fd]; 357 358 if ((cmd & F_FSCTL)) { 359 error = fcntl_forfs(fd, fp, cmd, SCARG(uap, arg)); 360 fd_putfile(fd); 361 return error; 362 } 363 364 switch (cmd) { 365 case F_DUPFD: 366 newmin = (long)SCARG(uap, arg); 367 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 368 (u_int)newmin >= maxfiles) { 369 fd_putfile(fd); 370 return EINVAL; 371 } 372 error = fd_dup(fp, newmin, &i, 0); 373 *retval = i; 374 break; 375 376 case F_GETFD: 377 *retval = ff->ff_exclose; 378 break; 379 380 case F_SETFD: 381 if ((long)SCARG(uap, arg) & 1) { 382 ff->ff_exclose = 1; 383 fdp->fd_exclose = 1; 384 } else { 385 ff->ff_exclose = 0; 386 } 387 break; 388 389 case F_GETFL: 390 *retval = OFLAGS(fp->f_flag); 391 break; 392 393 case F_SETFL: 394 /* XXX not guaranteed to be atomic. */ 395 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 396 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp); 397 if (error) 398 break; 399 i = tmp ^ fp->f_flag; 400 if (i & FNONBLOCK) { 401 int flgs = tmp & FNONBLOCK; 402 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs); 403 if (error) { 404 (*fp->f_ops->fo_fcntl)(fp, F_SETFL, 405 &fp->f_flag); 406 break; 407 } 408 } 409 if (i & FASYNC) { 410 int flgs = tmp & FASYNC; 411 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs); 412 if (error) { 413 if (i & FNONBLOCK) { 414 tmp = fp->f_flag & FNONBLOCK; 415 (void)(*fp->f_ops->fo_ioctl)(fp, 416 FIONBIO, &tmp); 417 } 418 (*fp->f_ops->fo_fcntl)(fp, F_SETFL, 419 &fp->f_flag); 420 break; 421 } 422 } 423 fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp; 424 break; 425 426 case F_GETOWN: 427 error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp); 428 *retval = tmp; 429 break; 430 431 case F_SETOWN: 432 tmp = (int)(intptr_t) SCARG(uap, arg); 433 error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp); 434 break; 435 436 default: 437 error = EINVAL; 438 } 439 440 fd_putfile(fd); 441 return (error); 442 } 443 444 /* 445 * Close a file descriptor. 446 */ 447 int 448 sys_close(struct lwp *l, const struct sys_close_args *uap, register_t *retval) 449 { 450 /* { 451 syscallarg(int) fd; 452 } */ 453 454 if (fd_getfile(SCARG(uap, fd)) == NULL) { 455 return EBADF; 456 } 457 return fd_close(SCARG(uap, fd)); 458 } 459 460 /* 461 * Return status information about a file descriptor. 462 * Common function for compat code. 463 */ 464 int 465 do_sys_fstat(int fd, struct stat *sb) 466 { 467 file_t *fp; 468 int error; 469 470 if ((fp = fd_getfile(fd)) == NULL) { 471 return EBADF; 472 } 473 error = (*fp->f_ops->fo_stat)(fp, sb); 474 fd_putfile(fd); 475 476 return error; 477 } 478 479 /* 480 * Return status information about a file descriptor. 481 */ 482 int 483 sys___fstat30(struct lwp *l, const struct sys___fstat30_args *uap, 484 register_t *retval) 485 { 486 /* { 487 syscallarg(int) fd; 488 syscallarg(struct stat *) sb; 489 } */ 490 struct stat sb; 491 int error; 492 493 error = do_sys_fstat(SCARG(uap, fd), &sb); 494 if (error == 0) { 495 error = copyout(&sb, SCARG(uap, sb), sizeof(sb)); 496 } 497 return error; 498 } 499 500 /* 501 * Return pathconf information about a file descriptor. 502 */ 503 int 504 sys_fpathconf(struct lwp *l, const struct sys_fpathconf_args *uap, 505 register_t *retval) 506 { 507 /* { 508 syscallarg(int) fd; 509 syscallarg(int) name; 510 } */ 511 int fd, error; 512 file_t *fp; 513 514 fd = SCARG(uap, fd); 515 error = 0; 516 517 if ((fp = fd_getfile(fd)) == NULL) { 518 return (EBADF); 519 } 520 switch (fp->f_type) { 521 case DTYPE_SOCKET: 522 case DTYPE_PIPE: 523 if (SCARG(uap, name) != _PC_PIPE_BUF) 524 error = EINVAL; 525 else 526 *retval = PIPE_BUF; 527 break; 528 529 case DTYPE_VNODE: 530 error = VOP_PATHCONF(fp->f_data, SCARG(uap, name), retval); 531 break; 532 533 case DTYPE_KQUEUE: 534 error = EINVAL; 535 break; 536 537 default: 538 error = EOPNOTSUPP; 539 break; 540 } 541 542 fd_putfile(fd); 543 return (error); 544 } 545 546 /* 547 * Apply an advisory lock on a file descriptor. 548 * 549 * Just attempt to get a record lock of the requested type on 550 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 551 */ 552 /* ARGSUSED */ 553 int 554 sys_flock(struct lwp *l, const struct sys_flock_args *uap, register_t *retval) 555 { 556 /* { 557 syscallarg(int) fd; 558 syscallarg(int) how; 559 } */ 560 int fd, how, error; 561 file_t *fp; 562 vnode_t *vp; 563 struct flock lf; 564 proc_t *p; 565 566 fd = SCARG(uap, fd); 567 how = SCARG(uap, how); 568 error = 0; 569 570 if ((fp = fd_getfile(fd)) == NULL) { 571 return EBADF; 572 } 573 if (fp->f_type != DTYPE_VNODE) { 574 fd_putfile(fd); 575 return EOPNOTSUPP; 576 } 577 578 vp = fp->f_data; 579 lf.l_whence = SEEK_SET; 580 lf.l_start = 0; 581 lf.l_len = 0; 582 if (how & LOCK_UN) { 583 lf.l_type = F_UNLCK; 584 atomic_and_uint(&fp->f_flag, ~FHASLOCK); 585 error = VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK); 586 fd_putfile(fd); 587 return error; 588 } 589 if (how & LOCK_EX) { 590 lf.l_type = F_WRLCK; 591 } else if (how & LOCK_SH) { 592 lf.l_type = F_RDLCK; 593 } else { 594 fd_putfile(fd); 595 return EINVAL; 596 } 597 atomic_or_uint(&fp->f_flag, FHASLOCK); 598 p = curproc; 599 if (how & LOCK_NB) { 600 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK); 601 } else { 602 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK|F_WAIT); 603 } 604 fd_putfile(fd); 605 return error; 606 } 607 608 int 609 do_posix_fadvise(int fd, off_t offset, off_t len, int advice) 610 { 611 file_t *fp; 612 int error; 613 614 if ((fp = fd_getfile(fd)) == NULL) { 615 return EBADF; 616 } 617 if (fp->f_type != DTYPE_VNODE) { 618 if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { 619 error = ESPIPE; 620 } else { 621 error = EOPNOTSUPP; 622 } 623 fd_putfile(fd); 624 return error; 625 } 626 627 switch (advice) { 628 case POSIX_FADV_NORMAL: 629 case POSIX_FADV_RANDOM: 630 case POSIX_FADV_SEQUENTIAL: 631 KASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL); 632 KASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM); 633 KASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL); 634 635 /* 636 * We ignore offset and size. must lock the file to 637 * do this, as f_advice is sub-word sized. 638 */ 639 mutex_enter(&fp->f_lock); 640 fp->f_advice = (u_char)advice; 641 mutex_exit(&fp->f_lock); 642 error = 0; 643 break; 644 645 case POSIX_FADV_WILLNEED: 646 case POSIX_FADV_DONTNEED: 647 case POSIX_FADV_NOREUSE: 648 /* Not implemented yet. */ 649 error = 0; 650 break; 651 default: 652 error = EINVAL; 653 break; 654 } 655 656 fd_putfile(fd); 657 return error; 658 } 659 660 int 661 sys___posix_fadvise50(struct lwp *l, 662 const struct sys___posix_fadvise50_args *uap, 663 register_t *retval) 664 { 665 /* { 666 syscallarg(int) fd; 667 syscallarg(int) pad; 668 syscallarg(off_t) offset; 669 syscallarg(off_t) len; 670 syscallarg(int) advice; 671 } */ 672 673 return do_posix_fadvise(SCARG(uap, fd), SCARG(uap, offset), 674 SCARG(uap, len), SCARG(uap, advice)); 675 } 676