1 /* $NetBSD: sys_descrip.c,v 1.2 2008/04/24 18:39:24 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the NetBSD 18 * Foundation, Inc. and its contributors. 19 * 4. Neither the name of The NetBSD Foundation nor the names of its 20 * contributors may be used to endorse or promote products derived 21 * from this software without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /* 37 * Copyright (c) 1982, 1986, 1989, 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * (c) UNIX System Laboratories, Inc. 40 * All or some portions of this file are derived from material licensed 41 * to the University of California by American Telephone and Telegraph 42 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 43 * the permission of UNIX System Laboratories, Inc. 44 * 45 * Redistribution and use in source and binary forms, with or without 46 * modification, are permitted provided that the following conditions 47 * are met: 48 * 1. Redistributions of source code must retain the above copyright 49 * notice, this list of conditions and the following disclaimer. 50 * 2. Redistributions in binary form must reproduce the above copyright 51 * notice, this list of conditions and the following disclaimer in the 52 * documentation and/or other materials provided with the distribution. 53 * 3. Neither the name of the University nor the names of its contributors 54 * may be used to endorse or promote products derived from this software 55 * without specific prior written permission. 56 * 57 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 58 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 59 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 60 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 61 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 62 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 63 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 64 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 65 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 66 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 67 * SUCH DAMAGE. 68 * 69 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 70 */ 71 72 /* 73 * System calls on descriptors. 74 */ 75 76 #include <sys/cdefs.h> 77 __KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.2 2008/04/24 18:39:24 ad Exp $"); 78 79 #include <sys/param.h> 80 #include <sys/systm.h> 81 #include <sys/filedesc.h> 82 #include <sys/kernel.h> 83 #include <sys/vnode.h> 84 #include <sys/proc.h> 85 #include <sys/file.h> 86 #include <sys/namei.h> 87 #include <sys/socket.h> 88 #include <sys/socketvar.h> 89 #include <sys/stat.h> 90 #include <sys/ioctl.h> 91 #include <sys/fcntl.h> 92 #include <sys/malloc.h> 93 #include <sys/pool.h> 94 #include <sys/syslog.h> 95 #include <sys/unistd.h> 96 #include <sys/resourcevar.h> 97 #include <sys/conf.h> 98 #include <sys/event.h> 99 #include <sys/kauth.h> 100 #include <sys/atomic.h> 101 #include <sys/mount.h> 102 #include <sys/syscallargs.h> 103 104 /* 105 * Duplicate a file descriptor. 106 */ 107 int 108 sys_dup(struct lwp *l, const struct sys_dup_args *uap, register_t *retval) 109 { 110 /* { 111 syscallarg(int) fd; 112 } */ 113 int new, error, old; 114 file_t *fp; 115 116 old = SCARG(uap, fd); 117 118 if ((fp = fd_getfile(old)) == NULL) { 119 return EBADF; 120 } 121 error = fd_dup(fp, 0, &new, 0); 122 fd_putfile(old); 123 *retval = new; 124 return error; 125 } 126 127 /* 128 * Duplicate a file descriptor to a particular value. 129 */ 130 int 131 sys_dup2(struct lwp *l, const struct sys_dup2_args *uap, register_t *retval) 132 { 133 /* { 134 syscallarg(int) from; 135 syscallarg(int) to; 136 } */ 137 int old, new, error; 138 file_t *fp; 139 140 old = SCARG(uap, from); 141 new = SCARG(uap, to); 142 143 if ((fp = fd_getfile(old)) == NULL) { 144 return EBADF; 145 } 146 if ((u_int)new >= curproc->p_rlimit[RLIMIT_NOFILE].rlim_cur || 147 (u_int)new >= maxfiles) { 148 error = EBADF; 149 } else if (old == new) { 150 error = 0; 151 } else { 152 error = fd_dup2(fp, new); 153 } 154 fd_putfile(old); 155 *retval = new; 156 157 return 0; 158 } 159 160 /* 161 * fcntl call which is being passed to the file's fs. 162 */ 163 static int 164 fcntl_forfs(int fd, file_t *fp, int cmd, void *arg) 165 { 166 int error; 167 u_int size; 168 void *data, *memp; 169 #define STK_PARAMS 128 170 char stkbuf[STK_PARAMS]; 171 172 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 173 return (EBADF); 174 175 /* 176 * Interpret high order word to find amount of data to be 177 * copied to/from the user's address space. 178 */ 179 size = (size_t)F_PARAM_LEN(cmd); 180 if (size > F_PARAM_MAX) 181 return (EINVAL); 182 memp = NULL; 183 if (size > sizeof(stkbuf)) { 184 memp = kmem_alloc(size, KM_SLEEP); 185 data = memp; 186 } else 187 data = stkbuf; 188 if (cmd & F_FSIN) { 189 if (size) { 190 error = copyin(arg, data, size); 191 if (error) { 192 if (memp) 193 kmem_free(memp, size); 194 return (error); 195 } 196 } else 197 *(void **)data = arg; 198 } else if ((cmd & F_FSOUT) != 0 && size != 0) { 199 /* 200 * Zero the buffer so the user always 201 * gets back something deterministic. 202 */ 203 memset(data, 0, size); 204 } else if (cmd & F_FSVOID) 205 *(void **)data = arg; 206 207 208 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data); 209 210 /* 211 * Copy any data to user, size was 212 * already set and checked above. 213 */ 214 if (error == 0 && (cmd & F_FSOUT) && size) 215 error = copyout(data, arg, size); 216 if (memp) 217 kmem_free(memp, size); 218 return (error); 219 } 220 221 int 222 do_fcntl_lock(int fd, int cmd, struct flock *fl) 223 { 224 file_t *fp; 225 vnode_t *vp; 226 proc_t *p; 227 int error, flg; 228 229 if ((fp = fd_getfile(fd)) == NULL) 230 return EBADF; 231 if (fp->f_type != DTYPE_VNODE) { 232 fd_putfile(fd); 233 return EINVAL; 234 } 235 vp = fp->f_data; 236 if (fl->l_whence == SEEK_CUR) 237 fl->l_start += fp->f_offset; 238 239 flg = F_POSIX; 240 p = curproc; 241 242 switch (cmd) { 243 case F_SETLKW: 244 flg |= F_WAIT; 245 /* Fall into F_SETLK */ 246 247 case F_SETLK: 248 switch (fl->l_type) { 249 case F_RDLCK: 250 if ((fp->f_flag & FREAD) == 0) { 251 error = EBADF; 252 break; 253 } 254 if ((p->p_flag & PK_ADVLOCK) == 0) { 255 mutex_enter(p->p_lock); 256 p->p_flag |= PK_ADVLOCK; 257 mutex_exit(p->p_lock); 258 } 259 error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg); 260 break; 261 262 case F_WRLCK: 263 if ((fp->f_flag & FWRITE) == 0) { 264 error = EBADF; 265 break; 266 } 267 if ((p->p_flag & PK_ADVLOCK) == 0) { 268 mutex_enter(p->p_lock); 269 p->p_flag |= PK_ADVLOCK; 270 mutex_exit(p->p_lock); 271 } 272 error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg); 273 break; 274 275 case F_UNLCK: 276 error = VOP_ADVLOCK(vp, p, F_UNLCK, fl, F_POSIX); 277 break; 278 279 default: 280 error = EINVAL; 281 break; 282 } 283 break; 284 285 case F_GETLK: 286 if (fl->l_type != F_RDLCK && 287 fl->l_type != F_WRLCK && 288 fl->l_type != F_UNLCK) { 289 error = EINVAL; 290 break; 291 } 292 error = VOP_ADVLOCK(vp, p, F_GETLK, fl, F_POSIX); 293 break; 294 295 default: 296 error = EINVAL; 297 break; 298 } 299 300 fd_putfile(fd); 301 return error; 302 } 303 304 /* 305 * The file control system call. 306 */ 307 int 308 sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval) 309 { 310 /* { 311 syscallarg(int) fd; 312 syscallarg(int) cmd; 313 syscallarg(void *) arg; 314 } */ 315 int fd, i, tmp, error, cmd, newmin; 316 filedesc_t *fdp; 317 file_t *fp; 318 fdfile_t *ff; 319 proc_t *p; 320 struct flock fl; 321 322 p = l->l_proc; 323 fd = SCARG(uap, fd); 324 cmd = SCARG(uap, cmd); 325 fdp = p->p_fd; 326 error = 0; 327 328 switch (cmd) { 329 case F_CLOSEM: 330 if (fd < 0) 331 return EBADF; 332 while ((i = fdp->fd_lastfile) >= fd) { 333 if (fd_getfile(i) == NULL) { 334 /* Another thread has updated. */ 335 continue; 336 } 337 fd_close(i); 338 } 339 return 0; 340 341 case F_MAXFD: 342 *retval = fdp->fd_lastfile; 343 return 0; 344 345 case F_SETLKW: 346 case F_SETLK: 347 case F_GETLK: 348 error = copyin(SCARG(uap, arg), &fl, sizeof(fl)); 349 if (error) 350 return error; 351 error = do_fcntl_lock(fd, cmd, &fl); 352 if (cmd == F_GETLK && error == 0) 353 error = copyout(&fl, SCARG(uap, arg), sizeof(fl)); 354 return error; 355 356 default: 357 /* Handled below */ 358 break; 359 } 360 361 if ((fp = fd_getfile(fd)) == NULL) 362 return (EBADF); 363 ff = fdp->fd_ofiles[fd]; 364 365 if ((cmd & F_FSCTL)) { 366 error = fcntl_forfs(fd, fp, cmd, SCARG(uap, arg)); 367 fd_putfile(fd); 368 return error; 369 } 370 371 switch (cmd) { 372 case F_DUPFD: 373 newmin = (long)SCARG(uap, arg); 374 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 375 (u_int)newmin >= maxfiles) { 376 fd_putfile(fd); 377 return EINVAL; 378 } 379 error = fd_dup(fp, newmin, &i, 0); 380 *retval = i; 381 break; 382 383 case F_GETFD: 384 *retval = ff->ff_exclose; 385 break; 386 387 case F_SETFD: 388 if ((long)SCARG(uap, arg) & 1) { 389 ff->ff_exclose = 1; 390 fdp->fd_exclose = 1; 391 } else { 392 ff->ff_exclose = 0; 393 } 394 break; 395 396 case F_GETFL: 397 *retval = OFLAGS(fp->f_flag); 398 break; 399 400 case F_SETFL: 401 /* XXX not guaranteed to be atomic. */ 402 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 403 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp); 404 if (error) 405 break; 406 i = tmp ^ fp->f_flag; 407 if (i & FNONBLOCK) { 408 int flgs = tmp & FNONBLOCK; 409 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs); 410 if (error) { 411 (*fp->f_ops->fo_fcntl)(fp, F_SETFL, 412 &fp->f_flag); 413 break; 414 } 415 } 416 if (i & FASYNC) { 417 int flgs = tmp & FASYNC; 418 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs); 419 if (error) { 420 if (i & FNONBLOCK) { 421 tmp = fp->f_flag & FNONBLOCK; 422 (void)(*fp->f_ops->fo_ioctl)(fp, 423 FIONBIO, &tmp); 424 } 425 (*fp->f_ops->fo_fcntl)(fp, F_SETFL, 426 &fp->f_flag); 427 break; 428 } 429 } 430 fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp; 431 break; 432 433 case F_GETOWN: 434 error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp); 435 *retval = tmp; 436 break; 437 438 case F_SETOWN: 439 tmp = (int)(intptr_t) SCARG(uap, arg); 440 error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp); 441 break; 442 443 default: 444 error = EINVAL; 445 } 446 447 fd_putfile(fd); 448 return (error); 449 } 450 451 /* 452 * Close a file descriptor. 453 */ 454 int 455 sys_close(struct lwp *l, const struct sys_close_args *uap, register_t *retval) 456 { 457 /* { 458 syscallarg(int) fd; 459 } */ 460 461 if (fd_getfile(SCARG(uap, fd)) == NULL) { 462 return EBADF; 463 } 464 return fd_close(SCARG(uap, fd)); 465 } 466 467 /* 468 * Return status information about a file descriptor. 469 * Common function for compat code. 470 */ 471 int 472 do_sys_fstat(int fd, struct stat *sb) 473 { 474 file_t *fp; 475 int error; 476 477 if ((fp = fd_getfile(fd)) == NULL) { 478 return EBADF; 479 } 480 error = (*fp->f_ops->fo_stat)(fp, sb); 481 fd_putfile(fd); 482 483 return error; 484 } 485 486 /* 487 * Return status information about a file descriptor. 488 */ 489 int 490 sys___fstat30(struct lwp *l, const struct sys___fstat30_args *uap, 491 register_t *retval) 492 { 493 /* { 494 syscallarg(int) fd; 495 syscallarg(struct stat *) sb; 496 } */ 497 struct stat sb; 498 int error; 499 500 error = do_sys_fstat(SCARG(uap, fd), &sb); 501 if (error == 0) { 502 error = copyout(&sb, SCARG(uap, sb), sizeof(sb)); 503 } 504 return error; 505 } 506 507 /* 508 * Return pathconf information about a file descriptor. 509 */ 510 int 511 sys_fpathconf(struct lwp *l, const struct sys_fpathconf_args *uap, 512 register_t *retval) 513 { 514 /* { 515 syscallarg(int) fd; 516 syscallarg(int) name; 517 } */ 518 int fd, error; 519 file_t *fp; 520 521 fd = SCARG(uap, fd); 522 error = 0; 523 524 if ((fp = fd_getfile(fd)) == NULL) { 525 return (EBADF); 526 } 527 switch (fp->f_type) { 528 case DTYPE_SOCKET: 529 case DTYPE_PIPE: 530 if (SCARG(uap, name) != _PC_PIPE_BUF) 531 error = EINVAL; 532 else 533 *retval = PIPE_BUF; 534 break; 535 536 case DTYPE_VNODE: 537 error = VOP_PATHCONF(fp->f_data, SCARG(uap, name), retval); 538 break; 539 540 case DTYPE_KQUEUE: 541 error = EINVAL; 542 break; 543 544 default: 545 error = EOPNOTSUPP; 546 break; 547 } 548 549 fd_putfile(fd); 550 return (error); 551 } 552 553 /* 554 * Apply an advisory lock on a file descriptor. 555 * 556 * Just attempt to get a record lock of the requested type on 557 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 558 */ 559 /* ARGSUSED */ 560 int 561 sys_flock(struct lwp *l, const struct sys_flock_args *uap, register_t *retval) 562 { 563 /* { 564 syscallarg(int) fd; 565 syscallarg(int) how; 566 } */ 567 int fd, how, error; 568 file_t *fp; 569 vnode_t *vp; 570 struct flock lf; 571 proc_t *p; 572 573 fd = SCARG(uap, fd); 574 how = SCARG(uap, how); 575 error = 0; 576 577 if ((fp = fd_getfile(fd)) == NULL) { 578 return EBADF; 579 } 580 if (fp->f_type != DTYPE_VNODE) { 581 fd_putfile(fd); 582 return EOPNOTSUPP; 583 } 584 585 vp = fp->f_data; 586 lf.l_whence = SEEK_SET; 587 lf.l_start = 0; 588 lf.l_len = 0; 589 if (how & LOCK_UN) { 590 lf.l_type = F_UNLCK; 591 atomic_and_uint(&fp->f_flag, ~FHASLOCK); 592 error = VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK); 593 fd_putfile(fd); 594 return error; 595 } 596 if (how & LOCK_EX) { 597 lf.l_type = F_WRLCK; 598 } else if (how & LOCK_SH) { 599 lf.l_type = F_RDLCK; 600 } else { 601 fd_putfile(fd); 602 return EINVAL; 603 } 604 atomic_or_uint(&fp->f_flag, FHASLOCK); 605 p = curproc; 606 if (how & LOCK_NB) { 607 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK); 608 } else { 609 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK|F_WAIT); 610 } 611 fd_putfile(fd); 612 return error; 613 } 614 615 int 616 do_posix_fadvise(int fd, off_t offset, off_t len, int advice) 617 { 618 file_t *fp; 619 int error; 620 621 if ((fp = fd_getfile(fd)) == NULL) { 622 return EBADF; 623 } 624 if (fp->f_type != DTYPE_VNODE) { 625 if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { 626 error = ESPIPE; 627 } else { 628 error = EOPNOTSUPP; 629 } 630 fd_putfile(fd); 631 return error; 632 } 633 634 switch (advice) { 635 case POSIX_FADV_NORMAL: 636 case POSIX_FADV_RANDOM: 637 case POSIX_FADV_SEQUENTIAL: 638 KASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL); 639 KASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM); 640 KASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL); 641 642 /* 643 * We ignore offset and size. must lock the file to 644 * do this, as f_advice is sub-word sized. 645 */ 646 mutex_enter(&fp->f_lock); 647 fp->f_advice = (u_char)advice; 648 mutex_exit(&fp->f_lock); 649 error = 0; 650 break; 651 652 case POSIX_FADV_WILLNEED: 653 case POSIX_FADV_DONTNEED: 654 case POSIX_FADV_NOREUSE: 655 /* Not implemented yet. */ 656 error = 0; 657 break; 658 default: 659 error = EINVAL; 660 break; 661 } 662 663 fd_putfile(fd); 664 return error; 665 } 666 667 int 668 sys___posix_fadvise50(struct lwp *l, 669 const struct sys___posix_fadvise50_args *uap, 670 register_t *retval) 671 { 672 /* { 673 syscallarg(int) fd; 674 syscallarg(int) pad; 675 syscallarg(off_t) offset; 676 syscallarg(off_t) len; 677 syscallarg(int) advice; 678 } */ 679 680 return do_posix_fadvise(SCARG(uap, fd), SCARG(uap, offset), 681 SCARG(uap, len), SCARG(uap, advice)); 682 } 683