1 /* $NetBSD: sys_descrip.c,v 1.46 2023/04/22 14:23:59 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright (c) 1982, 1986, 1989, 1991, 1993 31 * The Regents of the University of California. All rights reserved. 32 * (c) UNIX System Laboratories, Inc. 33 * All or some portions of this file are derived from material licensed 34 * to the University of California by American Telephone and Telegraph 35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 36 * the permission of UNIX System Laboratories, Inc. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 63 */ 64 65 /* 66 * System calls on descriptors. 67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.46 2023/04/22 14:23:59 riastradh Exp $"); 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/filedesc.h> 75 #include <sys/kernel.h> 76 #include <sys/vnode.h> 77 #include <sys/proc.h> 78 #include <sys/file.h> 79 #include <sys/namei.h> 80 #include <sys/socket.h> 81 #include <sys/socketvar.h> 82 #include <sys/stat.h> 83 #include <sys/ioctl.h> 84 #include <sys/fcntl.h> 85 #include <sys/kmem.h> 86 #include <sys/pool.h> 87 #include <sys/syslog.h> 88 #include <sys/unistd.h> 89 #include <sys/resourcevar.h> 90 #include <sys/conf.h> 91 #include <sys/event.h> 92 #include <sys/kauth.h> 93 #include <sys/atomic.h> 94 #include <sys/mount.h> 95 #include <sys/syscallargs.h> 96 97 #include <uvm/uvm_readahead.h> 98 99 /* 100 * Duplicate a file descriptor. 101 */ 102 int 103 sys_dup(struct lwp *l, const struct sys_dup_args *uap, register_t *retval) 104 { 105 /* { 106 syscallarg(int) fd; 107 } */ 108 int error, newfd, oldfd; 109 file_t *fp; 110 111 oldfd = SCARG(uap, fd); 112 113 if ((fp = fd_getfile(oldfd)) == NULL) { 114 return EBADF; 115 } 116 error = fd_dup(fp, 0, &newfd, false); 117 fd_putfile(oldfd); 118 *retval = newfd; 119 return error; 120 } 121 122 /* 123 * Duplicate a file descriptor to a particular value. 124 */ 125 int 126 dodup(struct lwp *l, int from, int to, int flags, register_t *retval) 127 { 128 int error; 129 file_t *fp; 130 131 if ((fp = fd_getfile(from)) == NULL) 132 return EBADF; 133 mutex_enter(&fp->f_lock); 134 fp->f_count++; 135 mutex_exit(&fp->f_lock); 136 fd_putfile(from); 137 138 if ((u_int)to >= curproc->p_rlimit[RLIMIT_NOFILE].rlim_cur || 139 (u_int)to >= maxfiles) 140 error = EBADF; 141 else if (from == to) 142 error = 0; 143 else 144 error = fd_dup2(fp, to, flags); 145 closef(fp); 146 *retval = to; 147 148 return error; 149 } 150 151 int 152 sys_dup3(struct lwp *l, const struct sys_dup3_args *uap, register_t *retval) 153 { 154 /* { 155 syscallarg(int) from; 156 syscallarg(int) to; 157 syscallarg(int) flags; 158 } */ 159 return dodup(l, SCARG(uap, from), SCARG(uap, to), SCARG(uap, flags), 160 retval); 161 } 162 163 int 164 sys_dup2(struct lwp *l, const struct sys_dup2_args *uap, register_t *retval) 165 { 166 /* { 167 syscallarg(int) from; 168 syscallarg(int) to; 169 } */ 170 return dodup(l, SCARG(uap, from), SCARG(uap, to), 0, retval); 171 } 172 173 /* 174 * fcntl call which is being passed to the file's fs. 175 */ 176 static int 177 fcntl_forfs(int fd, file_t *fp, int cmd, void *arg) 178 { 179 int error; 180 u_int size; 181 void *data, *memp; 182 #define STK_PARAMS 128 183 char stkbuf[STK_PARAMS]; 184 185 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 186 return (EBADF); 187 188 /* 189 * Interpret high order word to find amount of data to be 190 * copied to/from the user's address space. 191 */ 192 size = (size_t)F_PARAM_LEN(cmd); 193 if (size > F_PARAM_MAX) 194 return (EINVAL); 195 memp = NULL; 196 if (size > sizeof(stkbuf)) { 197 memp = kmem_alloc(size, KM_SLEEP); 198 data = memp; 199 } else 200 data = stkbuf; 201 if (cmd & F_FSIN) { 202 if (size) { 203 error = copyin(arg, data, size); 204 if (error) { 205 if (memp) 206 kmem_free(memp, size); 207 return (error); 208 } 209 } else 210 *(void **)data = arg; 211 } else if ((cmd & F_FSOUT) != 0 && size != 0) { 212 /* 213 * Zero the buffer so the user always 214 * gets back something deterministic. 215 */ 216 memset(data, 0, size); 217 } else if (cmd & F_FSVOID) 218 *(void **)data = arg; 219 220 221 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data); 222 223 /* 224 * Copy any data to user, size was 225 * already set and checked above. 226 */ 227 if (error == 0 && (cmd & F_FSOUT) && size) 228 error = copyout(data, arg, size); 229 if (memp) 230 kmem_free(memp, size); 231 return (error); 232 } 233 234 int 235 do_fcntl_lock(int fd, int cmd, struct flock *fl) 236 { 237 struct file *fp = NULL; 238 proc_t *p; 239 int (*fo_advlock)(struct file *, void *, int, struct flock *, int); 240 int error, flg; 241 242 if ((fp = fd_getfile(fd)) == NULL) { 243 error = EBADF; 244 goto out; 245 } 246 if ((fo_advlock = fp->f_ops->fo_advlock) == NULL) { 247 error = EINVAL; 248 goto out; 249 } 250 251 flg = F_POSIX; 252 p = curproc; 253 254 switch (cmd) { 255 case F_SETLKW: 256 flg |= F_WAIT; 257 /* Fall into F_SETLK */ 258 259 /* FALLTHROUGH */ 260 case F_SETLK: 261 switch (fl->l_type) { 262 case F_RDLCK: 263 if ((fp->f_flag & FREAD) == 0) { 264 error = EBADF; 265 break; 266 } 267 if ((p->p_flag & PK_ADVLOCK) == 0) { 268 mutex_enter(p->p_lock); 269 p->p_flag |= PK_ADVLOCK; 270 mutex_exit(p->p_lock); 271 } 272 error = (*fo_advlock)(fp, p, F_SETLK, fl, flg); 273 break; 274 275 case F_WRLCK: 276 if ((fp->f_flag & FWRITE) == 0) { 277 error = EBADF; 278 break; 279 } 280 if ((p->p_flag & PK_ADVLOCK) == 0) { 281 mutex_enter(p->p_lock); 282 p->p_flag |= PK_ADVLOCK; 283 mutex_exit(p->p_lock); 284 } 285 error = (*fo_advlock)(fp, p, F_SETLK, fl, flg); 286 break; 287 288 case F_UNLCK: 289 error = (*fo_advlock)(fp, p, F_UNLCK, fl, F_POSIX); 290 break; 291 292 default: 293 error = EINVAL; 294 break; 295 } 296 break; 297 298 case F_GETLK: 299 if (fl->l_type != F_RDLCK && 300 fl->l_type != F_WRLCK && 301 fl->l_type != F_UNLCK) { 302 error = EINVAL; 303 break; 304 } 305 error = (*fo_advlock)(fp, p, F_GETLK, fl, F_POSIX); 306 break; 307 308 default: 309 error = EINVAL; 310 break; 311 } 312 313 out: if (fp) 314 fd_putfile(fd); 315 return error; 316 } 317 318 static int 319 do_fcntl_getpath(struct lwp *l, file_t *fp, char *upath) 320 { 321 char *kpath; 322 int error; 323 324 if (fp->f_type != DTYPE_VNODE) 325 return EOPNOTSUPP; 326 327 kpath = PNBUF_GET(); 328 329 error = vnode_to_path(kpath, MAXPATHLEN, fp->f_vnode, l, l->l_proc); 330 if (!error) 331 error = copyoutstr(kpath, upath, MAXPATHLEN, NULL); 332 333 PNBUF_PUT(kpath); 334 335 return error; 336 } 337 338 /* 339 * The file control system call. 340 */ 341 int 342 sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval) 343 { 344 /* { 345 syscallarg(int) fd; 346 syscallarg(int) cmd; 347 syscallarg(void *) arg; 348 } */ 349 int fd, i, tmp, error, cmd, newmin; 350 filedesc_t *fdp; 351 fdtab_t *dt; 352 file_t *fp; 353 struct flock fl; 354 bool cloexec = false; 355 356 fd = SCARG(uap, fd); 357 cmd = SCARG(uap, cmd); 358 fdp = l->l_fd; 359 error = 0; 360 361 switch (cmd) { 362 case F_CLOSEM: 363 if (fd < 0) 364 return EBADF; 365 while ((i = fdp->fd_lastfile) >= fd) { 366 if (fd_getfile(i) == NULL) { 367 /* Another thread has updated. */ 368 continue; 369 } 370 fd_close(i); 371 } 372 return 0; 373 374 case F_MAXFD: 375 *retval = fdp->fd_lastfile; 376 return 0; 377 378 case F_SETLKW: 379 case F_SETLK: 380 case F_GETLK: 381 error = copyin(SCARG(uap, arg), &fl, sizeof(fl)); 382 if (error) 383 return error; 384 error = do_fcntl_lock(fd, cmd, &fl); 385 if (cmd == F_GETLK && error == 0) 386 error = copyout(&fl, SCARG(uap, arg), sizeof(fl)); 387 return error; 388 389 default: 390 /* Handled below */ 391 break; 392 } 393 394 if ((fp = fd_getfile(fd)) == NULL) 395 return EBADF; 396 397 if ((cmd & F_FSCTL)) { 398 error = fcntl_forfs(fd, fp, cmd, SCARG(uap, arg)); 399 fd_putfile(fd); 400 return error; 401 } 402 403 switch (cmd) { 404 case F_DUPFD_CLOEXEC: 405 cloexec = true; 406 /*FALLTHROUGH*/ 407 case F_DUPFD: 408 newmin = (long)SCARG(uap, arg); 409 if ((u_int)newmin >= 410 l->l_proc->p_rlimit[RLIMIT_NOFILE].rlim_cur || 411 (u_int)newmin >= maxfiles) { 412 fd_putfile(fd); 413 return EINVAL; 414 } 415 error = fd_dup(fp, newmin, &i, cloexec); 416 *retval = i; 417 break; 418 419 case F_GETFD: 420 dt = atomic_load_consume(&fdp->fd_dt); 421 *retval = dt->dt_ff[fd]->ff_exclose; 422 break; 423 424 case F_SETFD: 425 fd_set_exclose(l, fd, 426 ((long)SCARG(uap, arg) & FD_CLOEXEC) != 0); 427 break; 428 429 case F_GETNOSIGPIPE: 430 *retval = (fp->f_flag & FNOSIGPIPE) != 0; 431 break; 432 433 case F_SETNOSIGPIPE: 434 if (SCARG(uap, arg)) 435 atomic_or_uint(&fp->f_flag, FNOSIGPIPE); 436 else 437 atomic_and_uint(&fp->f_flag, ~FNOSIGPIPE); 438 *retval = 0; 439 break; 440 441 case F_GETFL: 442 *retval = OFLAGS(fp->f_flag); 443 break; 444 445 case F_SETFL: 446 /* XXX not guaranteed to be atomic. */ 447 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 448 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp); 449 if (error) 450 break; 451 i = tmp ^ fp->f_flag; 452 if (i & FNONBLOCK) { 453 int flgs = tmp & FNONBLOCK; 454 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs); 455 if (error) { 456 (*fp->f_ops->fo_fcntl)(fp, F_SETFL, 457 &fp->f_flag); 458 break; 459 } 460 } 461 if (i & FASYNC) { 462 int flgs = tmp & FASYNC; 463 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs); 464 if (error) { 465 if (i & FNONBLOCK) { 466 tmp = fp->f_flag & FNONBLOCK; 467 (void)(*fp->f_ops->fo_ioctl)(fp, 468 FIONBIO, &tmp); 469 } 470 (*fp->f_ops->fo_fcntl)(fp, F_SETFL, 471 &fp->f_flag); 472 break; 473 } 474 } 475 fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp; 476 break; 477 478 case F_GETOWN: 479 error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp); 480 *retval = tmp; 481 break; 482 483 case F_SETOWN: 484 tmp = (int)(uintptr_t) SCARG(uap, arg); 485 error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp); 486 break; 487 488 case F_GETPATH: 489 error = do_fcntl_getpath(l, fp, SCARG(uap, arg)); 490 break; 491 492 default: 493 error = EINVAL; 494 } 495 496 fd_putfile(fd); 497 return (error); 498 } 499 500 /* 501 * Close a file descriptor. 502 */ 503 int 504 sys_close(struct lwp *l, const struct sys_close_args *uap, register_t *retval) 505 { 506 /* { 507 syscallarg(int) fd; 508 } */ 509 int error; 510 int fd = SCARG(uap, fd); 511 512 if (fd_getfile(fd) == NULL) { 513 return EBADF; 514 } 515 516 error = fd_close(fd); 517 if (error == ERESTART) { 518 #ifdef DIAGNOSTIC 519 printf("%s[%d]: close(%d) returned ERESTART\n", 520 l->l_proc->p_comm, (int)l->l_proc->p_pid, fd); 521 #endif 522 error = EINTR; 523 } 524 525 return error; 526 } 527 528 /* 529 * Return status information about a file descriptor. 530 * Common function for compat code. 531 */ 532 int 533 do_sys_fstat(int fd, struct stat *sb) 534 { 535 file_t *fp; 536 int error; 537 538 if ((fp = fd_getfile(fd)) == NULL) { 539 return EBADF; 540 } 541 error = (*fp->f_ops->fo_stat)(fp, sb); 542 fd_putfile(fd); 543 544 return error; 545 } 546 547 /* 548 * Return status information about a file descriptor. 549 */ 550 int 551 sys___fstat50(struct lwp *l, const struct sys___fstat50_args *uap, 552 register_t *retval) 553 { 554 /* { 555 syscallarg(int) fd; 556 syscallarg(struct stat *) sb; 557 } */ 558 struct stat sb; 559 int error; 560 561 error = do_sys_fstat(SCARG(uap, fd), &sb); 562 if (error == 0) { 563 error = copyout(&sb, SCARG(uap, sb), sizeof(sb)); 564 } 565 return error; 566 } 567 568 /* 569 * Return pathconf information about a file descriptor. 570 */ 571 int 572 sys_fpathconf(struct lwp *l, const struct sys_fpathconf_args *uap, 573 register_t *retval) 574 { 575 /* { 576 syscallarg(int) fd; 577 syscallarg(int) name; 578 } */ 579 int fd, name, error; 580 file_t *fp; 581 582 fd = SCARG(uap, fd); 583 name = SCARG(uap, name); 584 error = 0; 585 586 if ((fp = fd_getfile(fd)) == NULL) 587 return EBADF; 588 if (fp->f_ops->fo_fpathconf == NULL) 589 error = EOPNOTSUPP; 590 else 591 error = (*fp->f_ops->fo_fpathconf)(fp, name, retval); 592 fd_putfile(fd); 593 return error; 594 } 595 596 /* 597 * Apply an advisory lock on a file descriptor. 598 * 599 * Just attempt to get a record lock of the requested type on 600 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 601 */ 602 /* ARGSUSED */ 603 int 604 sys_flock(struct lwp *l, const struct sys_flock_args *uap, register_t *retval) 605 { 606 /* { 607 syscallarg(int) fd; 608 syscallarg(int) how; 609 } */ 610 int fd, how, error; 611 struct file *fp = NULL; 612 int (*fo_advlock)(struct file *, void *, int, struct flock *, int); 613 struct flock lf; 614 615 fd = SCARG(uap, fd); 616 how = SCARG(uap, how); 617 618 if ((fp = fd_getfile(fd)) == NULL) { 619 error = EBADF; 620 goto out; 621 } 622 if ((fo_advlock = fp->f_ops->fo_advlock) == NULL) { 623 KASSERT((atomic_load_relaxed(&fp->f_flag) & FHASLOCK) == 0); 624 error = EOPNOTSUPP; 625 goto out; 626 } 627 628 lf.l_whence = SEEK_SET; 629 lf.l_start = 0; 630 lf.l_len = 0; 631 632 switch (how & ~LOCK_NB) { 633 case LOCK_UN: 634 lf.l_type = F_UNLCK; 635 atomic_and_uint(&fp->f_flag, ~FHASLOCK); 636 error = (*fo_advlock)(fp, fp, F_UNLCK, &lf, F_FLOCK); 637 goto out; 638 case LOCK_EX: 639 lf.l_type = F_WRLCK; 640 break; 641 case LOCK_SH: 642 lf.l_type = F_RDLCK; 643 break; 644 default: 645 error = EINVAL; 646 goto out; 647 } 648 649 atomic_or_uint(&fp->f_flag, FHASLOCK); 650 if (how & LOCK_NB) { 651 error = (*fo_advlock)(fp, fp, F_SETLK, &lf, F_FLOCK); 652 } else { 653 error = (*fo_advlock)(fp, fp, F_SETLK, &lf, F_FLOCK|F_WAIT); 654 } 655 out: if (fp) 656 fd_putfile(fd); 657 return error; 658 } 659 660 int 661 do_posix_fadvise(int fd, off_t offset, off_t len, int advice) 662 { 663 file_t *fp; 664 int error; 665 666 if ((fp = fd_getfile(fd)) == NULL) 667 return EBADF; 668 if (fp->f_ops->fo_posix_fadvise == NULL) { 669 error = EOPNOTSUPP; 670 } else { 671 error = (*fp->f_ops->fo_posix_fadvise)(fp, offset, len, 672 advice); 673 } 674 fd_putfile(fd); 675 return error; 676 } 677 678 int 679 sys___posix_fadvise50(struct lwp *l, 680 const struct sys___posix_fadvise50_args *uap, 681 register_t *retval) 682 { 683 /* { 684 syscallarg(int) fd; 685 syscallarg(int) pad; 686 syscallarg(off_t) offset; 687 syscallarg(off_t) len; 688 syscallarg(int) advice; 689 } */ 690 691 *retval = do_posix_fadvise(SCARG(uap, fd), SCARG(uap, offset), 692 SCARG(uap, len), SCARG(uap, advice)); 693 694 return 0; 695 } 696 697 int 698 sys_pipe(struct lwp *l, const void *v, register_t *retval) 699 { 700 int fd[2], error; 701 702 if ((error = pipe1(l, fd, 0)) != 0) 703 return error; 704 705 retval[0] = fd[0]; 706 retval[1] = fd[1]; 707 708 return 0; 709 } 710 711 int 712 sys_pipe2(struct lwp *l, const struct sys_pipe2_args *uap, register_t *retval) 713 { 714 /* { 715 syscallarg(int[2]) fildes; 716 syscallarg(int) flags; 717 } */ 718 int fd[2], error; 719 720 if ((error = pipe1(l, fd, SCARG(uap, flags))) != 0) 721 return error; 722 723 if ((error = copyout(fd, SCARG(uap, fildes), sizeof(fd))) != 0) 724 return error; 725 retval[0] = 0; 726 return 0; 727 } 728