/*	$NetBSD: sys_descrip.c,v 1.51 2024/05/20 09:37:34 martin Exp $	*/

/*-
 * Copyright (c) 2008, 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California. All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_descrip.c	8.8 (Berkeley) 2/14/95
 */

/*
 * System calls on descriptors.
67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.51 2024/05/20 09:37:34 martin Exp $"); 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/filedesc.h> 75 #include <sys/kernel.h> 76 #include <sys/vnode.h> 77 #include <sys/proc.h> 78 #include <sys/file.h> 79 #include <sys/namei.h> 80 #include <sys/socket.h> 81 #include <sys/socketvar.h> 82 #include <sys/stat.h> 83 #include <sys/ioctl.h> 84 #include <sys/fcntl.h> 85 #include <sys/kmem.h> 86 #include <sys/pool.h> 87 #include <sys/syslog.h> 88 #include <sys/unistd.h> 89 #include <sys/resourcevar.h> 90 #include <sys/conf.h> 91 #include <sys/event.h> 92 #include <sys/kauth.h> 93 #include <sys/atomic.h> 94 #include <sys/mount.h> 95 #include <sys/syscallargs.h> 96 97 #include <uvm/uvm_readahead.h> 98 99 /* 100 * Duplicate a file descriptor. 101 */ 102 int 103 sys_dup(struct lwp *l, const struct sys_dup_args *uap, register_t *retval) 104 { 105 /* { 106 syscallarg(int) fd; 107 } */ 108 int error, newfd, oldfd; 109 file_t *fp; 110 111 oldfd = SCARG(uap, fd); 112 113 if ((fp = fd_getfile(oldfd)) == NULL) { 114 return EBADF; 115 } 116 error = fd_dup(fp, 0, &newfd, false); 117 fd_putfile(oldfd); 118 *retval = newfd; 119 return error; 120 } 121 122 /* 123 * Duplicate a file descriptor to a particular value. 
124 */ 125 int 126 dodup(struct lwp *l, int from, int to, int flags, register_t *retval) 127 { 128 int error; 129 file_t *fp; 130 131 if ((fp = fd_getfile(from)) == NULL) 132 return EBADF; 133 mutex_enter(&fp->f_lock); 134 fp->f_count++; 135 mutex_exit(&fp->f_lock); 136 fd_putfile(from); 137 138 if ((u_int)to >= curproc->p_rlimit[RLIMIT_NOFILE].rlim_cur || 139 (u_int)to >= maxfiles) 140 error = EBADF; 141 else if (from == to) 142 error = 0; 143 else 144 error = fd_dup2(fp, to, flags); 145 closef(fp); 146 *retval = to; 147 148 return error; 149 } 150 151 int 152 sys___dup3100(struct lwp *l, const struct sys___dup3100_args *uap, register_t *retval) 153 { 154 /* { 155 syscallarg(int) from; 156 syscallarg(int) to; 157 syscallarg(int) flags; 158 } */ 159 if (SCARG(uap, from) == SCARG(uap, to)) 160 return EINVAL; 161 return dodup(l, SCARG(uap, from), SCARG(uap, to), SCARG(uap, flags), 162 retval); 163 } 164 165 int 166 sys_dup2(struct lwp *l, const struct sys_dup2_args *uap, register_t *retval) 167 { 168 /* { 169 syscallarg(int) from; 170 syscallarg(int) to; 171 } */ 172 return dodup(l, SCARG(uap, from), SCARG(uap, to), 0, retval); 173 } 174 175 /* 176 * fcntl call which is being passed to the file's fs. 177 */ 178 static int 179 fcntl_forfs(int fd, file_t *fp, int cmd, void *arg) 180 { 181 int error; 182 u_int size; 183 void *data, *memp; 184 #define STK_PARAMS 128 185 char stkbuf[STK_PARAMS]; 186 187 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 188 return (EBADF); 189 190 /* 191 * Interpret high order word to find amount of data to be 192 * copied to/from the user's address space. 
193 */ 194 size = (size_t)F_PARAM_LEN(cmd); 195 if (size > F_PARAM_MAX) 196 return (EINVAL); 197 memp = NULL; 198 if (size > sizeof(stkbuf)) { 199 memp = kmem_alloc(size, KM_SLEEP); 200 data = memp; 201 } else 202 data = stkbuf; 203 if (cmd & F_FSIN) { 204 if (size) { 205 error = copyin(arg, data, size); 206 if (error) { 207 if (memp) 208 kmem_free(memp, size); 209 return (error); 210 } 211 } else 212 *(void **)data = arg; 213 } else if ((cmd & F_FSOUT) != 0 && size != 0) { 214 /* 215 * Zero the buffer so the user always 216 * gets back something deterministic. 217 */ 218 memset(data, 0, size); 219 } else if (cmd & F_FSVOID) 220 *(void **)data = arg; 221 222 223 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data); 224 225 /* 226 * Copy any data to user, size was 227 * already set and checked above. 228 */ 229 if (error == 0 && (cmd & F_FSOUT) && size) 230 error = copyout(data, arg, size); 231 if (memp) 232 kmem_free(memp, size); 233 return (error); 234 } 235 236 int 237 do_fcntl_lock(int fd, int cmd, struct flock *fl) 238 { 239 struct file *fp = NULL; 240 proc_t *p; 241 int (*fo_advlock)(struct file *, void *, int, struct flock *, int); 242 int error, flg; 243 244 if ((fp = fd_getfile(fd)) == NULL) { 245 error = EBADF; 246 goto out; 247 } 248 if ((fo_advlock = fp->f_ops->fo_advlock) == NULL) { 249 error = EINVAL; 250 goto out; 251 } 252 253 flg = F_POSIX; 254 p = curproc; 255 256 switch (cmd) { 257 case F_SETLKW: 258 flg |= F_WAIT; 259 /* Fall into F_SETLK */ 260 261 /* FALLTHROUGH */ 262 case F_SETLK: 263 switch (fl->l_type) { 264 case F_RDLCK: 265 if ((fp->f_flag & FREAD) == 0) { 266 error = EBADF; 267 break; 268 } 269 if ((p->p_flag & PK_ADVLOCK) == 0) { 270 mutex_enter(p->p_lock); 271 p->p_flag |= PK_ADVLOCK; 272 mutex_exit(p->p_lock); 273 } 274 error = (*fo_advlock)(fp, p, F_SETLK, fl, flg); 275 break; 276 277 case F_WRLCK: 278 if ((fp->f_flag & FWRITE) == 0) { 279 error = EBADF; 280 break; 281 } 282 if ((p->p_flag & PK_ADVLOCK) == 0) { 283 mutex_enter(p->p_lock); 
284 p->p_flag |= PK_ADVLOCK; 285 mutex_exit(p->p_lock); 286 } 287 error = (*fo_advlock)(fp, p, F_SETLK, fl, flg); 288 break; 289 290 case F_UNLCK: 291 error = (*fo_advlock)(fp, p, F_UNLCK, fl, F_POSIX); 292 break; 293 294 default: 295 error = EINVAL; 296 break; 297 } 298 break; 299 300 case F_GETLK: 301 if (fl->l_type != F_RDLCK && 302 fl->l_type != F_WRLCK && 303 fl->l_type != F_UNLCK) { 304 error = EINVAL; 305 break; 306 } 307 error = (*fo_advlock)(fp, p, F_GETLK, fl, F_POSIX); 308 break; 309 310 default: 311 error = EINVAL; 312 break; 313 } 314 315 out: if (fp) 316 fd_putfile(fd); 317 return error; 318 } 319 320 /* 321 * The file control system call. 322 */ 323 int 324 sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval) 325 { 326 /* { 327 syscallarg(int) fd; 328 syscallarg(int) cmd; 329 syscallarg(void *) arg; 330 } */ 331 int fd, i, tmp, error, cmd, newmin; 332 filedesc_t *fdp; 333 fdtab_t *dt; 334 file_t *fp; 335 char *kpath; 336 struct flock fl; 337 bool cloexec = false; 338 339 fd = SCARG(uap, fd); 340 cmd = SCARG(uap, cmd); 341 fdp = l->l_fd; 342 error = 0; 343 344 switch (cmd) { 345 case F_CLOSEM: 346 if (fd < 0) 347 return EBADF; 348 while ((i = fdp->fd_lastfile) >= fd) { 349 if (fd_getfile(i) == NULL) { 350 /* Another thread has updated. 
*/ 351 continue; 352 } 353 fd_close(i); 354 } 355 return 0; 356 357 case F_MAXFD: 358 *retval = fdp->fd_lastfile; 359 return 0; 360 361 case F_SETLKW: 362 case F_SETLK: 363 case F_GETLK: 364 error = copyin(SCARG(uap, arg), &fl, sizeof(fl)); 365 if (error) 366 return error; 367 error = do_fcntl_lock(fd, cmd, &fl); 368 if (cmd == F_GETLK && error == 0) 369 error = copyout(&fl, SCARG(uap, arg), sizeof(fl)); 370 return error; 371 372 default: 373 /* Handled below */ 374 break; 375 } 376 377 if ((fp = fd_getfile(fd)) == NULL) 378 return EBADF; 379 380 if ((cmd & F_FSCTL)) { 381 error = fcntl_forfs(fd, fp, cmd, SCARG(uap, arg)); 382 fd_putfile(fd); 383 return error; 384 } 385 386 switch (cmd) { 387 case F_DUPFD_CLOEXEC: 388 cloexec = true; 389 /*FALLTHROUGH*/ 390 case F_DUPFD: 391 newmin = (long)SCARG(uap, arg); 392 if ((u_int)newmin >= 393 l->l_proc->p_rlimit[RLIMIT_NOFILE].rlim_cur || 394 (u_int)newmin >= maxfiles) { 395 fd_putfile(fd); 396 return EINVAL; 397 } 398 error = fd_dup(fp, newmin, &i, cloexec); 399 *retval = i; 400 break; 401 402 case F_GETFD: 403 dt = atomic_load_consume(&fdp->fd_dt); 404 *retval = dt->dt_ff[fd]->ff_exclose; 405 break; 406 407 case F_SETFD: 408 fd_set_exclose(l, fd, 409 ((long)SCARG(uap, arg) & FD_CLOEXEC) != 0); 410 break; 411 412 case F_GETNOSIGPIPE: 413 *retval = (fp->f_flag & FNOSIGPIPE) != 0; 414 break; 415 416 case F_SETNOSIGPIPE: 417 if (SCARG(uap, arg)) 418 atomic_or_uint(&fp->f_flag, FNOSIGPIPE); 419 else 420 atomic_and_uint(&fp->f_flag, ~FNOSIGPIPE); 421 *retval = 0; 422 break; 423 424 case F_GETFL: 425 *retval = OFLAGS(fp->f_flag); 426 break; 427 428 case F_SETFL: 429 /* XXX not guaranteed to be atomic. 
*/ 430 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 431 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp); 432 if (error) 433 break; 434 i = tmp ^ fp->f_flag; 435 if (i & FNONBLOCK) { 436 int flgs = tmp & FNONBLOCK; 437 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs); 438 if (error) { 439 (*fp->f_ops->fo_fcntl)(fp, F_SETFL, 440 &fp->f_flag); 441 break; 442 } 443 } 444 if (i & FASYNC) { 445 int flgs = tmp & FASYNC; 446 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs); 447 if (error) { 448 if (i & FNONBLOCK) { 449 tmp = fp->f_flag & FNONBLOCK; 450 (void)(*fp->f_ops->fo_ioctl)(fp, 451 FIONBIO, &tmp); 452 } 453 (*fp->f_ops->fo_fcntl)(fp, F_SETFL, 454 &fp->f_flag); 455 break; 456 } 457 } 458 fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp; 459 break; 460 461 case F_GETOWN: 462 error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp); 463 *retval = tmp; 464 break; 465 466 case F_SETOWN: 467 tmp = (int)(uintptr_t) SCARG(uap, arg); 468 error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp); 469 break; 470 471 case F_GETPATH: 472 kpath = PNBUF_GET(); 473 474 /* vnodes need extra context, so are handled separately */ 475 if (fp->f_type == DTYPE_VNODE) 476 error = vnode_to_path(kpath, MAXPATHLEN, fp->f_vnode, 477 l, l->l_proc); 478 else 479 error = (*fp->f_ops->fo_fcntl)(fp, F_GETPATH, kpath); 480 481 if (error == 0) 482 error = copyoutstr(kpath, SCARG(uap, arg), MAXPATHLEN, 483 NULL); 484 485 PNBUF_PUT(kpath); 486 break; 487 488 case F_ADD_SEALS: 489 tmp = (int)(uintptr_t) SCARG(uap, arg); 490 error = (*fp->f_ops->fo_fcntl)(fp, F_ADD_SEALS, &tmp); 491 break; 492 493 case F_GET_SEALS: 494 error = (*fp->f_ops->fo_fcntl)(fp, F_GET_SEALS, &tmp); 495 *retval = tmp; 496 break; 497 498 default: 499 error = EINVAL; 500 } 501 502 fd_putfile(fd); 503 return (error); 504 } 505 506 /* 507 * Close a file descriptor. 
508 */ 509 int 510 sys_close(struct lwp *l, const struct sys_close_args *uap, register_t *retval) 511 { 512 /* { 513 syscallarg(int) fd; 514 } */ 515 int error; 516 int fd = SCARG(uap, fd); 517 518 if (fd_getfile(fd) == NULL) { 519 return EBADF; 520 } 521 522 error = fd_close(fd); 523 if (error == ERESTART) { 524 #ifdef DIAGNOSTIC 525 printf("%s[%d]: close(%d) returned ERESTART\n", 526 l->l_proc->p_comm, (int)l->l_proc->p_pid, fd); 527 #endif 528 error = EINTR; 529 } 530 531 return error; 532 } 533 534 /* 535 * Return status information about a file descriptor. 536 * Common function for compat code. 537 */ 538 int 539 do_sys_fstat(int fd, struct stat *sb) 540 { 541 file_t *fp; 542 int error; 543 544 if ((fp = fd_getfile(fd)) == NULL) { 545 return EBADF; 546 } 547 error = (*fp->f_ops->fo_stat)(fp, sb); 548 fd_putfile(fd); 549 550 return error; 551 } 552 553 /* 554 * Return status information about a file descriptor. 555 */ 556 int 557 sys___fstat50(struct lwp *l, const struct sys___fstat50_args *uap, 558 register_t *retval) 559 { 560 /* { 561 syscallarg(int) fd; 562 syscallarg(struct stat *) sb; 563 } */ 564 struct stat sb; 565 int error; 566 567 error = do_sys_fstat(SCARG(uap, fd), &sb); 568 if (error == 0) { 569 error = copyout(&sb, SCARG(uap, sb), sizeof(sb)); 570 } 571 return error; 572 } 573 574 /* 575 * Return pathconf information about a file descriptor. 576 */ 577 int 578 sys_fpathconf(struct lwp *l, const struct sys_fpathconf_args *uap, 579 register_t *retval) 580 { 581 /* { 582 syscallarg(int) fd; 583 syscallarg(int) name; 584 } */ 585 int fd, name, error; 586 file_t *fp; 587 588 fd = SCARG(uap, fd); 589 name = SCARG(uap, name); 590 error = 0; 591 592 if ((fp = fd_getfile(fd)) == NULL) 593 return EBADF; 594 if (fp->f_ops->fo_fpathconf == NULL) 595 error = EOPNOTSUPP; 596 else 597 error = (*fp->f_ops->fo_fpathconf)(fp, name, retval); 598 fd_putfile(fd); 599 return error; 600 } 601 602 /* 603 * Apply an advisory lock on a file descriptor. 
604 * 605 * Just attempt to get a record lock of the requested type on 606 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 607 */ 608 /* ARGSUSED */ 609 int 610 sys_flock(struct lwp *l, const struct sys_flock_args *uap, register_t *retval) 611 { 612 /* { 613 syscallarg(int) fd; 614 syscallarg(int) how; 615 } */ 616 int fd, how, error; 617 struct file *fp = NULL; 618 int (*fo_advlock)(struct file *, void *, int, struct flock *, int); 619 struct flock lf; 620 621 fd = SCARG(uap, fd); 622 how = SCARG(uap, how); 623 624 if ((fp = fd_getfile(fd)) == NULL) { 625 error = EBADF; 626 goto out; 627 } 628 if ((fo_advlock = fp->f_ops->fo_advlock) == NULL) { 629 KASSERT((atomic_load_relaxed(&fp->f_flag) & FHASLOCK) == 0); 630 error = EOPNOTSUPP; 631 goto out; 632 } 633 634 lf.l_whence = SEEK_SET; 635 lf.l_start = 0; 636 lf.l_len = 0; 637 638 switch (how & ~LOCK_NB) { 639 case LOCK_UN: 640 lf.l_type = F_UNLCK; 641 atomic_and_uint(&fp->f_flag, ~FHASLOCK); 642 error = (*fo_advlock)(fp, fp, F_UNLCK, &lf, F_FLOCK); 643 goto out; 644 case LOCK_EX: 645 lf.l_type = F_WRLCK; 646 break; 647 case LOCK_SH: 648 lf.l_type = F_RDLCK; 649 break; 650 default: 651 error = EINVAL; 652 goto out; 653 } 654 655 atomic_or_uint(&fp->f_flag, FHASLOCK); 656 if (how & LOCK_NB) { 657 error = (*fo_advlock)(fp, fp, F_SETLK, &lf, F_FLOCK); 658 } else { 659 error = (*fo_advlock)(fp, fp, F_SETLK, &lf, F_FLOCK|F_WAIT); 660 } 661 out: if (fp) 662 fd_putfile(fd); 663 return error; 664 } 665 666 int 667 do_posix_fadvise(int fd, off_t offset, off_t len, int advice) 668 { 669 file_t *fp; 670 int error; 671 672 if ((fp = fd_getfile(fd)) == NULL) 673 return EBADF; 674 if (fp->f_ops->fo_posix_fadvise == NULL) { 675 error = EOPNOTSUPP; 676 } else { 677 error = (*fp->f_ops->fo_posix_fadvise)(fp, offset, len, 678 advice); 679 } 680 fd_putfile(fd); 681 return error; 682 } 683 684 int 685 sys___posix_fadvise50(struct lwp *l, 686 const struct sys___posix_fadvise50_args *uap, 687 register_t *retval) 688 { 689 
/* { 690 syscallarg(int) fd; 691 syscallarg(int) pad; 692 syscallarg(off_t) offset; 693 syscallarg(off_t) len; 694 syscallarg(int) advice; 695 } */ 696 697 *retval = do_posix_fadvise(SCARG(uap, fd), SCARG(uap, offset), 698 SCARG(uap, len), SCARG(uap, advice)); 699 700 return 0; 701 } 702 703 int 704 sys_pipe(struct lwp *l, const void *v, register_t *retval) 705 { 706 int fd[2], error; 707 708 if ((error = pipe1(l, fd, 0)) != 0) 709 return error; 710 711 retval[0] = fd[0]; 712 retval[1] = fd[1]; 713 714 return 0; 715 } 716 717 int 718 sys_pipe2(struct lwp *l, const struct sys_pipe2_args *uap, register_t *retval) 719 { 720 /* { 721 syscallarg(int[2]) fildes; 722 syscallarg(int) flags; 723 } */ 724 int fd[2], error; 725 726 if ((error = pipe1(l, fd, SCARG(uap, flags))) != 0) 727 return error; 728 729 if ((error = copyout(fd, SCARG(uap, fildes), sizeof(fd))) != 0) 730 return error; 731 retval[0] = 0; 732 return 0; 733 } 734