1 /* $NetBSD: sys_descrip.c,v 1.48 2023/07/10 02:31:55 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright (c) 1982, 1986, 1989, 1991, 1993 31 * The Regents of the University of California. All rights reserved. 32 * (c) UNIX System Laboratories, Inc. 33 * All or some portions of this file are derived from material licensed 34 * to the University of California by American Telephone and Telegraph 35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 36 * the permission of UNIX System Laboratories, Inc. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 63 */ 64 65 /* 66 * System calls on descriptors. 67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.48 2023/07/10 02:31:55 christos Exp $"); 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/filedesc.h> 75 #include <sys/kernel.h> 76 #include <sys/vnode.h> 77 #include <sys/proc.h> 78 #include <sys/file.h> 79 #include <sys/namei.h> 80 #include <sys/socket.h> 81 #include <sys/socketvar.h> 82 #include <sys/stat.h> 83 #include <sys/ioctl.h> 84 #include <sys/fcntl.h> 85 #include <sys/kmem.h> 86 #include <sys/pool.h> 87 #include <sys/syslog.h> 88 #include <sys/unistd.h> 89 #include <sys/resourcevar.h> 90 #include <sys/conf.h> 91 #include <sys/event.h> 92 #include <sys/kauth.h> 93 #include <sys/atomic.h> 94 #include <sys/mount.h> 95 #include <sys/syscallargs.h> 96 97 #include <uvm/uvm_readahead.h> 98 99 /* 100 * Duplicate a file descriptor. 101 */ 102 int 103 sys_dup(struct lwp *l, const struct sys_dup_args *uap, register_t *retval) 104 { 105 /* { 106 syscallarg(int) fd; 107 } */ 108 int error, newfd, oldfd; 109 file_t *fp; 110 111 oldfd = SCARG(uap, fd); 112 113 if ((fp = fd_getfile(oldfd)) == NULL) { 114 return EBADF; 115 } 116 error = fd_dup(fp, 0, &newfd, false); 117 fd_putfile(oldfd); 118 *retval = newfd; 119 return error; 120 } 121 122 /* 123 * Duplicate a file descriptor to a particular value. 124 */ 125 int 126 dodup(struct lwp *l, int from, int to, int flags, register_t *retval) 127 { 128 int error; 129 file_t *fp; 130 131 if ((fp = fd_getfile(from)) == NULL) 132 return EBADF; 133 mutex_enter(&fp->f_lock); 134 fp->f_count++; 135 mutex_exit(&fp->f_lock); 136 fd_putfile(from); 137 138 if ((u_int)to >= curproc->p_rlimit[RLIMIT_NOFILE].rlim_cur || 139 (u_int)to >= maxfiles) 140 error = EBADF; 141 else if (from == to) 142 error = 0; 143 else 144 error = fd_dup2(fp, to, flags); 145 closef(fp); 146 *retval = to; 147 148 return error; 149 } 150 151 int 152 sys_dup3(struct lwp *l, const struct sys_dup3_args *uap, register_t *retval) 153 { 154 /* { 155 syscallarg(int) from; 156 syscallarg(int) to; 157 syscallarg(int) flags; 158 } */ 159 return dodup(l, SCARG(uap, from), SCARG(uap, to), SCARG(uap, flags), 160 retval); 161 } 162 163 int 164 sys_dup2(struct lwp *l, const struct sys_dup2_args *uap, register_t *retval) 165 { 166 /* { 167 syscallarg(int) from; 168 syscallarg(int) to; 169 } */ 170 return dodup(l, SCARG(uap, from), SCARG(uap, to), 0, retval); 171 } 172 173 /* 174 * fcntl call which is being passed to the file's fs. 175 */ 176 static int 177 fcntl_forfs(int fd, file_t *fp, int cmd, void *arg) 178 { 179 int error; 180 u_int size; 181 void *data, *memp; 182 #define STK_PARAMS 128 183 char stkbuf[STK_PARAMS]; 184 185 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 186 return (EBADF); 187 188 /* 189 * Interpret high order word to find amount of data to be 190 * copied to/from the user's address space. 191 */ 192 size = (size_t)F_PARAM_LEN(cmd); 193 if (size > F_PARAM_MAX) 194 return (EINVAL); 195 memp = NULL; 196 if (size > sizeof(stkbuf)) { 197 memp = kmem_alloc(size, KM_SLEEP); 198 data = memp; 199 } else 200 data = stkbuf; 201 if (cmd & F_FSIN) { 202 if (size) { 203 error = copyin(arg, data, size); 204 if (error) { 205 if (memp) 206 kmem_free(memp, size); 207 return (error); 208 } 209 } else 210 *(void **)data = arg; 211 } else if ((cmd & F_FSOUT) != 0 && size != 0) { 212 /* 213 * Zero the buffer so the user always 214 * gets back something deterministic. 215 */ 216 memset(data, 0, size); 217 } else if (cmd & F_FSVOID) 218 *(void **)data = arg; 219 220 221 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data); 222 223 /* 224 * Copy any data to user, size was 225 * already set and checked above. 226 */ 227 if (error == 0 && (cmd & F_FSOUT) && size) 228 error = copyout(data, arg, size); 229 if (memp) 230 kmem_free(memp, size); 231 return (error); 232 } 233 234 int 235 do_fcntl_lock(int fd, int cmd, struct flock *fl) 236 { 237 struct file *fp = NULL; 238 proc_t *p; 239 int (*fo_advlock)(struct file *, void *, int, struct flock *, int); 240 int error, flg; 241 242 if ((fp = fd_getfile(fd)) == NULL) { 243 error = EBADF; 244 goto out; 245 } 246 if ((fo_advlock = fp->f_ops->fo_advlock) == NULL) { 247 error = EINVAL; 248 goto out; 249 } 250 251 flg = F_POSIX; 252 p = curproc; 253 254 switch (cmd) { 255 case F_SETLKW: 256 flg |= F_WAIT; 257 /* Fall into F_SETLK */ 258 259 /* FALLTHROUGH */ 260 case F_SETLK: 261 switch (fl->l_type) { 262 case F_RDLCK: 263 if ((fp->f_flag & FREAD) == 0) { 264 error = EBADF; 265 break; 266 } 267 if ((p->p_flag & PK_ADVLOCK) == 0) { 268 mutex_enter(p->p_lock); 269 p->p_flag |= PK_ADVLOCK; 270 mutex_exit(p->p_lock); 271 } 272 error = (*fo_advlock)(fp, p, F_SETLK, fl, flg); 273 break; 274 275 case F_WRLCK: 276 if ((fp->f_flag & FWRITE) == 0) { 277 error = EBADF; 278 break; 279 } 280 if ((p->p_flag & PK_ADVLOCK) == 0) { 281 mutex_enter(p->p_lock); 282 p->p_flag |= PK_ADVLOCK; 283 mutex_exit(p->p_lock); 284 } 285 error = (*fo_advlock)(fp, p, F_SETLK, fl, flg); 286 break; 287 288 case F_UNLCK: 289 error = (*fo_advlock)(fp, p, F_UNLCK, fl, F_POSIX); 290 break; 291 292 default: 293 error = EINVAL; 294 break; 295 } 296 break; 297 298 case F_GETLK: 299 if (fl->l_type != F_RDLCK && 300 fl->l_type != F_WRLCK && 301 fl->l_type != F_UNLCK) { 302 error = EINVAL; 303 break; 304 } 305 error = (*fo_advlock)(fp, p, F_GETLK, fl, F_POSIX); 306 break; 307 308 default: 309 error = EINVAL; 310 break; 311 } 312 313 out: if (fp) 314 fd_putfile(fd); 315 return error; 316 } 317 318 /* 319 * The file control system call. 320 */ 321 int 322 sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval) 323 { 324 /* { 325 syscallarg(int) fd; 326 syscallarg(int) cmd; 327 syscallarg(void *) arg; 328 } */ 329 int fd, i, tmp, error, cmd, newmin; 330 filedesc_t *fdp; 331 fdtab_t *dt; 332 file_t *fp; 333 char *kpath; 334 struct flock fl; 335 bool cloexec = false; 336 337 fd = SCARG(uap, fd); 338 cmd = SCARG(uap, cmd); 339 fdp = l->l_fd; 340 error = 0; 341 342 switch (cmd) { 343 case F_CLOSEM: 344 if (fd < 0) 345 return EBADF; 346 while ((i = fdp->fd_lastfile) >= fd) { 347 if (fd_getfile(i) == NULL) { 348 /* Another thread has updated. */ 349 continue; 350 } 351 fd_close(i); 352 } 353 return 0; 354 355 case F_MAXFD: 356 *retval = fdp->fd_lastfile; 357 return 0; 358 359 case F_SETLKW: 360 case F_SETLK: 361 case F_GETLK: 362 error = copyin(SCARG(uap, arg), &fl, sizeof(fl)); 363 if (error) 364 return error; 365 error = do_fcntl_lock(fd, cmd, &fl); 366 if (cmd == F_GETLK && error == 0) 367 error = copyout(&fl, SCARG(uap, arg), sizeof(fl)); 368 return error; 369 370 default: 371 /* Handled below */ 372 break; 373 } 374 375 if ((fp = fd_getfile(fd)) == NULL) 376 return EBADF; 377 378 if ((cmd & F_FSCTL)) { 379 error = fcntl_forfs(fd, fp, cmd, SCARG(uap, arg)); 380 fd_putfile(fd); 381 return error; 382 } 383 384 switch (cmd) { 385 case F_DUPFD_CLOEXEC: 386 cloexec = true; 387 /*FALLTHROUGH*/ 388 case F_DUPFD: 389 newmin = (long)SCARG(uap, arg); 390 if ((u_int)newmin >= 391 l->l_proc->p_rlimit[RLIMIT_NOFILE].rlim_cur || 392 (u_int)newmin >= maxfiles) { 393 fd_putfile(fd); 394 return EINVAL; 395 } 396 error = fd_dup(fp, newmin, &i, cloexec); 397 *retval = i; 398 break; 399 400 case F_GETFD: 401 dt = atomic_load_consume(&fdp->fd_dt); 402 *retval = dt->dt_ff[fd]->ff_exclose; 403 break; 404 405 case F_SETFD: 406 fd_set_exclose(l, fd, 407 ((long)SCARG(uap, arg) & FD_CLOEXEC) != 0); 408 break; 409 410 case F_GETNOSIGPIPE: 411 *retval = (fp->f_flag & FNOSIGPIPE) != 0; 412 break; 413 414 case F_SETNOSIGPIPE: 415 if (SCARG(uap, arg)) 416 atomic_or_uint(&fp->f_flag, FNOSIGPIPE); 417 else 418 atomic_and_uint(&fp->f_flag, ~FNOSIGPIPE); 419 *retval = 0; 420 break; 421 422 case F_GETFL: 423 *retval = OFLAGS(fp->f_flag); 424 break; 425 426 case F_SETFL: 427 /* XXX not guaranteed to be atomic. */ 428 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 429 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp); 430 if (error) 431 break; 432 i = tmp ^ fp->f_flag; 433 if (i & FNONBLOCK) { 434 int flgs = tmp & FNONBLOCK; 435 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs); 436 if (error) { 437 (*fp->f_ops->fo_fcntl)(fp, F_SETFL, 438 &fp->f_flag); 439 break; 440 } 441 } 442 if (i & FASYNC) { 443 int flgs = tmp & FASYNC; 444 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs); 445 if (error) { 446 if (i & FNONBLOCK) { 447 tmp = fp->f_flag & FNONBLOCK; 448 (void)(*fp->f_ops->fo_ioctl)(fp, 449 FIONBIO, &tmp); 450 } 451 (*fp->f_ops->fo_fcntl)(fp, F_SETFL, 452 &fp->f_flag); 453 break; 454 } 455 } 456 fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp; 457 break; 458 459 case F_GETOWN: 460 error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp); 461 *retval = tmp; 462 break; 463 464 case F_SETOWN: 465 tmp = (int)(uintptr_t) SCARG(uap, arg); 466 error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp); 467 break; 468 469 case F_GETPATH: 470 kpath = PNBUF_GET(); 471 472 /* vnodes need extra context, so are handled separately */ 473 if (fp->f_type == DTYPE_VNODE) 474 error = vnode_to_path(kpath, MAXPATHLEN, fp->f_vnode, 475 l, l->l_proc); 476 else 477 error = (*fp->f_ops->fo_fcntl)(fp, F_GETPATH, kpath); 478 479 if (error == 0) 480 error = copyoutstr(kpath, SCARG(uap, arg), MAXPATHLEN, 481 NULL); 482 483 PNBUF_PUT(kpath); 484 break; 485 486 case F_ADD_SEALS: 487 tmp = (int)(uintptr_t) SCARG(uap, arg); 488 error = (*fp->f_ops->fo_fcntl)(fp, F_ADD_SEALS, &tmp); 489 break; 490 491 case F_GET_SEALS: 492 error = (*fp->f_ops->fo_fcntl)(fp, F_GET_SEALS, &tmp); 493 *retval = tmp; 494 break; 495 496 default: 497 error = EINVAL; 498 } 499 500 fd_putfile(fd); 501 return (error); 502 } 503 504 /* 505 * Close a file descriptor. 506 */ 507 int 508 sys_close(struct lwp *l, const struct sys_close_args *uap, register_t *retval) 509 { 510 /* { 511 syscallarg(int) fd; 512 } */ 513 int error; 514 int fd = SCARG(uap, fd); 515 516 if (fd_getfile(fd) == NULL) { 517 return EBADF; 518 } 519 520 error = fd_close(fd); 521 if (error == ERESTART) { 522 #ifdef DIAGNOSTIC 523 printf("%s[%d]: close(%d) returned ERESTART\n", 524 l->l_proc->p_comm, (int)l->l_proc->p_pid, fd); 525 #endif 526 error = EINTR; 527 } 528 529 return error; 530 } 531 532 /* 533 * Return status information about a file descriptor. 534 * Common function for compat code. 535 */ 536 int 537 do_sys_fstat(int fd, struct stat *sb) 538 { 539 file_t *fp; 540 int error; 541 542 if ((fp = fd_getfile(fd)) == NULL) { 543 return EBADF; 544 } 545 error = (*fp->f_ops->fo_stat)(fp, sb); 546 fd_putfile(fd); 547 548 return error; 549 } 550 551 /* 552 * Return status information about a file descriptor. 553 */ 554 int 555 sys___fstat50(struct lwp *l, const struct sys___fstat50_args *uap, 556 register_t *retval) 557 { 558 /* { 559 syscallarg(int) fd; 560 syscallarg(struct stat *) sb; 561 } */ 562 struct stat sb; 563 int error; 564 565 error = do_sys_fstat(SCARG(uap, fd), &sb); 566 if (error == 0) { 567 error = copyout(&sb, SCARG(uap, sb), sizeof(sb)); 568 } 569 return error; 570 } 571 572 /* 573 * Return pathconf information about a file descriptor. 574 */ 575 int 576 sys_fpathconf(struct lwp *l, const struct sys_fpathconf_args *uap, 577 register_t *retval) 578 { 579 /* { 580 syscallarg(int) fd; 581 syscallarg(int) name; 582 } */ 583 int fd, name, error; 584 file_t *fp; 585 586 fd = SCARG(uap, fd); 587 name = SCARG(uap, name); 588 error = 0; 589 590 if ((fp = fd_getfile(fd)) == NULL) 591 return EBADF; 592 if (fp->f_ops->fo_fpathconf == NULL) 593 error = EOPNOTSUPP; 594 else 595 error = (*fp->f_ops->fo_fpathconf)(fp, name, retval); 596 fd_putfile(fd); 597 return error; 598 } 599 600 /* 601 * Apply an advisory lock on a file descriptor. 602 * 603 * Just attempt to get a record lock of the requested type on 604 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 605 */ 606 /* ARGSUSED */ 607 int 608 sys_flock(struct lwp *l, const struct sys_flock_args *uap, register_t *retval) 609 { 610 /* { 611 syscallarg(int) fd; 612 syscallarg(int) how; 613 } */ 614 int fd, how, error; 615 struct file *fp = NULL; 616 int (*fo_advlock)(struct file *, void *, int, struct flock *, int); 617 struct flock lf; 618 619 fd = SCARG(uap, fd); 620 how = SCARG(uap, how); 621 622 if ((fp = fd_getfile(fd)) == NULL) { 623 error = EBADF; 624 goto out; 625 } 626 if ((fo_advlock = fp->f_ops->fo_advlock) == NULL) { 627 KASSERT((atomic_load_relaxed(&fp->f_flag) & FHASLOCK) == 0); 628 error = EOPNOTSUPP; 629 goto out; 630 } 631 632 lf.l_whence = SEEK_SET; 633 lf.l_start = 0; 634 lf.l_len = 0; 635 636 switch (how & ~LOCK_NB) { 637 case LOCK_UN: 638 lf.l_type = F_UNLCK; 639 atomic_and_uint(&fp->f_flag, ~FHASLOCK); 640 error = (*fo_advlock)(fp, fp, F_UNLCK, &lf, F_FLOCK); 641 goto out; 642 case LOCK_EX: 643 lf.l_type = F_WRLCK; 644 break; 645 case LOCK_SH: 646 lf.l_type = F_RDLCK; 647 break; 648 default: 649 error = EINVAL; 650 goto out; 651 } 652 653 atomic_or_uint(&fp->f_flag, FHASLOCK); 654 if (how & LOCK_NB) { 655 error = (*fo_advlock)(fp, fp, F_SETLK, &lf, F_FLOCK); 656 } else { 657 error = (*fo_advlock)(fp, fp, F_SETLK, &lf, F_FLOCK|F_WAIT); 658 } 659 out: if (fp) 660 fd_putfile(fd); 661 return error; 662 } 663 664 int 665 do_posix_fadvise(int fd, off_t offset, off_t len, int advice) 666 { 667 file_t *fp; 668 int error; 669 670 if ((fp = fd_getfile(fd)) == NULL) 671 return EBADF; 672 if (fp->f_ops->fo_posix_fadvise == NULL) { 673 error = EOPNOTSUPP; 674 } else { 675 error = (*fp->f_ops->fo_posix_fadvise)(fp, offset, len, 676 advice); 677 } 678 fd_putfile(fd); 679 return error; 680 } 681 682 int 683 sys___posix_fadvise50(struct lwp *l, 684 const struct sys___posix_fadvise50_args *uap, 685 register_t *retval) 686 { 687 /* { 688 syscallarg(int) fd; 689 syscallarg(int) pad; 690 syscallarg(off_t) offset; 691 syscallarg(off_t) len; 692 syscallarg(int) advice; 693 } */ 694 695 *retval = do_posix_fadvise(SCARG(uap, fd), SCARG(uap, offset), 696 SCARG(uap, len), SCARG(uap, advice)); 697 698 return 0; 699 } 700 701 int 702 sys_pipe(struct lwp *l, const void *v, register_t *retval) 703 { 704 int fd[2], error; 705 706 if ((error = pipe1(l, fd, 0)) != 0) 707 return error; 708 709 retval[0] = fd[0]; 710 retval[1] = fd[1]; 711 712 return 0; 713 } 714 715 int 716 sys_pipe2(struct lwp *l, const struct sys_pipe2_args *uap, register_t *retval) 717 { 718 /* { 719 syscallarg(int[2]) fildes; 720 syscallarg(int) flags; 721 } */ 722 int fd[2], error; 723 724 if ((error = pipe1(l, fd, SCARG(uap, flags))) != 0) 725 return error; 726 727 if ((error = copyout(fd, SCARG(uap, fildes), sizeof(fd))) != 0) 728 return error; 729 retval[0] = 0; 730 return 0; 731 } 732