1 /* 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 39 * $FreeBSD: src/sys/kern/kern_descrip.c,v 1.81.2.19 2004/02/28 00:43:31 tegge Exp $ 40 * $DragonFly: src/sys/kern/kern_descrip.c,v 1.41 2005/04/08 17:39:31 joerg Exp $ 41 */ 42 43 #include "opt_compat.h" 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/malloc.h> 47 #include <sys/sysproto.h> 48 #include <sys/conf.h> 49 #include <sys/filedesc.h> 50 #include <sys/kernel.h> 51 #include <sys/sysctl.h> 52 #include <sys/vnode.h> 53 #include <sys/proc.h> 54 #include <sys/nlookup.h> 55 #include <sys/file.h> 56 #include <sys/stat.h> 57 #include <sys/filio.h> 58 #include <sys/fcntl.h> 59 #include <sys/unistd.h> 60 #include <sys/resourcevar.h> 61 #include <sys/event.h> 62 #include <sys/kern_syscall.h> 63 #include <sys/kcore.h> 64 #include <sys/kinfo.h> 65 66 #include <vm/vm.h> 67 #include <vm/vm_extern.h> 68 69 #include <sys/file2.h> 70 71 static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table"); 72 static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "file desc to leader", 73 "file desc to leader structures"); 74 MALLOC_DEFINE(M_FILE, "file", "Open file structure"); 75 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); 76 77 static d_open_t fdopen; 78 #define NUMFDESC 64 79 80 #define CDEV_MAJOR 22 81 static struct cdevsw fildesc_cdevsw = { 82 /* name */ "FD", 83 /* maj */ CDEV_MAJOR, 84 /* flags */ 0, 85 /* port */ NULL, 86 /* clone */ NULL, 87 88 /* open */ fdopen, 89 /* close */ noclose, 90 /* read */ noread, 91 /* write */ nowrite, 92 /* ioctl */ noioctl, 93 /* poll */ nopoll, 94 /* mmap */ nommap, 95 /* strategy */ nostrategy, 96 /* dump */ nodump, 97 /* psize */ nopsize 98 }; 99 100 static int badfo_readwrite (struct file *fp, struct uio *uio, 101 struct ucred *cred, int flags, struct thread *td); 102 static int badfo_ioctl (struct file *fp, u_long com, caddr_t data, 103 struct thread *td); 104 static int badfo_poll (struct file *fp, int events, 105 struct ucred *cred, struct thread *td); 106 static int badfo_kqfilter (struct file *fp, struct knote *kn); 107 static int badfo_stat (struct file *fp, struct stat *sb, struct thread *td); 108 static int badfo_close (struct file *fp, struct thread *td); 109 110 /* 111 * Descriptor management. 112 */ 113 struct filelist filehead; /* head of list of open files */ 114 int nfiles; /* actual number of open files */ 115 extern int cmask; 116 117 /* 118 * System calls on descriptors. 119 */ 120 /* ARGSUSED */ 121 int 122 getdtablesize(struct getdtablesize_args *uap) 123 { 124 struct proc *p = curproc; 125 126 uap->sysmsg_result = 127 min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc); 128 return (0); 129 } 130 131 /* 132 * Duplicate a file descriptor to a particular value. 133 * 134 * note: keep in mind that a potential race condition exists when closing 135 * descriptors from a shared descriptor table (via rfork). 136 */ 137 /* ARGSUSED */ 138 int 139 dup2(struct dup2_args *uap) 140 { 141 int error; 142 143 error = kern_dup(DUP_FIXED, uap->from, uap->to, uap->sysmsg_fds); 144 145 return (error); 146 } 147 148 /* 149 * Duplicate a file descriptor. 150 */ 151 /* ARGSUSED */ 152 int 153 dup(struct dup_args *uap) 154 { 155 int error; 156 157 error = kern_dup(DUP_VARIABLE, uap->fd, 0, uap->sysmsg_fds); 158 159 return (error); 160 } 161 162 int 163 kern_fcntl(int fd, int cmd, union fcntl_dat *dat) 164 { 165 struct thread *td = curthread; 166 struct proc *p = td->td_proc; 167 struct filedesc *fdp = p->p_fd; 168 struct file *fp; 169 char *pop; 170 struct vnode *vp; 171 u_int newmin; 172 int tmp, error, flg = F_POSIX; 173 174 KKASSERT(p); 175 176 if ((unsigned)fd >= fdp->fd_nfiles || 177 (fp = fdp->fd_ofiles[fd]) == NULL) 178 return (EBADF); 179 pop = &fdp->fd_ofileflags[fd]; 180 181 switch (cmd) { 182 case F_DUPFD: 183 newmin = dat->fc_fd; 184 if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 185 newmin > maxfilesperproc) 186 return (EINVAL); 187 error = kern_dup(DUP_VARIABLE, fd, newmin, &dat->fc_fd); 188 return (error); 189 190 case F_GETFD: 191 dat->fc_cloexec = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0; 192 return (0); 193 194 case F_SETFD: 195 *pop = (*pop &~ UF_EXCLOSE) | 196 (dat->fc_cloexec & FD_CLOEXEC ? UF_EXCLOSE : 0); 197 return (0); 198 199 case F_GETFL: 200 dat->fc_flags = OFLAGS(fp->f_flag); 201 return (0); 202 203 case F_SETFL: 204 fhold(fp); 205 fp->f_flag &= ~FCNTLFLAGS; 206 fp->f_flag |= FFLAGS(dat->fc_flags & ~O_ACCMODE) & FCNTLFLAGS; 207 tmp = fp->f_flag & FNONBLOCK; 208 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td); 209 if (error) { 210 fdrop(fp, td); 211 return (error); 212 } 213 tmp = fp->f_flag & FASYNC; 214 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td); 215 if (!error) { 216 fdrop(fp, td); 217 return (0); 218 } 219 fp->f_flag &= ~FNONBLOCK; 220 tmp = 0; 221 fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td); 222 fdrop(fp, td); 223 return (error); 224 225 case F_GETOWN: 226 fhold(fp); 227 error = fo_ioctl(fp, FIOGETOWN, (caddr_t)&dat->fc_owner, td); 228 fdrop(fp, td); 229 return(error); 230 231 case F_SETOWN: 232 fhold(fp); 233 error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&dat->fc_owner, td); 234 fdrop(fp, td); 235 return(error); 236 237 case F_SETLKW: 238 flg |= F_WAIT; 239 /* Fall into F_SETLK */ 240 241 case F_SETLK: 242 if (fp->f_type != DTYPE_VNODE) 243 return (EBADF); 244 vp = (struct vnode *)fp->f_data; 245 246 /* 247 * copyin/lockop may block 248 */ 249 fhold(fp); 250 if (dat->fc_flock.l_whence == SEEK_CUR) 251 dat->fc_flock.l_start += fp->f_offset; 252 253 switch (dat->fc_flock.l_type) { 254 case F_RDLCK: 255 if ((fp->f_flag & FREAD) == 0) { 256 error = EBADF; 257 break; 258 } 259 p->p_leader->p_flag |= P_ADVLOCK; 260 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 261 &dat->fc_flock, flg); 262 break; 263 case F_WRLCK: 264 if ((fp->f_flag & FWRITE) == 0) { 265 error = EBADF; 266 break; 267 } 268 p->p_leader->p_flag |= P_ADVLOCK; 269 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 270 &dat->fc_flock, flg); 271 break; 272 case F_UNLCK: 273 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, 274 &dat->fc_flock, F_POSIX); 275 break; 276 default: 277 error = EINVAL; 278 break; 279 } 280 /* Check for race with close */ 281 if ((unsigned) fd >= fdp->fd_nfiles || 282 fp != fdp->fd_ofiles[fd]) { 283 dat->fc_flock.l_whence = SEEK_SET; 284 dat->fc_flock.l_start = 0; 285 dat->fc_flock.l_len = 0; 286 dat->fc_flock.l_type = F_UNLCK; 287 (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, 288 F_UNLCK, &dat->fc_flock, F_POSIX); 289 } 290 fdrop(fp, td); 291 return(error); 292 293 case F_GETLK: 294 if (fp->f_type != DTYPE_VNODE) 295 return (EBADF); 296 vp = (struct vnode *)fp->f_data; 297 /* 298 * copyin/lockop may block 299 */ 300 fhold(fp); 301 if (dat->fc_flock.l_type != F_RDLCK && 302 dat->fc_flock.l_type != F_WRLCK && 303 dat->fc_flock.l_type != F_UNLCK) { 304 fdrop(fp, td); 305 return (EINVAL); 306 } 307 if (dat->fc_flock.l_whence == SEEK_CUR) 308 dat->fc_flock.l_start += fp->f_offset; 309 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, 310 &dat->fc_flock, F_POSIX); 311 fdrop(fp, td); 312 return(error); 313 default: 314 return (EINVAL); 315 } 316 /* NOTREACHED */ 317 } 318 319 /* 320 * The file control system call. 321 */ 322 int 323 fcntl(struct fcntl_args *uap) 324 { 325 union fcntl_dat dat; 326 int error; 327 328 switch (uap->cmd) { 329 case F_DUPFD: 330 dat.fc_fd = uap->arg; 331 break; 332 case F_SETFD: 333 dat.fc_cloexec = uap->arg; 334 break; 335 case F_SETFL: 336 dat.fc_flags = uap->arg; 337 break; 338 case F_SETOWN: 339 dat.fc_owner = uap->arg; 340 break; 341 case F_SETLKW: 342 case F_SETLK: 343 case F_GETLK: 344 error = copyin((caddr_t)uap->arg, &dat.fc_flock, 345 sizeof(struct flock)); 346 if (error) 347 return (error); 348 break; 349 } 350 351 error = kern_fcntl(uap->fd, uap->cmd, &dat); 352 353 if (error == 0) { 354 switch (uap->cmd) { 355 case F_DUPFD: 356 uap->sysmsg_result = dat.fc_fd; 357 break; 358 case F_GETFD: 359 uap->sysmsg_result = dat.fc_cloexec; 360 break; 361 case F_GETFL: 362 uap->sysmsg_result = dat.fc_flags; 363 break; 364 case F_GETOWN: 365 uap->sysmsg_result = dat.fc_owner; 366 case F_GETLK: 367 error = copyout(&dat.fc_flock, (caddr_t)uap->arg, 368 sizeof(struct flock)); 369 break; 370 } 371 } 372 373 return (error); 374 } 375 376 /* 377 * Common code for dup, dup2, and fcntl(F_DUPFD). 378 * 379 * The type flag can be either DUP_FIXED or DUP_VARIABLE. DUP_FIXED tells 380 * kern_dup() to destructively dup over an existing file descriptor if new 381 * is already open. DUP_VARIABLE tells kern_dup() to find the lowest 382 * unused file descriptor that is greater than or equal to new. 383 */ 384 int 385 kern_dup(enum dup_type type, int old, int new, int *res) 386 { 387 struct thread *td = curthread; 388 struct proc *p = td->td_proc; 389 struct filedesc *fdp = p->p_fd; 390 struct file *fp; 391 struct file *delfp; 392 int holdleaders; 393 int error, newfd; 394 395 /* 396 * Verify that we have a valid descriptor to dup from and 397 * possibly to dup to. 398 */ 399 if (old < 0 || new < 0 || new > p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 400 new >= maxfilesperproc) 401 return (EBADF); 402 if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) 403 return (EBADF); 404 if (type == DUP_FIXED && old == new) { 405 *res = new; 406 return (0); 407 } 408 fp = fdp->fd_ofiles[old]; 409 fhold(fp); 410 411 /* 412 * Expand the table for the new descriptor if needed. This may 413 * block and drop and reacquire the fidedesc lock. 414 */ 415 if (type == DUP_VARIABLE || new >= fdp->fd_nfiles) { 416 error = fdalloc(p, new, &newfd); 417 if (error) { 418 fdrop(fp, td); 419 return (error); 420 } 421 } 422 if (type == DUP_VARIABLE) 423 new = newfd; 424 425 /* 426 * If the old file changed out from under us then treat it as a 427 * bad file descriptor. Userland should do its own locking to 428 * avoid this case. 429 */ 430 if (fdp->fd_ofiles[old] != fp) { 431 if (fdp->fd_ofiles[new] == NULL) { 432 if (new < fdp->fd_freefile) 433 fdp->fd_freefile = new; 434 while (fdp->fd_lastfile > 0 && 435 fdp->fd_ofiles[fdp->fd_lastfile] == NULL) 436 fdp->fd_lastfile--; 437 } 438 fdrop(fp, td); 439 return (EBADF); 440 } 441 KASSERT(old != new, ("new fd is same as old")); 442 443 /* 444 * Save info on the descriptor being overwritten. We have 445 * to do the unmap now, but we cannot close it without 446 * introducing an ownership race for the slot. 447 */ 448 delfp = fdp->fd_ofiles[new]; 449 if (delfp != NULL && p->p_fdtol != NULL) { 450 /* 451 * Ask fdfree() to sleep to ensure that all relevant 452 * process leaders can be traversed in closef(). 453 */ 454 fdp->fd_holdleaderscount++; 455 holdleaders = 1; 456 } else 457 holdleaders = 0; 458 KASSERT(delfp == NULL || type == DUP_FIXED, 459 ("dup() picked an open file")); 460 #if 0 461 if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED)) 462 (void) munmapfd(p, new); 463 #endif 464 465 /* 466 * Duplicate the source descriptor, update lastfile 467 */ 468 fdp->fd_ofiles[new] = fp; 469 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; 470 if (new > fdp->fd_lastfile) 471 fdp->fd_lastfile = new; 472 *res = new; 473 474 /* 475 * If we dup'd over a valid file, we now own the reference to it 476 * and must dispose of it using closef() semantics (as if a 477 * close() were performed on it). 478 */ 479 if (delfp) { 480 (void) closef(delfp, td); 481 if (holdleaders) { 482 fdp->fd_holdleaderscount--; 483 if (fdp->fd_holdleaderscount == 0 && 484 fdp->fd_holdleaderswakeup != 0) { 485 fdp->fd_holdleaderswakeup = 0; 486 wakeup(&fdp->fd_holdleaderscount); 487 } 488 } 489 } 490 return (0); 491 } 492 493 /* 494 * If sigio is on the list associated with a process or process group, 495 * disable signalling from the device, remove sigio from the list and 496 * free sigio. 497 */ 498 void 499 funsetown(struct sigio *sigio) 500 { 501 int s; 502 503 if (sigio == NULL) 504 return; 505 s = splhigh(); 506 *(sigio->sio_myref) = NULL; 507 splx(s); 508 if (sigio->sio_pgid < 0) { 509 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio, 510 sigio, sio_pgsigio); 511 } else /* if ((*sigiop)->sio_pgid > 0) */ { 512 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio, 513 sigio, sio_pgsigio); 514 } 515 crfree(sigio->sio_ucred); 516 free(sigio, M_SIGIO); 517 } 518 519 /* Free a list of sigio structures. */ 520 void 521 funsetownlst(struct sigiolst *sigiolst) 522 { 523 struct sigio *sigio; 524 525 while ((sigio = SLIST_FIRST(sigiolst)) != NULL) 526 funsetown(sigio); 527 } 528 529 /* 530 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg). 531 * 532 * After permission checking, add a sigio structure to the sigio list for 533 * the process or process group. 534 */ 535 int 536 fsetown(pid_t pgid, struct sigio **sigiop) 537 { 538 struct proc *proc; 539 struct pgrp *pgrp; 540 struct sigio *sigio; 541 int s; 542 543 if (pgid == 0) { 544 funsetown(*sigiop); 545 return (0); 546 } 547 if (pgid > 0) { 548 proc = pfind(pgid); 549 if (proc == NULL) 550 return (ESRCH); 551 552 /* 553 * Policy - Don't allow a process to FSETOWN a process 554 * in another session. 555 * 556 * Remove this test to allow maximum flexibility or 557 * restrict FSETOWN to the current process or process 558 * group for maximum safety. 559 */ 560 if (proc->p_session != curproc->p_session) 561 return (EPERM); 562 563 pgrp = NULL; 564 } else /* if (pgid < 0) */ { 565 pgrp = pgfind(-pgid); 566 if (pgrp == NULL) 567 return (ESRCH); 568 569 /* 570 * Policy - Don't allow a process to FSETOWN a process 571 * in another session. 572 * 573 * Remove this test to allow maximum flexibility or 574 * restrict FSETOWN to the current process or process 575 * group for maximum safety. 576 */ 577 if (pgrp->pg_session != curproc->p_session) 578 return (EPERM); 579 580 proc = NULL; 581 } 582 funsetown(*sigiop); 583 sigio = malloc(sizeof(struct sigio), M_SIGIO, M_WAITOK); 584 if (pgid > 0) { 585 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio); 586 sigio->sio_proc = proc; 587 } else { 588 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio); 589 sigio->sio_pgrp = pgrp; 590 } 591 sigio->sio_pgid = pgid; 592 sigio->sio_ucred = crhold(curproc->p_ucred); 593 /* It would be convenient if p_ruid was in ucred. */ 594 sigio->sio_ruid = curproc->p_ucred->cr_ruid; 595 sigio->sio_myref = sigiop; 596 s = splhigh(); 597 *sigiop = sigio; 598 splx(s); 599 return (0); 600 } 601 602 /* 603 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg). 604 */ 605 pid_t 606 fgetown(struct sigio *sigio) 607 { 608 return (sigio != NULL ? sigio->sio_pgid : 0); 609 } 610 611 /* 612 * Close many file descriptors. 613 */ 614 /* ARGSUSED */ 615 616 int 617 closefrom(struct closefrom_args *uap) 618 { 619 return(kern_closefrom(uap->fd)); 620 } 621 622 int 623 kern_closefrom(int fd) 624 { 625 struct thread *td = curthread; 626 struct proc *p = td->td_proc; 627 struct filedesc *fdp; 628 629 KKASSERT(p); 630 fdp = p->p_fd; 631 632 if (fd < 0 || fd > fdp->fd_lastfile) 633 return (0); 634 635 do { 636 if (kern_close(fdp->fd_lastfile) == EINTR) 637 return (EINTR); 638 } while (fdp->fd_lastfile > fd); 639 640 return (0); 641 } 642 643 /* 644 * Close a file descriptor. 645 */ 646 /* ARGSUSED */ 647 648 int 649 close(struct close_args *uap) 650 { 651 return(kern_close(uap->fd)); 652 } 653 654 int 655 kern_close(int fd) 656 { 657 struct thread *td = curthread; 658 struct proc *p = td->td_proc; 659 struct filedesc *fdp; 660 struct file *fp; 661 int error; 662 int holdleaders; 663 664 KKASSERT(p); 665 fdp = p->p_fd; 666 667 if ((unsigned)fd >= fdp->fd_nfiles || 668 (fp = fdp->fd_ofiles[fd]) == NULL) 669 return (EBADF); 670 #if 0 671 if (fdp->fd_ofileflags[fd] & UF_MAPPED) 672 (void) munmapfd(p, fd); 673 #endif 674 fdp->fd_ofiles[fd] = NULL; 675 fdp->fd_ofileflags[fd] = 0; 676 holdleaders = 0; 677 if (p->p_fdtol != NULL) { 678 /* 679 * Ask fdfree() to sleep to ensure that all relevant 680 * process leaders can be traversed in closef(). 681 */ 682 fdp->fd_holdleaderscount++; 683 holdleaders = 1; 684 } 685 686 /* 687 * we now hold the fp reference that used to be owned by the descriptor 688 * array. 689 */ 690 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) 691 fdp->fd_lastfile--; 692 if (fd < fdp->fd_freefile) 693 fdp->fd_freefile = fd; 694 if (fd < fdp->fd_knlistsize) 695 knote_fdclose(p, fd); 696 error = closef(fp, td); 697 if (holdleaders) { 698 fdp->fd_holdleaderscount--; 699 if (fdp->fd_holdleaderscount == 0 && 700 fdp->fd_holdleaderswakeup != 0) { 701 fdp->fd_holdleaderswakeup = 0; 702 wakeup(&fdp->fd_holdleaderscount); 703 } 704 } 705 return (error); 706 } 707 708 int 709 kern_fstat(int fd, struct stat *ub) 710 { 711 struct thread *td = curthread; 712 struct proc *p = td->td_proc; 713 struct filedesc *fdp; 714 struct file *fp; 715 int error; 716 717 KKASSERT(p); 718 719 fdp = p->p_fd; 720 if ((unsigned)fd >= fdp->fd_nfiles || 721 (fp = fdp->fd_ofiles[fd]) == NULL) 722 return (EBADF); 723 fhold(fp); 724 error = fo_stat(fp, ub, td); 725 fdrop(fp, td); 726 727 return (error); 728 } 729 730 /* 731 * Return status information about a file descriptor. 732 */ 733 int 734 fstat(struct fstat_args *uap) 735 { 736 struct stat st; 737 int error; 738 739 error = kern_fstat(uap->fd, &st); 740 741 if (error == 0) 742 error = copyout(&st, uap->sb, sizeof(st)); 743 return (error); 744 } 745 746 /* 747 * XXX: This is for source compatibility with NetBSD. Probably doesn't 748 * belong here. 749 */ 750 int 751 nfstat(struct nfstat_args *uap) 752 { 753 struct stat st; 754 struct nstat nst; 755 int error; 756 757 error = kern_fstat(uap->fd, &st); 758 759 if (error == 0) { 760 cvtnstat(&st, &nst); 761 error = copyout(&nst, uap->sb, sizeof (nst)); 762 } 763 return (error); 764 } 765 766 /* 767 * Return pathconf information about a file descriptor. 768 */ 769 /* ARGSUSED */ 770 int 771 fpathconf(struct fpathconf_args *uap) 772 { 773 struct thread *td = curthread; 774 struct proc *p = td->td_proc; 775 struct filedesc *fdp; 776 struct file *fp; 777 struct vnode *vp; 778 int error = 0; 779 780 KKASSERT(p); 781 fdp = p->p_fd; 782 if ((unsigned)uap->fd >= fdp->fd_nfiles || 783 (fp = fdp->fd_ofiles[uap->fd]) == NULL) 784 return (EBADF); 785 786 fhold(fp); 787 788 switch (fp->f_type) { 789 case DTYPE_PIPE: 790 case DTYPE_SOCKET: 791 if (uap->name != _PC_PIPE_BUF) { 792 error = EINVAL; 793 } else { 794 uap->sysmsg_result = PIPE_BUF; 795 error = 0; 796 } 797 break; 798 case DTYPE_FIFO: 799 case DTYPE_VNODE: 800 vp = (struct vnode *)fp->f_data; 801 error = VOP_PATHCONF(vp, uap->name, uap->sysmsg_fds); 802 break; 803 default: 804 error = EOPNOTSUPP; 805 break; 806 } 807 fdrop(fp, td); 808 return(error); 809 } 810 811 /* 812 * Allocate a file descriptor for the process. 813 */ 814 static int fdexpand; 815 SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, ""); 816 817 int 818 fdalloc(struct proc *p, int want, int *result) 819 { 820 struct filedesc *fdp = p->p_fd; 821 int i; 822 int lim, last, nfiles; 823 struct file **newofile; 824 char *newofileflags; 825 826 /* 827 * Search for a free descriptor starting at the higher 828 * of want or fd_freefile. If that fails, consider 829 * expanding the ofile array. 830 */ 831 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc); 832 for (;;) { 833 last = min(fdp->fd_nfiles, lim); 834 if ((i = want) < fdp->fd_freefile) 835 i = fdp->fd_freefile; 836 for (; i < last; i++) { 837 if (fdp->fd_ofiles[i] == NULL) { 838 fdp->fd_ofileflags[i] = 0; 839 if (i > fdp->fd_lastfile) 840 fdp->fd_lastfile = i; 841 if (want <= fdp->fd_freefile) 842 fdp->fd_freefile = i; 843 *result = i; 844 return (0); 845 } 846 } 847 848 /* 849 * No space in current array. Expand? 850 */ 851 if (fdp->fd_nfiles >= lim) 852 return (EMFILE); 853 if (fdp->fd_nfiles < NDEXTENT) 854 nfiles = NDEXTENT; 855 else 856 nfiles = 2 * fdp->fd_nfiles; 857 newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK); 858 859 /* 860 * deal with file-table extend race that might have occured 861 * when malloc was blocked. 862 */ 863 if (fdp->fd_nfiles >= nfiles) { 864 free(newofile, M_FILEDESC); 865 continue; 866 } 867 newofileflags = (char *) &newofile[nfiles]; 868 /* 869 * Copy the existing ofile and ofileflags arrays 870 * and zero the new portion of each array. 871 */ 872 bcopy(fdp->fd_ofiles, newofile, 873 (i = sizeof(struct file *) * fdp->fd_nfiles)); 874 bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i); 875 bcopy(fdp->fd_ofileflags, newofileflags, 876 (i = sizeof(char) * fdp->fd_nfiles)); 877 bzero(newofileflags + i, nfiles * sizeof(char) - i); 878 if (fdp->fd_nfiles > NDFILE) 879 free(fdp->fd_ofiles, M_FILEDESC); 880 fdp->fd_ofiles = newofile; 881 fdp->fd_ofileflags = newofileflags; 882 fdp->fd_nfiles = nfiles; 883 fdexpand++; 884 } 885 return (0); 886 } 887 888 /* 889 * Check to see whether n user file descriptors 890 * are available to the process p. 891 */ 892 int 893 fdavail(struct proc *p, int n) 894 { 895 struct filedesc *fdp = p->p_fd; 896 struct file **fpp; 897 int i, lim, last; 898 899 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc); 900 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) 901 return (1); 902 903 last = min(fdp->fd_nfiles, lim); 904 fpp = &fdp->fd_ofiles[fdp->fd_freefile]; 905 for (i = last - fdp->fd_freefile; --i >= 0; fpp++) { 906 if (*fpp == NULL && --n <= 0) 907 return (1); 908 } 909 return (0); 910 } 911 912 /* 913 * falloc: 914 * Create a new open file structure and allocate a file decriptor 915 * for the process that refers to it. If p is NULL, no descriptor 916 * is allocated and the file pointer is returned unassociated with 917 * any process. resultfd is only used if p is not NULL and may 918 * separately be NULL indicating that you don't need the returned fd. 919 * 920 * A held file pointer is returned. If a descriptor has been allocated 921 * an additional hold on the fp will be made due to the fd_ofiles[] 922 * reference. 923 */ 924 int 925 falloc(struct proc *p, struct file **resultfp, int *resultfd) 926 { 927 static struct timeval lastfail; 928 static int curfail; 929 struct file *fp; 930 int error; 931 932 fp = NULL; 933 934 /* 935 * Handle filetable full issues and root overfill. 936 */ 937 if (nfiles >= maxfiles - maxfilesrootres && 938 ((p && p->p_ucred->cr_ruid != 0) || nfiles >= maxfiles)) { 939 if (ppsratecheck(&lastfail, &curfail, 1)) { 940 printf("kern.maxfiles limit exceeded by uid %d, please see tuning(7).\n", 941 (p ? p->p_ucred->cr_ruid : -1)); 942 } 943 error = ENFILE; 944 goto done; 945 } 946 947 /* 948 * Allocate a new file descriptor. 949 */ 950 nfiles++; 951 fp = malloc(sizeof(struct file), M_FILE, M_WAITOK | M_ZERO); 952 fp->f_count = 1; 953 fp->f_ops = &badfileops; 954 fp->f_seqcount = 1; 955 if (p) 956 fp->f_cred = crhold(p->p_ucred); 957 else 958 fp->f_cred = crhold(proc0.p_ucred); 959 LIST_INSERT_HEAD(&filehead, fp, f_list); 960 if (resultfd) { 961 if ((error = fsetfd(p, fp, resultfd)) != 0) { 962 fdrop(fp, p->p_thread); 963 fp = NULL; 964 } 965 } else { 966 error = 0; 967 } 968 done: 969 *resultfp = fp; 970 return (error); 971 } 972 973 /* 974 * Associate a file pointer with a file descriptor. On success the fp 975 * will have an additional ref representing the fd_ofiles[] association. 976 */ 977 int 978 fsetfd(struct proc *p, struct file *fp, int *resultfd) 979 { 980 int i; 981 int error; 982 983 KKASSERT(p); 984 985 i = -1; 986 if ((error = fdalloc(p, 0, &i)) == 0) { 987 fhold(fp); 988 p->p_fd->fd_ofiles[i] = fp; 989 } 990 *resultfd = i; 991 return (0); 992 } 993 994 void 995 fsetcred(struct file *fp, struct ucred *cr) 996 { 997 crhold(cr); 998 crfree(fp->f_cred); 999 fp->f_cred = cr; 1000 } 1001 1002 /* 1003 * Free a file descriptor. 1004 */ 1005 void 1006 ffree(struct file *fp) 1007 { 1008 KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!")); 1009 LIST_REMOVE(fp, f_list); 1010 crfree(fp->f_cred); 1011 if (fp->f_ncp) { 1012 cache_drop(fp->f_ncp); 1013 fp->f_ncp = NULL; 1014 } 1015 nfiles--; 1016 free(fp, M_FILE); 1017 } 1018 1019 /* 1020 * Build a new filedesc structure. 1021 */ 1022 struct filedesc * 1023 fdinit(struct proc *p) 1024 { 1025 struct filedesc0 *newfdp; 1026 struct filedesc *fdp = p->p_fd; 1027 1028 newfdp = malloc(sizeof(struct filedesc0), M_FILEDESC, M_WAITOK|M_ZERO); 1029 if (fdp->fd_cdir) { 1030 newfdp->fd_fd.fd_cdir = fdp->fd_cdir; 1031 vref(newfdp->fd_fd.fd_cdir); 1032 newfdp->fd_fd.fd_ncdir = cache_hold(fdp->fd_ncdir); 1033 } 1034 1035 /* 1036 * rdir may not be set in e.g. proc0 or anything vm_fork'd off of 1037 * proc0, but should unconditionally exist in other processes. 1038 */ 1039 if (fdp->fd_rdir) { 1040 newfdp->fd_fd.fd_rdir = fdp->fd_rdir; 1041 vref(newfdp->fd_fd.fd_rdir); 1042 newfdp->fd_fd.fd_nrdir = cache_hold(fdp->fd_nrdir); 1043 } 1044 if (fdp->fd_jdir) { 1045 newfdp->fd_fd.fd_jdir = fdp->fd_jdir; 1046 vref(newfdp->fd_fd.fd_jdir); 1047 newfdp->fd_fd.fd_njdir = cache_hold(fdp->fd_njdir); 1048 } 1049 1050 /* Create the file descriptor table. */ 1051 newfdp->fd_fd.fd_refcnt = 1; 1052 newfdp->fd_fd.fd_cmask = cmask; 1053 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 1054 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 1055 newfdp->fd_fd.fd_nfiles = NDFILE; 1056 newfdp->fd_fd.fd_knlistsize = -1; 1057 1058 return (&newfdp->fd_fd); 1059 } 1060 1061 /* 1062 * Share a filedesc structure. 1063 */ 1064 struct filedesc * 1065 fdshare(struct proc *p) 1066 { 1067 p->p_fd->fd_refcnt++; 1068 return (p->p_fd); 1069 } 1070 1071 /* 1072 * Copy a filedesc structure. 1073 */ 1074 struct filedesc * 1075 fdcopy(struct proc *p) 1076 { 1077 struct filedesc *newfdp, *fdp = p->p_fd; 1078 struct file **fpp; 1079 int i; 1080 1081 /* Certain daemons might not have file descriptors. */ 1082 if (fdp == NULL) 1083 return (NULL); 1084 1085 newfdp = malloc(sizeof(struct filedesc0), M_FILEDESC, M_WAITOK); 1086 bcopy(fdp, newfdp, sizeof(struct filedesc)); 1087 if (newfdp->fd_cdir) { 1088 vref(newfdp->fd_cdir); 1089 newfdp->fd_ncdir = cache_hold(newfdp->fd_ncdir); 1090 } 1091 /* 1092 * We must check for fd_rdir here, at least for now because 1093 * the init process is created before we have access to the 1094 * rootvode to take a reference to it. 1095 */ 1096 if (newfdp->fd_rdir) { 1097 vref(newfdp->fd_rdir); 1098 newfdp->fd_nrdir = cache_hold(newfdp->fd_nrdir); 1099 } 1100 if (newfdp->fd_jdir) { 1101 vref(newfdp->fd_jdir); 1102 newfdp->fd_njdir = cache_hold(newfdp->fd_njdir); 1103 } 1104 newfdp->fd_refcnt = 1; 1105 1106 /* 1107 * If the number of open files fits in the internal arrays 1108 * of the open file structure, use them, otherwise allocate 1109 * additional memory for the number of descriptors currently 1110 * in use. 1111 */ 1112 if (newfdp->fd_lastfile < NDFILE) { 1113 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; 1114 newfdp->fd_ofileflags = 1115 ((struct filedesc0 *) newfdp)->fd_dfileflags; 1116 i = NDFILE; 1117 } else { 1118 /* 1119 * Compute the smallest multiple of NDEXTENT needed 1120 * for the file descriptors currently in use, 1121 * allowing the table to shrink. 1122 */ 1123 i = newfdp->fd_nfiles; 1124 while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2) 1125 i /= 2; 1126 newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK); 1127 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; 1128 } 1129 newfdp->fd_nfiles = i; 1130 bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **)); 1131 bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char)); 1132 1133 /* 1134 * kq descriptors cannot be copied. 1135 */ 1136 if (newfdp->fd_knlistsize != -1) { 1137 fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile]; 1138 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) { 1139 if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) { 1140 *fpp = NULL; 1141 if (i < newfdp->fd_freefile) 1142 newfdp->fd_freefile = i; 1143 } 1144 if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0) 1145 newfdp->fd_lastfile--; 1146 } 1147 newfdp->fd_knlist = NULL; 1148 newfdp->fd_knlistsize = -1; 1149 newfdp->fd_knhash = NULL; 1150 newfdp->fd_knhashmask = 0; 1151 } 1152 1153 fpp = newfdp->fd_ofiles; 1154 for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) { 1155 if (*fpp != NULL) 1156 fhold(*fpp); 1157 } 1158 return (newfdp); 1159 } 1160 1161 /* 1162 * Release a filedesc structure. 1163 */ 1164 void 1165 fdfree(struct proc *p) 1166 { 1167 struct thread *td = p->p_thread; 1168 struct filedesc *fdp = p->p_fd; 1169 struct file **fpp; 1170 int i; 1171 struct filedesc_to_leader *fdtol; 1172 struct file *fp; 1173 struct vnode *vp; 1174 struct flock lf; 1175 1176 /* Certain daemons might not have file descriptors. */ 1177 if (fdp == NULL) 1178 return; 1179 1180 /* Check for special need to clear POSIX style locks */ 1181 fdtol = p->p_fdtol; 1182 if (fdtol != NULL) { 1183 KASSERT(fdtol->fdl_refcount > 0, 1184 ("filedesc_to_refcount botch: fdl_refcount=%d", 1185 fdtol->fdl_refcount)); 1186 if (fdtol->fdl_refcount == 1 && 1187 (p->p_leader->p_flag & P_ADVLOCK) != 0) { 1188 i = 0; 1189 fpp = fdp->fd_ofiles; 1190 for (i = 0, fpp = fdp->fd_ofiles; 1191 i <= fdp->fd_lastfile; 1192 i++, fpp++) { 1193 if (*fpp == NULL || 1194 (*fpp)->f_type != DTYPE_VNODE) 1195 continue; 1196 fp = *fpp; 1197 fhold(fp); 1198 lf.l_whence = SEEK_SET; 1199 lf.l_start = 0; 1200 lf.l_len = 0; 1201 lf.l_type = F_UNLCK; 1202 vp = (struct vnode *)fp->f_data; 1203 (void) VOP_ADVLOCK(vp, 1204 (caddr_t)p->p_leader, 1205 F_UNLCK, 1206 &lf, 1207 F_POSIX); 1208 fdrop(fp, p->p_thread); 1209 fpp = fdp->fd_ofiles + i; 1210 } 1211 } 1212 retry: 1213 if (fdtol->fdl_refcount == 1) { 1214 if (fdp->fd_holdleaderscount > 0 && 1215 (p->p_leader->p_flag & P_ADVLOCK) != 0) { 1216 /* 1217 * close() or do_dup() has cleared a reference 1218 * in a shared file descriptor table. 1219 */ 1220 fdp->fd_holdleaderswakeup = 1; 1221 tsleep(&fdp->fd_holdleaderscount, 1222 0, "fdlhold", 0); 1223 goto retry; 1224 } 1225 if (fdtol->fdl_holdcount > 0) { 1226 /* 1227 * Ensure that fdtol->fdl_leader 1228 * remains valid in closef(). 1229 */ 1230 fdtol->fdl_wakeup = 1; 1231 tsleep(fdtol, 0, "fdlhold", 0); 1232 goto retry; 1233 } 1234 } 1235 fdtol->fdl_refcount--; 1236 if (fdtol->fdl_refcount == 0 && 1237 fdtol->fdl_holdcount == 0) { 1238 fdtol->fdl_next->fdl_prev = fdtol->fdl_prev; 1239 fdtol->fdl_prev->fdl_next = fdtol->fdl_next; 1240 } else 1241 fdtol = NULL; 1242 p->p_fdtol = NULL; 1243 if (fdtol != NULL) 1244 free(fdtol, M_FILEDESC_TO_LEADER); 1245 } 1246 if (--fdp->fd_refcnt > 0) 1247 return; 1248 /* 1249 * we are the last reference to the structure, we can 1250 * safely assume it will not change out from under us. 1251 */ 1252 fpp = fdp->fd_ofiles; 1253 for (i = fdp->fd_lastfile; i-- >= 0; fpp++) { 1254 if (*fpp) 1255 (void) closef(*fpp, td); 1256 } 1257 if (fdp->fd_nfiles > NDFILE) 1258 free(fdp->fd_ofiles, M_FILEDESC); 1259 if (fdp->fd_cdir) { 1260 cache_drop(fdp->fd_ncdir); 1261 vrele(fdp->fd_cdir); 1262 } 1263 if (fdp->fd_rdir) { 1264 cache_drop(fdp->fd_nrdir); 1265 vrele(fdp->fd_rdir); 1266 } 1267 if (fdp->fd_jdir) { 1268 cache_drop(fdp->fd_njdir); 1269 vrele(fdp->fd_jdir); 1270 } 1271 if (fdp->fd_knlist) 1272 free(fdp->fd_knlist, M_KQUEUE); 1273 if (fdp->fd_knhash) 1274 free(fdp->fd_knhash, M_KQUEUE); 1275 free(fdp, M_FILEDESC); 1276 } 1277 1278 /* 1279 * For setugid programs, we don't want to people to use that setugidness 1280 * to generate error messages which write to a file which otherwise would 1281 * otherwise be off-limits to the process. 1282 * 1283 * This is a gross hack to plug the hole. A better solution would involve 1284 * a special vop or other form of generalized access control mechanism. We 1285 * go ahead and just reject all procfs file systems accesses as dangerous. 1286 * 1287 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is 1288 * sufficient. We also don't for check setugidness since we know we are. 1289 */ 1290 static int 1291 is_unsafe(struct file *fp) 1292 { 1293 if (fp->f_type == DTYPE_VNODE && 1294 ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS) 1295 return (1); 1296 return (0); 1297 } 1298 1299 /* 1300 * Make this setguid thing safe, if at all possible. 1301 */ 1302 void 1303 setugidsafety(struct proc *p) 1304 { 1305 struct thread *td = p->p_thread; 1306 struct filedesc *fdp = p->p_fd; 1307 int i; 1308 1309 /* Certain daemons might not have file descriptors. */ 1310 if (fdp == NULL) 1311 return; 1312 1313 /* 1314 * note: fdp->fd_ofiles may be reallocated out from under us while 1315 * we are blocked in a close. Be careful! 1316 */ 1317 for (i = 0; i <= fdp->fd_lastfile; i++) { 1318 if (i > 2) 1319 break; 1320 if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) { 1321 struct file *fp; 1322 1323 #if 0 1324 if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0) 1325 (void) munmapfd(p, i); 1326 #endif 1327 if (i < fdp->fd_knlistsize) 1328 knote_fdclose(p, i); 1329 /* 1330 * NULL-out descriptor prior to close to avoid 1331 * a race while close blocks. 1332 */ 1333 fp = fdp->fd_ofiles[i]; 1334 fdp->fd_ofiles[i] = NULL; 1335 fdp->fd_ofileflags[i] = 0; 1336 if (i < fdp->fd_freefile) 1337 fdp->fd_freefile = i; 1338 (void) closef(fp, td); 1339 } 1340 } 1341 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) 1342 fdp->fd_lastfile--; 1343 } 1344 1345 /* 1346 * Close any files on exec? 1347 */ 1348 void 1349 fdcloseexec(struct proc *p) 1350 { 1351 struct thread *td = p->p_thread; 1352 struct filedesc *fdp = p->p_fd; 1353 int i; 1354 1355 /* Certain daemons might not have file descriptors. */ 1356 if (fdp == NULL) 1357 return; 1358 1359 /* 1360 * We cannot cache fd_ofiles or fd_ofileflags since operations 1361 * may block and rip them out from under us. 1362 */ 1363 for (i = 0; i <= fdp->fd_lastfile; i++) { 1364 if (fdp->fd_ofiles[i] != NULL && 1365 (fdp->fd_ofileflags[i] & UF_EXCLOSE)) { 1366 struct file *fp; 1367 1368 #if 0 1369 if (fdp->fd_ofileflags[i] & UF_MAPPED) 1370 (void) munmapfd(p, i); 1371 #endif 1372 if (i < fdp->fd_knlistsize) 1373 knote_fdclose(p, i); 1374 /* 1375 * NULL-out descriptor prior to close to avoid 1376 * a race while close blocks. 1377 */ 1378 fp = fdp->fd_ofiles[i]; 1379 fdp->fd_ofiles[i] = NULL; 1380 fdp->fd_ofileflags[i] = 0; 1381 if (i < fdp->fd_freefile) 1382 fdp->fd_freefile = i; 1383 (void) closef(fp, td); 1384 } 1385 } 1386 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) 1387 fdp->fd_lastfile--; 1388 } 1389 1390 /* 1391 * It is unsafe for set[ug]id processes to be started with file 1392 * descriptors 0..2 closed, as these descriptors are given implicit 1393 * significance in the Standard C library. fdcheckstd() will create a 1394 * descriptor referencing /dev/null for each of stdin, stdout, and 1395 * stderr that is not already open. 1396 */ 1397 int 1398 fdcheckstd(struct proc *p) 1399 { 1400 struct thread *td = p->p_thread; 1401 struct nlookupdata nd; 1402 struct filedesc *fdp; 1403 struct file *fp; 1404 register_t retval; 1405 int fd, i, error, flags, devnull; 1406 1407 fdp = p->p_fd; 1408 if (fdp == NULL) 1409 return (0); 1410 devnull = -1; 1411 error = 0; 1412 for (i = 0; i < 3; i++) { 1413 if (fdp->fd_ofiles[i] != NULL) 1414 continue; 1415 if (devnull < 0) { 1416 if ((error = falloc(p, &fp, NULL)) != 0) 1417 break; 1418 1419 error = nlookup_init(&nd, "/dev/null", UIO_SYSSPACE, 1420 NLC_FOLLOW|NLC_LOCKVP); 1421 flags = FREAD | FWRITE; 1422 if (error == 0) 1423 error = vn_open(&nd, fp, flags, 0); 1424 if (error == 0) 1425 error = fsetfd(p, fp, &fd); 1426 fdrop(fp, td); 1427 nlookup_done(&nd); 1428 if (error) 1429 break; 1430 KKASSERT(i == fd); 1431 devnull = fd; 1432 } else { 1433 error = kern_dup(DUP_FIXED, devnull, i, &retval); 1434 if (error != 0) 1435 break; 1436 } 1437 } 1438 return (error); 1439 } 1440 1441 /* 1442 * Internal form of close. 1443 * Decrement reference count on file structure. 1444 * Note: td and/or p may be NULL when closing a file 1445 * that was being passed in a message. 1446 */ 1447 int 1448 closef(struct file *fp, struct thread *td) 1449 { 1450 struct vnode *vp; 1451 struct flock lf; 1452 struct filedesc_to_leader *fdtol; 1453 struct proc *p; 1454 1455 if (fp == NULL) 1456 return (0); 1457 if (td == NULL) { 1458 td = curthread; 1459 p = NULL; /* allow no proc association */ 1460 } else { 1461 p = td->td_proc; /* can also be NULL */ 1462 } 1463 /* 1464 * POSIX record locking dictates that any close releases ALL 1465 * locks owned by this process. This is handled by setting 1466 * a flag in the unlock to free ONLY locks obeying POSIX 1467 * semantics, and not to free BSD-style file locks. 1468 * If the descriptor was in a message, POSIX-style locks 1469 * aren't passed with the descriptor. 1470 */ 1471 if (p != NULL && 1472 fp->f_type == DTYPE_VNODE) { 1473 if ((p->p_leader->p_flag & P_ADVLOCK) != 0) { 1474 lf.l_whence = SEEK_SET; 1475 lf.l_start = 0; 1476 lf.l_len = 0; 1477 lf.l_type = F_UNLCK; 1478 vp = (struct vnode *)fp->f_data; 1479 (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, 1480 &lf, F_POSIX); 1481 } 1482 fdtol = p->p_fdtol; 1483 if (fdtol != NULL) { 1484 /* 1485 * Handle special case where file descriptor table 1486 * is shared between multiple process leaders. 1487 */ 1488 for (fdtol = fdtol->fdl_next; 1489 fdtol != p->p_fdtol; 1490 fdtol = fdtol->fdl_next) { 1491 if ((fdtol->fdl_leader->p_flag & 1492 P_ADVLOCK) == 0) 1493 continue; 1494 fdtol->fdl_holdcount++; 1495 lf.l_whence = SEEK_SET; 1496 lf.l_start = 0; 1497 lf.l_len = 0; 1498 lf.l_type = F_UNLCK; 1499 vp = (struct vnode *)fp->f_data; 1500 (void) VOP_ADVLOCK(vp, 1501 (caddr_t)p->p_leader, 1502 F_UNLCK, &lf, F_POSIX); 1503 fdtol->fdl_holdcount--; 1504 if (fdtol->fdl_holdcount == 0 && 1505 fdtol->fdl_wakeup != 0) { 1506 fdtol->fdl_wakeup = 0; 1507 wakeup(fdtol); 1508 } 1509 } 1510 } 1511 } 1512 return (fdrop(fp, td)); 1513 } 1514 1515 int 1516 fdrop(struct file *fp, struct thread *td) 1517 { 1518 struct flock lf; 1519 struct vnode *vp; 1520 int error; 1521 1522 if (--fp->f_count > 0) 1523 return (0); 1524 if (fp->f_count < 0) 1525 panic("fdrop: count < 0"); 1526 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { 1527 lf.l_whence = SEEK_SET; 1528 lf.l_start = 0; 1529 lf.l_len = 0; 1530 lf.l_type = F_UNLCK; 1531 vp = (struct vnode *)fp->f_data; 1532 (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 1533 } 1534 if (fp->f_ops != &badfileops) 1535 error = fo_close(fp, td); 1536 else 1537 error = 0; 1538 ffree(fp); 1539 return (error); 1540 } 1541 1542 /* 1543 * Apply an advisory lock on a file descriptor. 1544 * 1545 * Just attempt to get a record lock of the requested type on 1546 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 1547 */ 1548 /* ARGSUSED */ 1549 int 1550 flock(struct flock_args *uap) 1551 { 1552 struct proc *p = curproc; 1553 struct filedesc *fdp = p->p_fd; 1554 struct file *fp; 1555 struct vnode *vp; 1556 struct flock lf; 1557 1558 if ((unsigned)uap->fd >= fdp->fd_nfiles || 1559 (fp = fdp->fd_ofiles[uap->fd]) == NULL) 1560 return (EBADF); 1561 if (fp->f_type != DTYPE_VNODE) 1562 return (EOPNOTSUPP); 1563 vp = (struct vnode *)fp->f_data; 1564 lf.l_whence = SEEK_SET; 1565 lf.l_start = 0; 1566 lf.l_len = 0; 1567 if (uap->how & LOCK_UN) { 1568 lf.l_type = F_UNLCK; 1569 fp->f_flag &= ~FHASLOCK; 1570 return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK)); 1571 } 1572 if (uap->how & LOCK_EX) 1573 lf.l_type = F_WRLCK; 1574 else if (uap->how & LOCK_SH) 1575 lf.l_type = F_RDLCK; 1576 else 1577 return (EBADF); 1578 fp->f_flag |= FHASLOCK; 1579 if (uap->how & LOCK_NB) 1580 return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK)); 1581 return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT)); 1582 } 1583 1584 /* 1585 * File Descriptor pseudo-device driver (/dev/fd/). 1586 * 1587 * Opening minor device N dup()s the file (if any) connected to file 1588 * descriptor N belonging to the calling process. Note that this driver 1589 * consists of only the ``open()'' routine, because all subsequent 1590 * references to this file will be direct to the other driver. 1591 */ 1592 /* ARGSUSED */ 1593 static int 1594 fdopen(dev_t dev, int mode, int type, struct thread *td) 1595 { 1596 KKASSERT(td->td_proc != NULL); 1597 1598 /* 1599 * XXX Kludge: set curproc->p_dupfd to contain the value of the 1600 * the file descriptor being sought for duplication. The error 1601 * return ensures that the vnode for this device will be released 1602 * by vn_open. Open will detect this special error and take the 1603 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 1604 * will simply report the error. 1605 */ 1606 td->td_proc->p_dupfd = minor(dev); 1607 return (ENODEV); 1608 } 1609 1610 /* 1611 * Duplicate the specified descriptor to a free descriptor. 1612 */ 1613 int 1614 dupfdopen(struct filedesc *fdp, int indx, int dfd, int mode, int error) 1615 { 1616 struct file *wfp; 1617 struct file *fp; 1618 1619 /* 1620 * If the to-be-dup'd fd number is greater than the allowed number 1621 * of file descriptors, or the fd to be dup'd has already been 1622 * closed, then reject. 1623 */ 1624 if ((u_int)dfd >= fdp->fd_nfiles || 1625 (wfp = fdp->fd_ofiles[dfd]) == NULL) { 1626 return (EBADF); 1627 } 1628 1629 /* 1630 * There are two cases of interest here. 1631 * 1632 * For ENODEV simply dup (dfd) to file descriptor 1633 * (indx) and return. 1634 * 1635 * For ENXIO steal away the file structure from (dfd) and 1636 * store it in (indx). (dfd) is effectively closed by 1637 * this operation. 1638 * 1639 * Any other error code is just returned. 1640 */ 1641 switch (error) { 1642 case ENODEV: 1643 /* 1644 * Check that the mode the file is being opened for is a 1645 * subset of the mode of the existing descriptor. 1646 */ 1647 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) 1648 return (EACCES); 1649 fp = fdp->fd_ofiles[indx]; 1650 #if 0 1651 if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED) 1652 (void) munmapfd(p, indx); 1653 #endif 1654 fdp->fd_ofiles[indx] = wfp; 1655 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 1656 fhold(wfp); 1657 if (indx > fdp->fd_lastfile) 1658 fdp->fd_lastfile = indx; 1659 /* 1660 * we now own the reference to fp that the ofiles[] array 1661 * used to own. Release it. 1662 */ 1663 if (fp) 1664 fdrop(fp, curthread); 1665 return (0); 1666 1667 case ENXIO: 1668 /* 1669 * Steal away the file pointer from dfd, and stuff it into indx. 1670 */ 1671 fp = fdp->fd_ofiles[indx]; 1672 #if 0 1673 if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED) 1674 (void) munmapfd(p, indx); 1675 #endif 1676 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; 1677 fdp->fd_ofiles[dfd] = NULL; 1678 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 1679 fdp->fd_ofileflags[dfd] = 0; 1680 1681 /* 1682 * we now own the reference to fp that the ofiles[] array 1683 * used to own. Release it. 1684 */ 1685 if (fp) 1686 fdrop(fp, curthread); 1687 /* 1688 * Complete the clean up of the filedesc structure by 1689 * recomputing the various hints. 1690 */ 1691 if (indx > fdp->fd_lastfile) { 1692 fdp->fd_lastfile = indx; 1693 } else { 1694 while (fdp->fd_lastfile > 0 && 1695 fdp->fd_ofiles[fdp->fd_lastfile] == NULL) { 1696 fdp->fd_lastfile--; 1697 } 1698 if (dfd < fdp->fd_freefile) 1699 fdp->fd_freefile = dfd; 1700 } 1701 return (0); 1702 1703 default: 1704 return (error); 1705 } 1706 /* NOTREACHED */ 1707 } 1708 1709 1710 struct filedesc_to_leader * 1711 filedesc_to_leader_alloc(struct filedesc_to_leader *old, 1712 struct proc *leader) 1713 { 1714 struct filedesc_to_leader *fdtol; 1715 1716 fdtol = malloc(sizeof(struct filedesc_to_leader), 1717 M_FILEDESC_TO_LEADER, M_WAITOK); 1718 fdtol->fdl_refcount = 1; 1719 fdtol->fdl_holdcount = 0; 1720 fdtol->fdl_wakeup = 0; 1721 fdtol->fdl_leader = leader; 1722 if (old != NULL) { 1723 fdtol->fdl_next = old->fdl_next; 1724 fdtol->fdl_prev = old; 1725 old->fdl_next = fdtol; 1726 fdtol->fdl_next->fdl_prev = fdtol; 1727 } else { 1728 fdtol->fdl_next = fdtol; 1729 fdtol->fdl_prev = fdtol; 1730 } 1731 return fdtol; 1732 } 1733 1734 /* 1735 * Get file structures. 1736 */ 1737 static int 1738 sysctl_kern_file(SYSCTL_HANDLER_ARGS) 1739 { 1740 struct kinfo_file kf; 1741 struct filedesc *fdp; 1742 struct file *fp; 1743 struct proc *p; 1744 int count; 1745 int error; 1746 int n; 1747 1748 /* 1749 * Note: because the number of file descriptors is calculated 1750 * in different ways for sizing vs returning the data, 1751 * there is information leakage from the first loop. However, 1752 * it is of a similar order of magnitude to the leakage from 1753 * global system statistics such as kern.openfiles. 1754 * 1755 * When just doing a count, note that we cannot just count 1756 * the elements and add f_count via the filehead list because 1757 * threaded processes share their descriptor table and f_count might 1758 * still be '1' in that case. 1759 */ 1760 count = 0; 1761 error = 0; 1762 LIST_FOREACH(p, &allproc, p_list) { 1763 if (p->p_stat == SIDL) 1764 continue; 1765 if (!PRISON_CHECK(req->td->td_proc->p_ucred, p->p_ucred) != 0) 1766 continue; 1767 if ((fdp = p->p_fd) == NULL) 1768 continue; 1769 for (n = 0; n < fdp->fd_nfiles; ++n) { 1770 if ((fp = fdp->fd_ofiles[n]) == NULL) 1771 continue; 1772 if (req->oldptr == NULL) { 1773 ++count; 1774 } else { 1775 kcore_make_file(&kf, fp, p->p_pid, 1776 p->p_ucred->cr_uid, n); 1777 error = SYSCTL_OUT(req, &kf, sizeof(kf)); 1778 if (error) 1779 break; 1780 } 1781 } 1782 if (error) 1783 break; 1784 } 1785 1786 /* 1787 * When just calculating the size, overestimate a bit to try to 1788 * prevent system activity from causing the buffer-fill call 1789 * to fail later on. 1790 */ 1791 if (req->oldptr == NULL) { 1792 count = (count + 16) + (count / 10); 1793 error = SYSCTL_OUT(req, NULL, count * sizeof(kf)); 1794 } 1795 return (error); 1796 } 1797 1798 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD, 1799 0, 0, sysctl_kern_file, "S,file", "Entire file table"); 1800 1801 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, 1802 &maxfilesperproc, 0, "Maximum files allowed open per process"); 1803 1804 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, 1805 &maxfiles, 0, "Maximum number of files"); 1806 1807 SYSCTL_INT(_kern, OID_AUTO, maxfilesrootres, CTLFLAG_RW, 1808 &maxfilesrootres, 0, "Descriptors reserved for root use"); 1809 1810 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD, 1811 &nfiles, 0, "System-wide number of open files"); 1812 1813 static void 1814 fildesc_drvinit(void *unused) 1815 { 1816 int fd; 1817 1818 cdevsw_add(&fildesc_cdevsw, 0, 0); 1819 for (fd = 0; fd < NUMFDESC; fd++) { 1820 make_dev(&fildesc_cdevsw, fd, 1821 UID_BIN, GID_BIN, 0666, "fd/%d", fd); 1822 } 1823 make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "stdin"); 1824 make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "stdout"); 1825 make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "stderr"); 1826 } 1827 1828 struct fileops badfileops = { 1829 NULL, /* port */ 1830 NULL, /* clone */ 1831 badfo_readwrite, 1832 badfo_readwrite, 1833 badfo_ioctl, 1834 badfo_poll, 1835 badfo_kqfilter, 1836 badfo_stat, 1837 badfo_close 1838 }; 1839 1840 static int 1841 badfo_readwrite( 1842 struct file *fp, 1843 struct uio *uio, 1844 struct ucred *cred, 1845 int flags, 1846 struct thread *td 1847 ) { 1848 return (EBADF); 1849 } 1850 1851 static int 1852 badfo_ioctl(struct file *fp, u_long com, caddr_t data, struct thread *td) 1853 { 1854 return (EBADF); 1855 } 1856 1857 static int 1858 badfo_poll(struct file *fp, int events, struct ucred *cred, struct thread *td) 1859 { 1860 return (0); 1861 } 1862 1863 static int 1864 badfo_kqfilter(struct file *fp, struct knote *kn) 1865 { 1866 return (0); 1867 } 1868 1869 static int 1870 badfo_stat(struct file *fp, struct stat *sb, struct thread *td) 1871 { 1872 return (EBADF); 1873 } 1874 1875 static int 1876 badfo_close(struct file *fp, struct thread *td) 1877 { 1878 return (EBADF); 1879 } 1880 1881 SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR, 1882 fildesc_drvinit,NULL) 1883