1 /* 2 * Copyright (c) 1993, 1995 Jan-Simon Pendry 3 * Copyright (c) 1993, 1995 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Jan-Simon Pendry. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 34 * 35 * $FreeBSD: src/sys/miscfs/procfs/procfs_vnops.c,v 1.76.2.7 2002/01/22 17:22:59 nectar Exp $ 36 */ 37 38 /* 39 * procfs vnode interface 40 */ 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/time.h> 45 #include <sys/kernel.h> 46 #include <sys/lock.h> 47 #include <sys/fcntl.h> 48 #include <sys/proc.h> 49 #include <sys/caps.h> 50 #include <sys/signalvar.h> 51 #include <sys/vnode.h> 52 #include <sys/uio.h> 53 #include <sys/mount.h> 54 #include <sys/namei.h> 55 #include <sys/dirent.h> 56 #include <sys/malloc.h> 57 #include <sys/reg.h> 58 #include <vm/vm_zone.h> 59 #include <vfs/procfs/procfs.h> 60 #include <sys/pioctl.h> 61 62 #include <sys/spinlock2.h> 63 64 #include <machine/limits.h> 65 66 static int procfs_access (struct vop_access_args *); 67 static int procfs_badop (struct vop_generic_args *); 68 static int procfs_bmap (struct vop_bmap_args *); 69 static int procfs_close (struct vop_close_args *); 70 static int procfs_getattr (struct vop_getattr_args *); 71 static int procfs_inactive (struct vop_inactive_args *); 72 static int procfs_ioctl (struct vop_ioctl_args *); 73 static int procfs_kqfilter (struct vop_kqfilter_args *); 74 static int procfs_lookup (struct vop_old_lookup_args *); 75 static int procfs_open (struct vop_open_args *); 76 static int procfs_print (struct vop_print_args *); 77 static int procfs_readdir (struct vop_readdir_args *); 78 static int procfs_readlink (struct vop_readlink_args *); 79 static int procfs_reclaim (struct vop_reclaim_args *); 80 static int procfs_setattr (struct vop_setattr_args *); 81 82 static int procfs_readdir_proc(struct vop_readdir_args *); 83 static int procfs_readdir_root(struct vop_readdir_args *); 84 85 /* 86 * procfs vnode operations. 87 */ 88 struct vop_ops procfs_vnode_vops = { 89 .vop_default = vop_defaultop, 90 .vop_access = procfs_access, 91 .vop_advlock = (void *)procfs_badop, 92 .vop_bmap = procfs_bmap, 93 .vop_close = procfs_close, 94 .vop_old_create = (void *)procfs_badop, 95 .vop_getattr = procfs_getattr, 96 .vop_inactive = procfs_inactive, 97 .vop_old_link = (void *)procfs_badop, 98 .vop_old_lookup = procfs_lookup, 99 .vop_old_mkdir = (void *)procfs_badop, 100 .vop_old_mknod = (void *)procfs_badop, 101 .vop_open = procfs_open, 102 .vop_pathconf = vop_stdpathconf, 103 .vop_print = procfs_print, 104 .vop_read = procfs_rw, 105 .vop_readdir = procfs_readdir, 106 .vop_readlink = procfs_readlink, 107 .vop_reclaim = procfs_reclaim, 108 .vop_old_remove = (void *)procfs_badop, 109 .vop_old_rename = (void *)procfs_badop, 110 .vop_old_rmdir = (void *)procfs_badop, 111 .vop_setattr = procfs_setattr, 112 .vop_old_symlink = (void *)procfs_badop, 113 .vop_write = (void *)procfs_rw, 114 .vop_ioctl = procfs_ioctl, 115 .vop_kqfilter = procfs_kqfilter, 116 }; 117 118 119 /* 120 * This is a list of the valid names in the 121 * process-specific sub-directories. It is 122 * used in procfs_lookup and procfs_readdir 123 */ 124 static struct proc_target { 125 u_char pt_type; 126 u_char pt_namlen; 127 char *pt_name; 128 pfstype pt_pfstype; 129 int (*pt_valid) (struct lwp *p); 130 } proc_targets[] = { 131 #define N(s) sizeof(s)-1, s 132 /* name type validp */ 133 { DT_DIR, N("."), Pproc, NULL }, 134 { DT_DIR, N(".."), Proot, NULL }, 135 { DT_REG, N("mem"), Pmem, NULL }, 136 { DT_REG, N("regs"), Pregs, procfs_validregs }, 137 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs }, 138 { DT_REG, N("dbregs"), Pdbregs, procfs_validdbregs }, 139 { DT_REG, N("ctl"), Pctl, NULL }, 140 { DT_REG, N("status"), Pstatus, NULL }, 141 { DT_REG, N("note"), Pnote, NULL }, 142 { DT_REG, N("notepg"), Pnotepg, NULL }, 143 { DT_REG, N("map"), Pmap, procfs_validmap }, 144 { DT_REG, N("etype"), Ptype, procfs_validtype }, 145 { DT_REG, N("cmdline"), Pcmdline, NULL }, 146 { DT_REG, N("rlimit"), Prlimit, NULL }, 147 { DT_LNK, N("file"), Pfile, NULL }, 148 { DT_LNK, N("exe"), Pfile, NULL }, 149 #undef N 150 }; 151 static const int nproc_targets = NELEM(proc_targets); 152 153 static pid_t atopid (const char *, u_int); 154 155 /* 156 * set things up for doing i/o on 157 * the pfsnode (vp). (vp) is locked 158 * on entry, and should be left locked 159 * on exit. 160 * 161 * for procfs we don't need to do anything 162 * in particular for i/o. all that is done 163 * is to support exclusive open on process 164 * memory images. 165 * 166 * procfs_open(struct vnode *a_vp, int a_mode, struct ucred *a_cred, 167 * struct file *a_fp) 168 */ 169 static int 170 procfs_open(struct vop_open_args *ap) 171 { 172 struct pfsnode *pfs = VTOPFS(ap->a_vp); 173 struct proc *p1, *p2; 174 int error; 175 176 p2 = pfs_pfind(pfs->pfs_pid); 177 if (p2 == NULL) 178 return (ENOENT); 179 if (pfs->pfs_pid && !PRISON_CHECK(ap->a_cred, p2->p_ucred)) { 180 error = ENOENT; 181 goto done; 182 } 183 184 switch (pfs->pfs_type) { 185 case Pmem: 186 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) || 187 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) { 188 error = EBUSY; 189 goto done; 190 } 191 192 p1 = curproc; 193 KKASSERT(p1); 194 /* Can't trace a process that's currently exec'ing. */ 195 if ((p2->p_flags & P_INEXEC) != 0) { 196 error = EAGAIN; 197 goto done; 198 } 199 if (!CHECKIO(p1, p2) || p_trespass(ap->a_cred, p2->p_ucred)) { 200 error = EPERM; 201 goto done; 202 } 203 204 if (ap->a_mode & FWRITE) 205 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); 206 207 break; 208 209 default: 210 break; 211 } 212 error = vop_stdopen(ap); 213 done: 214 pfs_pdone(p2); 215 return error; 216 } 217 218 /* 219 * close the pfsnode (vp) after doing i/o. 220 * (vp) is not locked on entry or exit. 221 * 222 * nothing to do for procfs other than undo 223 * any exclusive open flag (see _open above). 224 * 225 * procfs_close(struct vnode *a_vp, int a_fflag, struct ucred *a_cred) 226 */ 227 static int 228 procfs_close(struct vop_close_args *ap) 229 { 230 struct pfsnode *pfs = VTOPFS(ap->a_vp); 231 struct proc *p; 232 233 /* 234 * Make sure the lock is exclusive for opencount tests 235 */ 236 vn_lock(ap->a_vp, LK_UPGRADE | LK_RETRY); 237 238 switch (pfs->pfs_type) { 239 case Pmem: 240 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) 241 pfs->pfs_flags &= ~(FWRITE|O_EXCL); 242 /* 243 * v_opencount determines the last real close on the vnode. 244 * 245 * If this is the last close, then it checks to see if 246 * the target process has PF_LINGER set in p_pfsflags, 247 * if this is *not* the case, then the process' stop flags 248 * are cleared, and the process is woken up. This is 249 * to help prevent the case where a process has been 250 * told to stop on an event, but then the requesting process 251 * has gone away or forgotten about it. 252 */ 253 p = NULL; 254 if ((ap->a_vp->v_opencount < 2) 255 && ((p = pfs_pfind(pfs->pfs_pid)) != NULL || 256 (p = pfs_zpfind(pfs->pfs_pid)) != NULL) 257 && !(p->p_pfsflags & PF_LINGER)) { 258 spin_lock(&p->p_spin); 259 p->p_stops = 0; 260 p->p_step = 0; 261 spin_unlock(&p->p_spin); 262 wakeup(&p->p_stype); 263 wakeup(&p->p_step); 264 } 265 pfs_pdone(p); 266 break; 267 default: 268 break; 269 } 270 271 return (vop_stdclose(ap)); 272 } 273 274 /* 275 * do an ioctl operation on a pfsnode (vp). 276 * (vp) is not locked on entry or exit. 277 */ 278 static int 279 procfs_ioctl(struct vop_ioctl_args *ap) 280 { 281 struct pfsnode *pfs = VTOPFS(ap->a_vp); 282 struct proc *procp; 283 struct proc *p; 284 int error; 285 int signo; 286 struct procfs_status *psp; 287 unsigned char flags; 288 289 procp = pfs_pfind(pfs->pfs_pid); 290 if (procp == NULL) 291 return ENOTTY; 292 p = curproc; 293 if (p == NULL) { 294 error = EINVAL; 295 goto done; 296 } 297 298 /* Can't trace a process that's currently exec'ing. */ 299 if ((procp->p_flags & P_INEXEC) != 0) { 300 error = EAGAIN; 301 goto done; 302 } 303 if (!CHECKIO(p, procp) || p_trespass(ap->a_cred, procp->p_ucred)) { 304 error = EPERM; 305 goto done; 306 } 307 308 switch (ap->a_command) { 309 case PIOCBIS: 310 spin_lock(&procp->p_spin); 311 procp->p_stops |= *(unsigned int*)ap->a_data; 312 spin_unlock(&procp->p_spin); 313 break; 314 case PIOCBIC: 315 spin_lock(&procp->p_spin); 316 procp->p_stops &= ~*(unsigned int*)ap->a_data; 317 spin_unlock(&procp->p_spin); 318 break; 319 case PIOCSFL: 320 /* 321 * NFLAGS is "non-suser_xxx flags" -- currently, only 322 * PFS_ISUGID ("ignore set u/g id"); 323 */ 324 #define NFLAGS (PF_ISUGID) 325 flags = (unsigned char)*(unsigned int*)ap->a_data; 326 if (flags & NFLAGS && 327 (error = caps_priv_check(ap->a_cred, 328 SYSCAP_RESTRICTEDROOT))) { 329 goto done; 330 } 331 procp->p_pfsflags = flags; 332 break; 333 case PIOCGFL: 334 *(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags; 335 break; 336 case PIOCSTATUS: 337 /* 338 * NOTE: syscall entry deals with stopevents and may run 339 * without the MP lock. 340 */ 341 psp = (struct procfs_status *)ap->a_data; 342 psp->flags = procp->p_pfsflags; 343 psp->events = procp->p_stops; 344 spin_lock(&procp->p_spin); 345 if (procp->p_step) { 346 psp->state = 0; 347 psp->why = procp->p_stype; 348 psp->val = procp->p_xstat; 349 spin_unlock(&procp->p_spin); 350 } else { 351 psp->state = 1; 352 spin_unlock(&procp->p_spin); 353 psp->why = 0; /* Not defined values */ 354 psp->val = 0; /* Not defined values */ 355 } 356 break; 357 case PIOCWAIT: 358 /* 359 * NOTE: syscall entry deals with stopevents and may run 360 * without the MP lock. 361 */ 362 psp = (struct procfs_status *)ap->a_data; 363 spin_lock(&procp->p_spin); 364 while (procp->p_step == 0) { 365 tsleep_interlock(&procp->p_stype, PCATCH); 366 spin_unlock(&procp->p_spin); 367 if (procp->p_stops == 0) { 368 error = 0; 369 goto done; 370 } 371 if (procp->p_flags & P_POSTEXIT) { 372 error = EINVAL; 373 goto done; 374 } 375 if (procp->p_flags & P_INEXEC) { 376 error = EAGAIN; 377 goto done; 378 } 379 error = tsleep(&procp->p_stype, PCATCH | PINTERLOCKED, 380 "piocwait", 0); 381 if (error) 382 goto done; 383 spin_lock(&procp->p_spin); 384 } 385 spin_unlock(&procp->p_spin); 386 psp->state = 1; /* It stopped */ 387 psp->flags = procp->p_pfsflags; 388 psp->events = procp->p_stops; 389 psp->why = procp->p_stype; /* why it stopped */ 390 psp->val = procp->p_xstat; /* any extra info */ 391 break; 392 case PIOCCONT: /* Restart a proc */ 393 /* 394 * NOTE: syscall entry deals with stopevents and may run 395 * without the MP lock. However, the caller is 396 * presumably interlocked by having waited. 397 */ 398 if (procp->p_step == 0) { 399 error = EINVAL; /* Can only start a stopped process */ 400 goto done; 401 } 402 if ((signo = *(int*)ap->a_data) != 0) { 403 if (signo >= NSIG || signo <= 0) { 404 error = EINVAL; 405 goto done; 406 } 407 ksignal(procp, signo); 408 } 409 procp->p_step = 0; 410 wakeup(&procp->p_step); 411 break; 412 default: 413 error = ENOTTY; 414 goto done; 415 } 416 error = 0; 417 done: 418 pfs_pdone(procp); 419 return error; 420 } 421 422 /* 423 * do block mapping for pfsnode (vp). 424 * since we don't use the buffer cache 425 * for procfs this function should never 426 * be called. in any case, it's not clear 427 * what part of the kernel ever makes use 428 * of this function. for sanity, this is the 429 * usual no-op bmap, although returning 430 * (EIO) would be a reasonable alternative. 431 * 432 * XXX mmap assumes buffer cache operation 433 * 434 * procfs_bmap(struct vnode *a_vp, off_t a_loffset, 435 * off_t *a_doffsetp, int *a_runp, int *a_runb) 436 */ 437 static int 438 procfs_bmap(struct vop_bmap_args *ap) 439 { 440 if (ap->a_doffsetp != NULL) 441 *ap->a_doffsetp = ap->a_loffset; 442 if (ap->a_runp != NULL) 443 *ap->a_runp = 0; 444 if (ap->a_runb != NULL) 445 *ap->a_runb = 0; 446 return (0); 447 } 448 449 /* 450 * procfs_inactive is called when the pfsnode 451 * is vrele'd and the reference count goes 452 * to zero. (vp) will be on the vnode free 453 * list, so to get it back vget() must be 454 * used. 455 * 456 * (vp) is locked on entry, but must be unlocked on exit. 457 * 458 * procfs_inactive(struct vnode *a_vp) 459 */ 460 static int 461 procfs_inactive(struct vop_inactive_args *ap) 462 { 463 struct pfsnode *pfs = VTOPFS(ap->a_vp); 464 465 if (pfs->pfs_pid & PFS_DEAD) 466 vrecycle(ap->a_vp); 467 return (0); 468 } 469 470 /* 471 * _reclaim is called when getnewvnode() 472 * wants to make use of an entry on the vnode 473 * free list. at this time the filesystem needs 474 * to free any private data and remove the node 475 * from any private lists. 476 * 477 * procfs_reclaim(struct vnode *a_vp) 478 */ 479 static int 480 procfs_reclaim(struct vop_reclaim_args *ap) 481 { 482 return (procfs_freevp(ap->a_vp)); 483 } 484 485 /* 486 * _print is used for debugging. 487 * just print a readable description 488 * of (vp). 489 * 490 * procfs_print(struct vnode *a_vp) 491 */ 492 static int 493 procfs_print(struct vop_print_args *ap) 494 { 495 struct pfsnode *pfs = VTOPFS(ap->a_vp); 496 497 kprintf("tag VT_PROCFS, type %d, pid %ld, mode %x, flags %lx\n", 498 pfs->pfs_type, (long)pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags); 499 return (0); 500 } 501 502 /* 503 * generic entry point for unsupported operations 504 */ 505 static int 506 procfs_badop(struct vop_generic_args *ap) 507 { 508 return (EIO); 509 } 510 511 /* 512 * Invent attributes for pfsnode (vp) and store 513 * them in (vap). 514 * Directories lengths are returned as zero since 515 * any real length would require the genuine size 516 * to be computed, and nothing cares anyway. 517 * 518 * this is relatively minimal for procfs. 519 * 520 * procfs_getattr(struct vnode *a_vp, struct vattr *a_vap) 521 */ 522 static int 523 procfs_getattr(struct vop_getattr_args *ap) 524 { 525 struct pfsnode *pfs = VTOPFS(ap->a_vp); 526 struct vattr *vap = ap->a_vap; 527 struct proc *procp; 528 int error; 529 530 /* 531 * First make sure that the process and its credentials 532 * still exist. 533 */ 534 switch (pfs->pfs_type) { 535 case Proot: 536 case Pcurproc: 537 procp = NULL; 538 break; 539 default: 540 procp = pfs_pfind(pfs->pfs_pid); 541 if (procp == NULL || procp->p_ucred == NULL) { 542 error = ENOENT; 543 goto done; 544 } 545 break; 546 } 547 548 error = 0; 549 550 /* start by zeroing out the attributes */ 551 VATTR_NULL(vap); 552 553 /* next do all the common fields */ 554 vap->va_type = ap->a_vp->v_type; 555 vap->va_mode = pfs->pfs_mode; 556 vap->va_fileid = pfs->pfs_fileno; 557 vap->va_flags = 0; 558 vap->va_blocksize = PAGE_SIZE; 559 vap->va_bytes = vap->va_size = 0; 560 vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; 561 562 /* 563 * Make all times be current TOD. 564 * It would be possible to get the process start 565 * time from the p_stat structure, but there's 566 * no "file creation" time stamp anyway, and the 567 * p_stat structure is not addressible if u. gets 568 * swapped out for that process. 569 */ 570 vfs_timestamp(&vap->va_ctime); 571 vap->va_atime = vap->va_mtime = vap->va_ctime; 572 573 /* 574 * If the process has exercised some setuid or setgid 575 * privilege, then rip away read/write permission so 576 * that only root can gain access. 577 */ 578 switch (pfs->pfs_type) { 579 case Pctl: 580 case Pregs: 581 case Pfpregs: 582 case Pdbregs: 583 case Pmem: 584 if (procp->p_flags & P_SUGID) { 585 vap->va_mode &= ~((VREAD|VWRITE)| 586 ((VREAD|VWRITE)>>3)| 587 ((VREAD|VWRITE)>>6)); 588 } 589 break; 590 default: 591 break; 592 } 593 594 /* 595 * now do the object specific fields 596 * 597 * The size could be set from struct reg, but it's hardly 598 * worth the trouble, and it puts some (potentially) machine 599 * dependent data into this machine-independent code. If it 600 * becomes important then this function should break out into 601 * a per-file stat function in the corresponding .c file. 602 */ 603 604 vap->va_nlink = 1; 605 if (procp) { 606 if (procp->p_ucred) { 607 vap->va_uid = procp->p_ucred->cr_uid; 608 vap->va_gid = procp->p_ucred->cr_gid; 609 } else { 610 vap->va_uid = -1; 611 vap->va_gid = -1; 612 } 613 } 614 615 switch (pfs->pfs_type) { 616 case Proot: 617 /* 618 * Set nlink to 1 to tell fts(3) we don't actually know. 619 */ 620 vap->va_nlink = 1; 621 vap->va_uid = 0; 622 vap->va_gid = 0; 623 vap->va_size = vap->va_bytes = DEV_BSIZE; 624 break; 625 626 case Pcurproc: { 627 char buf[16]; /* should be enough */ 628 629 vap->va_uid = 0; 630 vap->va_gid = 0; 631 vap->va_size = ksnprintf(buf, sizeof(buf), 632 "%ld", (long)curproc->p_pid); 633 vap->va_bytes = vap->va_size; 634 break; 635 } 636 637 case Pproc: 638 vap->va_nlink = nproc_targets; 639 vap->va_size = vap->va_bytes = DEV_BSIZE; 640 break; 641 642 case Pfile: { 643 char *fullpath, *freepath; 644 645 if (procp->p_textnch.ncp) { 646 struct nchandle nch; 647 648 cache_copy(&procp->p_textnch, &nch); 649 error = cache_fullpath(procp, &nch, NULL, 650 &fullpath, &freepath, 0); 651 cache_drop(&nch); 652 } else { 653 error = EINVAL; 654 } 655 656 if (error == 0) { 657 vap->va_size = strlen(fullpath); 658 kfree(freepath, M_TEMP); 659 } else { 660 vap->va_size = sizeof("unknown") - 1; 661 error = 0; 662 } 663 vap->va_bytes = vap->va_size; 664 break; 665 } 666 667 case Pmem: 668 /* 669 * If we denied owner access earlier, then we have to 670 * change the owner to root - otherwise 'ps' and friends 671 * will break even though they are setgid kmem. *SIGH* 672 */ 673 if (procp->p_flags & P_SUGID) 674 vap->va_uid = 0; 675 else if (procp->p_ucred) 676 vap->va_uid = procp->p_ucred->cr_uid; 677 else 678 vap->va_uid = -1; 679 break; 680 681 case Pregs: 682 vap->va_bytes = vap->va_size = sizeof(struct reg); 683 break; 684 685 case Pfpregs: 686 vap->va_bytes = vap->va_size = sizeof(struct fpreg); 687 break; 688 689 case Pdbregs: 690 vap->va_bytes = vap->va_size = sizeof(struct dbreg); 691 break; 692 693 case Ptype: 694 case Pmap: 695 case Pctl: 696 case Pstatus: 697 case Pnote: 698 case Pnotepg: 699 case Pcmdline: 700 case Prlimit: 701 break; 702 703 default: 704 panic("procfs_getattr"); 705 } 706 done: 707 pfs_pdone(procp); 708 return (error); 709 } 710 711 /* 712 * procfs_setattr(struct vnode *a_vp, struct vattr *a_vap, 713 * struct ucred *a_cred) 714 */ 715 static int 716 procfs_setattr(struct vop_setattr_args *ap) 717 { 718 if (ap->a_vap->va_flags != VNOVAL) 719 return (EOPNOTSUPP); 720 721 /* 722 * just fake out attribute setting 723 * it's not good to generate an error 724 * return, otherwise things like creat() 725 * will fail when they try to set the 726 * file length to 0. worse, this means 727 * that echo $note > /proc/$pid/note will fail. 728 */ 729 730 return (0); 731 } 732 733 /* 734 * implement access checking. 735 * 736 * procfs_access(struct vnode *a_vp, int a_mode, struct ucred *a_cred) 737 */ 738 static int 739 procfs_access(struct vop_access_args *ap) 740 { 741 struct vattr vattr; 742 int error; 743 744 error = VOP_GETATTR(ap->a_vp, &vattr); 745 if (!error) 746 error = vop_helper_access(ap, vattr.va_uid, vattr.va_gid, 747 vattr.va_mode, 0); 748 return (error); 749 } 750 751 /* 752 * lookup. this is incredibly complicated in the general case, however 753 * for most pseudo-filesystems very little needs to be done. 754 * 755 * procfs_lookup(struct vnode *a_dvp, struct vnode **a_vpp, 756 * struct componentname *a_cnp) 757 */ 758 static int 759 procfs_lookup(struct vop_old_lookup_args *ap) 760 { 761 struct componentname *cnp = ap->a_cnp; 762 struct vnode **vpp = ap->a_vpp; 763 struct vnode *dvp = ap->a_dvp; 764 char *pname = cnp->cn_nameptr; 765 /* struct proc *curp = cnp->cn_proc; */ 766 struct proc_target *pt; 767 pid_t pid; 768 struct pfsnode *pfs; 769 struct proc *p; 770 struct lwp *lp; 771 int i; 772 int error; 773 774 *vpp = NULL; 775 776 if (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME) 777 return (EROFS); 778 779 p = NULL; 780 error = 0; 781 if (cnp->cn_namelen == 1 && *pname == '.') { 782 *vpp = dvp; 783 vref(*vpp); 784 goto out; 785 } 786 787 pfs = VTOPFS(dvp); 788 switch (pfs->pfs_type) { 789 case Proot: 790 if (cnp->cn_flags & CNP_ISDOTDOT) 791 return (EIO); 792 793 if (CNEQ(cnp, "curproc", 7) || CNEQ(cnp, "self", 4)) { 794 error = procfs_allocvp(dvp->v_mount, vpp, 0, Pcurproc); 795 goto out; 796 } 797 798 pid = atopid(pname, cnp->cn_namelen); 799 if (pid == NO_PID) 800 break; 801 802 p = pfs_pfind(pid); 803 if (p == NULL) 804 break; 805 806 if (!PRISON_CHECK(ap->a_cnp->cn_cred, p->p_ucred)) 807 break; 808 809 if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 && 810 ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid) 811 break; 812 813 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc); 814 goto out; 815 816 case Pproc: 817 if (cnp->cn_flags & CNP_ISDOTDOT) { 818 error = procfs_root(dvp->v_mount, vpp); 819 goto out; 820 } 821 822 p = pfs_pfind(pfs->pfs_pid); 823 if (p == NULL) 824 break; 825 /* XXX lwp */ 826 lp = FIRST_LWP_IN_PROC(p); 827 if (lp == NULL) 828 break; 829 830 if (!PRISON_CHECK(ap->a_cnp->cn_cred, p->p_ucred)) 831 break; 832 833 if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 && 834 ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid) 835 break; 836 837 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) { 838 if (cnp->cn_namelen == pt->pt_namlen && 839 bcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 840 (pt->pt_valid == NULL || (*pt->pt_valid)(lp))) 841 goto found; 842 } 843 break; 844 found: 845 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 846 pt->pt_pfstype); 847 goto out; 848 849 default: 850 error = ENOTDIR; 851 goto out; 852 } 853 if (cnp->cn_nameiop == NAMEI_LOOKUP) 854 error = ENOENT; 855 else 856 error = EROFS; 857 /* 858 * If no error occured *vpp will hold a referenced locked vnode. 859 * dvp was passed to us locked and *vpp must be returned locked. 860 * If *vpp != dvp then we should unlock dvp if (1) this is not the 861 * last component or (2) CNP_LOCKPARENT is not set. 862 */ 863 out: 864 if (error == 0 && *vpp != dvp) { 865 if ((cnp->cn_flags & CNP_LOCKPARENT) == 0) { 866 cnp->cn_flags |= CNP_PDIRUNLOCK; 867 vn_unlock(dvp); 868 } 869 } 870 pfs_pdone(p); 871 return (error); 872 } 873 874 /* 875 * Does this process have a text file? 876 */ 877 int 878 procfs_validfile(struct lwp *lp) 879 { 880 return (procfs_findtextvp(lp->lwp_proc) != NULLVP); 881 } 882 883 /* 884 * readdir() returns directory entries from pfsnode (vp). 885 * 886 * We generate just one directory entry at a time, as it would probably 887 * not pay off to buffer several entries locally to save uiomove calls. 888 * 889 * procfs_readdir(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred, 890 * int *a_eofflag, int *a_ncookies, off_t **a_cookies) 891 */ 892 static int 893 procfs_readdir(struct vop_readdir_args *ap) 894 { 895 struct pfsnode *pfs; 896 int error; 897 898 if (ap->a_uio->uio_offset < 0 || ap->a_uio->uio_offset > INT_MAX) 899 return (EINVAL); 900 error = vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 901 if (error) 902 return (error); 903 904 pfs = VTOPFS(ap->a_vp); 905 switch (pfs->pfs_type) { 906 case Pproc: 907 /* 908 * this is for the process-specific sub-directories. 909 * all that is needed to is copy out all the entries 910 * from the procent[] table (top of this file). 911 */ 912 error = procfs_readdir_proc(ap); 913 break; 914 case Proot: 915 /* 916 * this is for the root of the procfs filesystem 917 * what is needed is a special entry for "curproc" 918 * followed by an entry for each process on allproc 919 */ 920 error = procfs_readdir_root(ap); 921 break; 922 default: 923 error = ENOTDIR; 924 break; 925 } 926 927 vn_unlock(ap->a_vp); 928 return (error); 929 } 930 931 static int 932 procfs_readdir_proc(struct vop_readdir_args *ap) 933 { 934 struct pfsnode *pfs; 935 int error, i, retval; 936 struct proc *p; 937 struct lwp *lp; 938 struct proc_target *pt; 939 struct uio *uio = ap->a_uio; 940 941 pfs = VTOPFS(ap->a_vp); 942 p = pfs_pfind(pfs->pfs_pid); 943 if (p == NULL) 944 return(0); 945 if (!PRISON_CHECK(ap->a_cred, p->p_ucred)) { 946 error = 0; 947 goto done; 948 } 949 /* XXX lwp, not MPSAFE */ 950 lp = FIRST_LWP_IN_PROC(p); 951 if (lp == NULL) { 952 error = EINVAL; 953 goto done; 954 } 955 956 error = 0; 957 i = (int)uio->uio_offset; 958 if (i < 0) { 959 error = EINVAL; 960 goto done; 961 } 962 963 for (pt = &proc_targets[i]; 964 !error && uio->uio_resid > 0 && i < nproc_targets; pt++, i++) { 965 if (pt->pt_valid && (*pt->pt_valid)(lp) == 0) 966 continue; 967 968 retval = vop_write_dirent(&error, uio, 969 PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype), pt->pt_type, 970 pt->pt_namlen, pt->pt_name); 971 if (retval) 972 break; 973 } 974 975 uio->uio_offset = (off_t)i; 976 error = 0; 977 done: 978 pfs_pdone(p); 979 return error; 980 } 981 982 struct procfs_readdir_root_info { 983 int error; 984 int i; 985 int pcnt; 986 struct uio *uio; 987 struct ucred *cred; 988 }; 989 990 static int procfs_readdir_root_callback(struct proc *p, void *data); 991 992 static int 993 procfs_readdir_root(struct vop_readdir_args *ap) 994 { 995 struct procfs_readdir_root_info info; 996 struct uio *uio = ap->a_uio; 997 int res; 998 999 res = 0; 1000 info.error = 0; 1001 info.i = (int)uio->uio_offset; 1002 1003 if (info.i < 0) 1004 return (EINVAL); 1005 1006 info.pcnt = 0; 1007 info.uio = uio; 1008 info.cred = ap->a_cred; 1009 while (info.pcnt < 4) { 1010 res = procfs_readdir_root_callback(NULL, &info); 1011 if (res < 0) 1012 break; 1013 } 1014 if (res >= 0) 1015 allproc_scan(procfs_readdir_root_callback, &info, 0); 1016 uio->uio_offset = (off_t)info.i; 1017 1018 return (info.error); 1019 } 1020 1021 static int 1022 procfs_readdir_root_callback(struct proc *p, void *data) 1023 { 1024 struct procfs_readdir_root_info *info = data; 1025 struct uio *uio; 1026 int retval; 1027 ino_t d_ino; 1028 const char *d_name; 1029 char d_name_pid[20]; 1030 size_t d_namlen; 1031 uint8_t d_type; 1032 1033 uio = info->uio; 1034 1035 if (uio->uio_resid <= 0 || info->error) 1036 return(-1); 1037 1038 switch (info->pcnt) { 1039 case 0: /* `.' */ 1040 d_ino = PROCFS_FILENO(0, Proot); 1041 d_name = "."; 1042 d_namlen = 1; 1043 d_type = DT_DIR; 1044 break; 1045 case 1: /* `..' */ 1046 d_ino = PROCFS_FILENO(0, Proot); 1047 d_name = ".."; 1048 d_namlen = 2; 1049 d_type = DT_DIR; 1050 break; 1051 1052 case 2: 1053 d_ino = PROCFS_FILENO(0, Pcurproc); 1054 d_namlen = 7; 1055 d_name = "curproc"; 1056 d_type = DT_LNK; 1057 break; 1058 1059 case 3: 1060 d_ino = PROCFS_FILENO(0, Pcurproc); 1061 d_namlen = 4; 1062 d_name = "self"; 1063 d_type = DT_LNK; 1064 break; 1065 1066 default: 1067 if (!PRISON_CHECK(info->cred, p->p_ucred)) 1068 return(0); 1069 if (ps_showallprocs == 0 && 1070 info->cred->cr_uid != 0 && 1071 info->cred->cr_uid != p->p_ucred->cr_uid) { 1072 return(0); 1073 } 1074 1075 /* 1076 * Skip entries we have already returned (optimization) 1077 */ 1078 if (info->pcnt < info->i) { 1079 ++info->pcnt; 1080 return(0); 1081 } 1082 1083 d_ino = PROCFS_FILENO(p->p_pid, Pproc); 1084 d_namlen = ksnprintf(d_name_pid, sizeof(d_name_pid), 1085 "%ld", (long)p->p_pid); 1086 d_name = d_name_pid; 1087 d_type = DT_DIR; 1088 break; 1089 } 1090 1091 /* 1092 * Skip entries we have already returned (optimization) 1093 */ 1094 if (info->pcnt < info->i) { 1095 ++info->pcnt; 1096 return(0); 1097 } 1098 1099 retval = vop_write_dirent(&info->error, uio, 1100 d_ino, d_type, d_namlen, d_name); 1101 if (retval) 1102 return(-1); 1103 ++info->pcnt; 1104 ++info->i; 1105 return(0); 1106 } 1107 1108 /* 1109 * readlink reads the link of `curproc' or `file' 1110 */ 1111 static int 1112 procfs_readlink(struct vop_readlink_args *ap) 1113 { 1114 char buf[16]; /* should be enough */ 1115 struct proc *procp; 1116 struct vnode *vp = ap->a_vp; 1117 struct pfsnode *pfs = VTOPFS(vp); 1118 char *fullpath, *freepath; 1119 int error, len; 1120 1121 switch (pfs->pfs_type) { 1122 case Pcurproc: 1123 if (pfs->pfs_fileno != PROCFS_FILENO(0, Pcurproc)) 1124 return (EINVAL); 1125 1126 len = ksnprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid); 1127 return (uiomove(buf, len, ap->a_uio)); 1128 case Pfile: 1129 /* 1130 * procfs's directory topology is somewhat asynchronous from 1131 * reality so it is possible for pid requests to race exiting 1132 * processes. In this situation, bit 31 is set in 1133 * pfs->pfs_pid which guarantees that pfs_pfind() will return 1134 * NULL. 1135 * 1136 * It is also possible to catch a process in the middle of 1137 * an exit sequence so various fields might wind up being 1138 * NULL that are not normally NULL. 1139 */ 1140 procp = pfs_pfind(pfs->pfs_pid); 1141 if (procp == NULL || procp->p_ucred == NULL) { 1142 pfs_pdone(procp); 1143 return (uiomove("unknown", sizeof("unknown") - 1, 1144 ap->a_uio)); 1145 } 1146 if (procp->p_textnch.ncp) { 1147 struct nchandle nch; 1148 1149 cache_copy(&procp->p_textnch, &nch); 1150 error = cache_fullpath(procp, &nch, NULL, 1151 &fullpath, &freepath, 0); 1152 cache_drop(&nch); 1153 } else { 1154 error = EINVAL; 1155 } 1156 1157 if (error != 0) { 1158 pfs_pdone(procp); 1159 return (uiomove("unknown", sizeof("unknown") - 1, 1160 ap->a_uio)); 1161 } 1162 error = uiomove(fullpath, strlen(fullpath), ap->a_uio); 1163 kfree(freepath, M_TEMP); 1164 pfs_pdone(procp); 1165 return (error); 1166 default: 1167 return (EINVAL); 1168 } 1169 } 1170 1171 /* 1172 * convert decimal ascii to pid_t 1173 */ 1174 static pid_t 1175 atopid(const char *b, u_int len) 1176 { 1177 pid_t p = 0; 1178 1179 while (len--) { 1180 char c = *b++; 1181 if (c < '0' || c > '9') 1182 return (NO_PID); 1183 p = 10 * p + (c - '0'); 1184 if (p > PID_MAX) 1185 return (NO_PID); 1186 } 1187 1188 return (p); 1189 } 1190 1191 /* 1192 * kqfilter operations 1193 */ 1194 static void 1195 procfs_filt_detach(struct knote *kn) 1196 { 1197 struct vnode *vp = (void *)kn->kn_hook; 1198 1199 knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn); 1200 } 1201 1202 static int 1203 procfs_filt_read(struct knote *kn, long hint) 1204 { 1205 if (hint == NOTE_REVOKE) { 1206 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT); 1207 return (1); 1208 } 1209 1210 /* Files on procfs have a size of 0. */ 1211 kn->kn_data = 0; 1212 if (kn->kn_sfflags & NOTE_OLDAPI) 1213 return (1); 1214 return (kn->kn_data != 0); 1215 } 1216 1217 static int 1218 procfs_filt_write(struct knote *kn, long hint) 1219 { 1220 if (hint == NOTE_REVOKE) 1221 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT); 1222 kn->kn_data = 0; 1223 return (1); 1224 } 1225 1226 static int 1227 procfs_filt_vnode(struct knote *kn, long hint) 1228 { 1229 if (kn->kn_sfflags & hint) 1230 kn->kn_fflags |= hint; 1231 if (hint == NOTE_REVOKE) { 1232 kn->kn_flags |= (EV_EOF | EV_NODATA); 1233 return (1); 1234 } 1235 return (kn->kn_fflags != 0); 1236 } 1237 1238 static struct filterops procfs_read_filtops = { 1239 FILTEROP_ISFD | FILTEROP_MPSAFE, NULL, 1240 procfs_filt_detach, procfs_filt_read, 1241 }; 1242 static struct filterops procfs_write_filtops = { 1243 FILTEROP_ISFD | FILTEROP_MPSAFE, NULL, 1244 procfs_filt_detach, procfs_filt_write, 1245 }; 1246 static struct filterops procfs_vnode_filtops = { 1247 FILTEROP_ISFD | FILTEROP_MPSAFE, NULL, 1248 procfs_filt_detach, procfs_filt_vnode, 1249 }; 1250 1251 static int 1252 procfs_kqfilter(struct vop_kqfilter_args *ap) 1253 { 1254 struct vnode *vp = ap->a_vp; 1255 struct knote *kn = ap->a_kn; 1256 1257 switch (kn->kn_filter) { 1258 case EVFILT_READ: 1259 kn->kn_fop = &procfs_read_filtops; 1260 break; 1261 case EVFILT_WRITE: 1262 kn->kn_fop = &procfs_write_filtops; 1263 break; 1264 case EVFILT_VNODE: 1265 kn->kn_fop = &procfs_vnode_filtops; 1266 break; 1267 default: 1268 return (EOPNOTSUPP); 1269 } 1270 1271 kn->kn_hook = (caddr_t)vp; 1272 knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn); 1273 1274 return (0); 1275 } 1276