/*
 * Copyright (c) 1993 The Regents of the University of California.
 * Copyright (c) 1993 Jan-Simon Pendry
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)procfs_vnops.c	8.1 (Berkeley) 01/05/94
 *
 * From:
 *	$Id: procfs_vnops.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
 */

/*
 * procfs vnode interface
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/dirent.h>
#include <sys/resourcevar.h>
#include <miscfs/procfs/procfs.h>
#include <vm/vm.h>	/* for PAGE_SIZE */

/*
 * Vnode Operations.
 *
 */

/*
 * This is a list of the valid names in the
 * process-specific sub-directories.  It is
 * used in procfs_lookup and procfs_readdir.
 *
 * Each entry records the length of the name (without
 * the terminating NUL), the name itself and the pfstype
 * of the node that the name refers to.  The table is
 * statically initialised, so the unused tail of every
 * d_name[] is zero filled.
 */
static struct pfsnames {
	u_short	d_namlen;		/* strlen of d_name */
	char	d_name[PROCFS_NAMELEN];	/* entry name, NUL padded */
	pfstype	d_pfstype;		/* node type the name maps to */
} procent[] = {
/*
 * N("x") expands to the two initialisers `length, string',
 * so the field order above must stay namlen, name, type.
 */
#define N(s) sizeof(s)-1, s
	/* namlen, nam, type */
	{ N("file"),   Pfile },
	{ N("mem"),    Pmem },
	{ N("regs"),   Pregs },
	{ N("ctl"),    Pctl },
	{ N("status"), Pstatus },
	{ N("note"),   Pnote },
	{ N("notepg"), Pnotepg },
#undef N
};
/* number of entries in procent[] */
#define Nprocent (sizeof(procent)/sizeof(procent[0]))

/* decimal ascii to pid conversion, defined at the end of this file */
static pid_t atopid __P((const char *, u_int));

/*
 * set things up for doing i/o on
 * the pfsnode (vp).  (vp) is locked
 * on entry, and should be left locked
 * on exit.
 *
 * for procfs we don't need to do anything
 * in particular for i/o.  all that is done
 * is to support exclusive open on process
 * memory images.
75 */ 76 procfs_open(ap) 77 struct vop_open_args *ap; 78 { 79 struct pfsnode *pfs = VTOPFS(ap->a_vp); 80 81 switch (pfs->pfs_type) { 82 case Pmem: 83 if (PFIND(pfs->pfs_pid) == 0) 84 return (ENOENT); /* was ESRCH, jsp */ 85 86 if ((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL) || 87 (pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)) 88 return (EBUSY); 89 90 91 if (ap->a_mode & FWRITE) 92 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); 93 94 return (0); 95 96 default: 97 break; 98 } 99 100 return (0); 101 } 102 103 /* 104 * close the pfsnode (vp) after doing i/o. 105 * (vp) is not locked on entry or exit. 106 * 107 * nothing to do for procfs other than undo 108 * any exclusive open flag (see _open above). 109 */ 110 procfs_close(ap) 111 struct vop_close_args *ap; 112 { 113 struct pfsnode *pfs = VTOPFS(ap->a_vp); 114 115 switch (pfs->pfs_type) { 116 case Pmem: 117 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) 118 pfs->pfs_flags &= ~(FWRITE|O_EXCL); 119 break; 120 } 121 122 return (0); 123 } 124 125 /* 126 * do an ioctl operation on pfsnode (vp). 127 * (vp) is not locked on entry or exit. 128 */ 129 procfs_ioctl(ap) 130 struct vop_ioctl_args *ap; 131 { 132 133 return (ENOTTY); 134 } 135 136 /* 137 * do block mapping for pfsnode (vp). 138 * since we don't use the buffer cache 139 * for procfs this function should never 140 * be called. in any case, it's not clear 141 * what part of the kernel ever makes use 142 * of this function. for sanity, this is the 143 * usual no-op bmap, although returning 144 * (EIO) would be a reasonable alternative. 145 */ 146 procfs_bmap(ap) 147 struct vop_bmap_args *ap; 148 { 149 150 if (ap->a_vpp != NULL) 151 *ap->a_vpp = ap->a_vp; 152 if (ap->a_bnp != NULL) 153 *ap->a_bnp = ap->a_bn; 154 return (0); 155 } 156 157 /* 158 * _inactive is called when the pfsnode 159 * is vrele'd and the reference count goes 160 * to zero. (vp) will be on the vnode free 161 * list, so to get it back vget() must be 162 * used. 
163 * 164 * for procfs, check if the process is still 165 * alive and if it isn't then just throw away 166 * the vnode by calling vgone(). this may 167 * be overkill and a waste of time since the 168 * chances are that the process will still be 169 * there and PFIND is not free. 170 * 171 * (vp) is not locked on entry or exit. 172 */ 173 procfs_inactive(ap) 174 struct vop_inactive_args *ap; 175 { 176 struct pfsnode *pfs = VTOPFS(ap->a_vp); 177 178 if (PFIND(pfs->pfs_pid) == 0) 179 vgone(ap->a_vp); 180 181 return (0); 182 } 183 184 /* 185 * _reclaim is called when getnewvnode() 186 * wants to make use of an entry on the vnode 187 * free list. at this time the filesystem needs 188 * to free any private data and remove the node 189 * from any private lists. 190 */ 191 procfs_reclaim(ap) 192 struct vop_reclaim_args *ap; 193 { 194 int error; 195 196 error = procfs_freevp(ap->a_vp); 197 return (error); 198 } 199 200 /* 201 * _print is used for debugging. 202 * just print a readable description 203 * of (vp). 204 */ 205 procfs_print(ap) 206 struct vop_print_args *ap; 207 { 208 struct pfsnode *pfs = VTOPFS(ap->a_vp); 209 210 printf("tag VT_PROCFS, pid %d, mode %x, flags %x\n", 211 pfs->pfs_pid, 212 pfs->pfs_mode, pfs->pfs_flags); 213 } 214 215 /* 216 * _abortop is called when operations such as 217 * rename and create fail. this entry is responsible 218 * for undoing any side-effects caused by the lookup. 219 * this will always include freeing the pathname buffer. 220 */ 221 procfs_abortop(ap) 222 struct vop_abortop_args *ap; 223 { 224 225 if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) 226 FREE(ap->a_cnp->cn_pnbuf, M_NAMEI); 227 return (0); 228 } 229 230 /* 231 * generic entry point for unsupported operations 232 */ 233 procfs_badop() 234 { 235 236 return (EIO); 237 } 238 239 /* 240 * Invent attributes for pfsnode (vp) and store 241 * them in (vap). 
242 * Directories lengths are returned as zero since 243 * any real length would require the genuine size 244 * to be computed, and nothing cares anyway. 245 * 246 * this is relatively minimal for procfs. 247 */ 248 procfs_getattr(ap) 249 struct vop_getattr_args *ap; 250 { 251 struct pfsnode *pfs = VTOPFS(ap->a_vp); 252 struct proc *procp; 253 int error; 254 255 /* first check the process still exists */ 256 procp = PFIND(pfs->pfs_pid); 257 if (procp == 0) 258 return (ENOENT); 259 260 error = 0; 261 262 /* start by zeroing out the attributes */ 263 VATTR_NULL(ap->a_vap); 264 265 /* next do all the common fields */ 266 (ap->a_vap)->va_type = ap->a_vp->v_type; 267 (ap->a_vap)->va_mode = pfs->pfs_mode; 268 (ap->a_vap)->va_fileid = pfs->pfs_fileno; 269 (ap->a_vap)->va_flags = 0; 270 (ap->a_vap)->va_blocksize = PAGE_SIZE; 271 (ap->a_vap)->va_bytes = ap->a_vap->va_size = 0; 272 273 /* 274 * Make all times be current TOD. 275 * It would be possible to get the process start 276 * time from the p_stat structure, but there's 277 * no "file creation" time stamp anyway, and the 278 * p_stat structure is not addressible if u. gets 279 * swapped out for that process. 280 */ 281 microtime(&(ap->a_vap)->va_ctime); 282 (ap->a_vap)->va_atime = ap->a_vap->va_mtime = ap->a_vap->va_ctime; 283 284 /* 285 * now do the object specific fields 286 * 287 * The size could be set from struct reg, but it's hardly 288 * worth the trouble, and it puts some (potentially) machine 289 * dependent data into this machine-independent code. If it 290 * becomes important then this function should break out into 291 * a per-file stat function in the corresponding .c file. 
292 */ 293 294 switch (pfs->pfs_type) { 295 case Proot: 296 ap->a_vap->va_nlink = 2; 297 ap->a_vap->va_uid = 0; 298 ap->a_vap->va_gid = 0; 299 break; 300 301 case Pproc: 302 ap->a_vap->va_nlink = 2; 303 ap->a_vap->va_uid = procp->p_ucred->cr_uid; 304 ap->a_vap->va_gid = procp->p_ucred->cr_gid; 305 break; 306 307 case Pfile: 308 error = EOPNOTSUPP; 309 break; 310 311 case Pmem: 312 ap->a_vap->va_nlink = 1; 313 ap->a_vap->va_bytes = ap->a_vap->va_size = 314 ctob(procp->p_vmspace->vm_tsize + 315 procp->p_vmspace->vm_dsize + 316 procp->p_vmspace->vm_ssize); 317 ap->a_vap->va_uid = procp->p_ucred->cr_uid; 318 ap->a_vap->va_gid = procp->p_ucred->cr_gid; 319 break; 320 321 case Pregs: 322 case Pctl: 323 case Pstatus: 324 case Pnote: 325 case Pnotepg: 326 ap->a_vap->va_nlink = 1; 327 ap->a_vap->va_uid = procp->p_ucred->cr_uid; 328 ap->a_vap->va_gid = procp->p_ucred->cr_gid; 329 break; 330 331 default: 332 panic("procfs_getattr"); 333 } 334 335 return (error); 336 } 337 338 procfs_setattr(ap) 339 struct vop_setattr_args *ap; 340 { 341 /* 342 * just fake out attribute setting 343 * it's not good to generate an error 344 * return, otherwise things like creat() 345 * will fail when they try to set the 346 * file length to 0. worse, this means 347 * that echo $note > /proc/$pid/note will fail. 348 */ 349 350 return (0); 351 } 352 353 /* 354 * implement access checking. 355 * 356 * something very similar to this code is duplicated 357 * throughout the 4bsd kernel and should be moved 358 * into kern/vfs_subr.c sometime. 359 * 360 * actually, the check for super-user is slightly 361 * broken since it will allow read access to write-only 362 * objects. this doesn't cause any particular trouble 363 * but does mean that the i/o entry points need to check 364 * that the operation really does make sense. 
365 */ 366 procfs_access(ap) 367 struct vop_access_args *ap; 368 { 369 struct vattr *vap; 370 struct vattr vattr; 371 int error; 372 373 /* 374 * If you're the super-user, 375 * you always get access. 376 */ 377 if (ap->a_cred->cr_uid == (uid_t) 0) 378 return (0); 379 vap = &vattr; 380 if (error = VOP_GETATTR(ap->a_vp, vap, ap->a_cred, ap->a_p)) 381 return (error); 382 383 /* 384 * Access check is based on only one of owner, group, public. 385 * If not owner, then check group. If not a member of the 386 * group, then check public access. 387 */ 388 if (ap->a_cred->cr_uid != vap->va_uid) { 389 gid_t *gp; 390 int i; 391 392 (ap->a_mode) >>= 3; 393 gp = ap->a_cred->cr_groups; 394 for (i = 0; i < ap->a_cred->cr_ngroups; i++, gp++) 395 if (vap->va_gid == *gp) 396 goto found; 397 ap->a_mode >>= 3; 398 found: 399 ; 400 } 401 402 if ((vap->va_mode & ap->a_mode) == ap->a_mode) 403 return (0); 404 405 return (EACCES); 406 } 407 408 /* 409 * lookup. this is incredibly complicated in the 410 * general case, however for most pseudo-filesystems 411 * very little needs to be done. 412 * 413 * unless you want to get a migraine, just make sure your 414 * filesystem doesn't do any locking of its own. otherwise 415 * read and inwardly digest ufs_lookup(). 
416 */ 417 procfs_lookup(ap) 418 struct vop_lookup_args *ap; 419 { 420 struct componentname *cnp = ap->a_cnp; 421 struct vnode **vpp = ap->a_vpp; 422 struct vnode *dvp = ap->a_dvp; 423 char *pname = cnp->cn_nameptr; 424 int error = 0; 425 int flag; 426 pid_t pid; 427 struct vnode *nvp; 428 struct pfsnode *pfs; 429 struct proc *procp; 430 int mode; 431 pfstype pfs_type; 432 int i; 433 434 if (cnp->cn_namelen == 1 && *pname == '.') { 435 *vpp = dvp; 436 VREF(dvp); 437 /*VOP_LOCK(dvp);*/ 438 return (0); 439 } 440 441 *vpp = NULL; 442 443 pfs = VTOPFS(dvp); 444 switch (pfs->pfs_type) { 445 case Proot: 446 if (cnp->cn_flags & ISDOTDOT) 447 return (EIO); 448 449 if (CNEQ(cnp, "curproc", 7)) 450 pid = cnp->cn_proc->p_pid; 451 else 452 pid = atopid(pname, cnp->cn_namelen); 453 if (pid == NO_PID) 454 return (ENOENT); 455 456 procp = PFIND(pid); 457 if (procp == 0) 458 return (ENOENT); 459 460 error = procfs_allocvp(dvp->v_mount, &nvp, pid, Pproc); 461 if (error) 462 return (error); 463 464 nvp->v_type = VDIR; 465 pfs = VTOPFS(nvp); 466 467 *vpp = nvp; 468 return (0); 469 470 case Pproc: 471 if (cnp->cn_flags & ISDOTDOT) { 472 error = procfs_root(dvp->v_mount, vpp); 473 return (error); 474 } 475 476 procp = PFIND(pfs->pfs_pid); 477 if (procp == 0) 478 return (ENOENT); 479 480 for (i = 0; i < Nprocent; i++) { 481 struct pfsnames *dp = &procent[i]; 482 483 if (cnp->cn_namelen == dp->d_namlen && 484 bcmp(pname, dp->d_name, dp->d_namlen) == 0) { 485 pfs_type = dp->d_pfstype; 486 goto found; 487 } 488 } 489 return (ENOENT); 490 491 found: 492 if (pfs_type == Pfile) { 493 nvp = procfs_findtextvp(procp); 494 if (nvp) { 495 VREF(nvp); 496 VOP_LOCK(nvp); 497 } else { 498 error = ENXIO; 499 } 500 } else { 501 error = procfs_allocvp(dvp->v_mount, &nvp, 502 pfs->pfs_pid, pfs_type); 503 if (error) 504 return (error); 505 506 nvp->v_type = VREG; 507 pfs = VTOPFS(nvp); 508 } 509 *vpp = nvp; 510 return (error); 511 512 default: 513 return (ENOTDIR); 514 } 515 } 516 517 /* 518 * readdir 
returns directory entries from pfsnode (vp). 519 * 520 * the strategy here with procfs is to generate a single 521 * directory entry at a time (struct pfsdent) and then 522 * copy that out to userland using uiomove. a more efficent 523 * though more complex implementation, would try to minimize 524 * the number of calls to uiomove(). for procfs, this is 525 * hardly worth the added code complexity. 526 * 527 * this should just be done through read() 528 */ 529 procfs_readdir(ap) 530 struct vop_readdir_args *ap; 531 { 532 struct uio *uio = ap->a_uio; 533 struct pfsdent d; 534 struct pfsdent *dp = &d; 535 struct pfsnode *pfs; 536 int error; 537 int count; 538 int i; 539 540 pfs = VTOPFS(ap->a_vp); 541 542 if (uio->uio_resid < UIO_MX) 543 return (EINVAL); 544 if (uio->uio_offset & (UIO_MX-1)) 545 return (EINVAL); 546 if (uio->uio_offset < 0) 547 return (EINVAL); 548 549 error = 0; 550 count = 0; 551 i = uio->uio_offset / UIO_MX; 552 553 switch (pfs->pfs_type) { 554 /* 555 * this is for the process-specific sub-directories. 556 * all that is needed to is copy out all the entries 557 * from the procent[] table (top of this file). 558 */ 559 case Pproc: { 560 while (uio->uio_resid >= UIO_MX) { 561 struct pfsnames *dt; 562 563 if (i >= Nprocent) 564 break; 565 566 dt = &procent[i]; 567 568 dp->d_reclen = UIO_MX; 569 dp->d_fileno = PROCFS_FILENO(pfs->pfs_pid, dt->d_pfstype); 570 dp->d_type = DT_REG; 571 dp->d_namlen = dt->d_namlen; 572 bcopy(dt->d_name, dp->d_name, sizeof(dt->d_name)-1); 573 error = uiomove((caddr_t) dp, UIO_MX, uio); 574 if (error) 575 break; 576 count += UIO_MX; 577 i++; 578 } 579 580 break; 581 582 } 583 584 /* 585 * this is for the root of the procfs filesystem 586 * what is needed is a special entry for "curproc" 587 * followed by an entry for each process on allproc 588 #ifdef PROCFS_ZOMBIE 589 * and zombproc. 
590 #endif 591 */ 592 593 case Proot: { 594 int pcnt; 595 #ifdef PROCFS_ZOMBIE 596 int doingzomb = 0; 597 #endif 598 volatile struct proc *p; 599 600 p = allproc; 601 602 #define PROCFS_XFILES 1 /* number of other entries, like "curproc" */ 603 pcnt = PROCFS_XFILES; 604 605 while (p && uio->uio_resid >= UIO_MX) { 606 bzero((char *) dp, UIO_MX); 607 dp->d_type = DT_DIR; 608 dp->d_reclen = UIO_MX; 609 610 switch (i) { 611 case 0: 612 /* ship out entry for "curproc" */ 613 dp->d_fileno = PROCFS_FILENO(PID_MAX+1, Pproc); 614 dp->d_namlen = 7; 615 bcopy("curproc", dp->d_name, dp->d_namlen+1); 616 break; 617 618 default: 619 if (pcnt >= i) { 620 dp->d_fileno = PROCFS_FILENO(p->p_pid, Pproc); 621 dp->d_namlen = sprintf(dp->d_name, "%ld", (long) p->p_pid); 622 } 623 624 p = p->p_next; 625 626 #ifdef PROCFS_ZOMBIE 627 if (p == 0 && doingzomb == 0) { 628 doingzomb = 1; 629 p = zombproc; 630 } 631 #endif 632 633 if (pcnt++ < i) 634 continue; 635 636 break; 637 } 638 error = uiomove((caddr_t) dp, UIO_MX, uio); 639 if (error) 640 break; 641 count += UIO_MX; 642 i++; 643 } 644 645 break; 646 647 } 648 649 default: 650 error = ENOTDIR; 651 break; 652 } 653 654 uio->uio_offset = i * UIO_MX; 655 656 return (error); 657 } 658 659 /* 660 * convert decimal ascii to pid_t 661 */ 662 static pid_t 663 atopid(b, len) 664 const char *b; 665 u_int len; 666 { 667 pid_t p = 0; 668 669 while (len--) { 670 char c = *b++; 671 if (c < '0' || c > '9') 672 return (NO_PID); 673 p = 10 * p + (c - '0'); 674 if (p > PID_MAX) 675 return (NO_PID); 676 } 677 678 return (p); 679 } 680 681 /* 682 * procfs vnode operations. 
*/
/*
 * procfs_vnodeop_p is the operations vector for procfs vnodes.
 * NOTE(review): nothing in this file assigns it; presumably the
 * VFS initialisation code builds it from procfs_vnodeop_opv_desc
 * below -- confirm against the vfs init code.
 *
 * procfs_vnodeop_entries[] pairs each vnode operation descriptor
 * with the procfs function that implements it and is terminated
 * by a NULL/NULL pair.  only some of the handlers are defined in
 * this file; NOTE(review): the others (procfs_create, procfs_read,
 * procfs_lock, ...) are presumably provided by procfs.h or sibling
 * procfs sources -- verify.
 */
int (**procfs_vnodeop_p)();
struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, procfs_lookup },		/* lookup */
	{ &vop_create_desc, procfs_create },		/* create */
	{ &vop_mknod_desc, procfs_mknod },		/* mknod */
	{ &vop_open_desc, procfs_open },		/* open */
	{ &vop_close_desc, procfs_close },		/* close */
	{ &vop_access_desc, procfs_access },		/* access */
	{ &vop_getattr_desc, procfs_getattr },		/* getattr */
	{ &vop_setattr_desc, procfs_setattr },		/* setattr */
	{ &vop_read_desc, procfs_read },		/* read */
	{ &vop_write_desc, procfs_write },		/* write */
	{ &vop_ioctl_desc, procfs_ioctl },		/* ioctl */
	{ &vop_select_desc, procfs_select },		/* select */
	{ &vop_mmap_desc, procfs_mmap },		/* mmap */
	{ &vop_fsync_desc, procfs_fsync },		/* fsync */
	{ &vop_seek_desc, procfs_seek },		/* seek */
	{ &vop_remove_desc, procfs_remove },		/* remove */
	{ &vop_link_desc, procfs_link },		/* link */
	{ &vop_rename_desc, procfs_rename },		/* rename */
	{ &vop_mkdir_desc, procfs_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, procfs_rmdir },		/* rmdir */
	{ &vop_symlink_desc, procfs_symlink },		/* symlink */
	{ &vop_readdir_desc, procfs_readdir },		/* readdir */
	{ &vop_readlink_desc, procfs_readlink },	/* readlink */
	{ &vop_abortop_desc, procfs_abortop },		/* abortop */
	{ &vop_inactive_desc, procfs_inactive },	/* inactive */
	{ &vop_reclaim_desc, procfs_reclaim },		/* reclaim */
	{ &vop_lock_desc, procfs_lock },		/* lock */
	{ &vop_unlock_desc, procfs_unlock },		/* unlock */
	{ &vop_bmap_desc, procfs_bmap },		/* bmap */
	{ &vop_strategy_desc, procfs_strategy },	/* strategy */
	{ &vop_print_desc, procfs_print },		/* print */
	{ &vop_islocked_desc, procfs_islocked },	/* islocked */
	{ &vop_pathconf_desc, procfs_pathconf },	/* pathconf */
	{ &vop_advlock_desc, procfs_advlock },		/* advlock */
	{ &vop_blkatoff_desc, procfs_blkatoff },	/* blkatoff */
	{ &vop_valloc_desc, procfs_valloc },		/* valloc */
	{ &vop_vfree_desc, procfs_vfree },		/* vfree */
	{ &vop_truncate_desc, procfs_truncate },	/* truncate */
	{ &vop_update_desc, procfs_update },		/* update */
	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* end of table */
};
/* ties the operations vector pointer to the table that describes it */
struct vnodeopv_desc procfs_vnodeop_opv_desc =
	{ &procfs_vnodeop_p, procfs_vnodeop_entries };