1 /* $NetBSD: procfs_vnops.c,v 1.82 2001/06/03 02:21:35 chs Exp $ */ 2 3 /* 4 * Copyright (c) 1993 Jan-Simon Pendry 5 * Copyright (c) 1993, 1995 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Jan-Simon Pendry. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 40 */ 41 42 /* 43 * procfs vnode interface 44 */ 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/time.h> 49 #include <sys/kernel.h> 50 #include <sys/file.h> 51 #include <sys/proc.h> 52 #include <sys/vnode.h> 53 #include <sys/namei.h> 54 #include <sys/malloc.h> 55 #include <sys/mount.h> 56 #include <sys/dirent.h> 57 #include <sys/resourcevar.h> 58 #include <sys/ptrace.h> 59 #include <sys/stat.h> 60 61 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */ 62 63 #include <machine/reg.h> 64 65 #include <miscfs/genfs/genfs.h> 66 #include <miscfs/procfs/procfs.h> 67 68 /* 69 * Vnode Operations. 70 * 71 */ 72 73 static int procfs_validfile_linux __P((struct proc *, struct mount *)); 74 75 /* 76 * This is a list of the valid names in the 77 * process-specific sub-directories. It is 78 * used in procfs_lookup and procfs_readdir 79 */ 80 const struct proc_target { 81 u_char pt_type; 82 u_char pt_namlen; 83 char *pt_name; 84 pfstype pt_pfstype; 85 int (*pt_valid) __P((struct proc *, struct mount *)); 86 } proc_targets[] = { 87 #define N(s) sizeof(s)-1, s 88 /* name type validp */ 89 { DT_DIR, N("."), Pproc, NULL }, 90 { DT_DIR, N(".."), Proot, NULL }, 91 { DT_REG, N("file"), Pfile, procfs_validfile }, 92 { DT_REG, N("mem"), Pmem, NULL }, 93 { DT_REG, N("regs"), Pregs, procfs_validregs }, 94 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs }, 95 { DT_REG, N("ctl"), Pctl, NULL }, 96 { DT_REG, N("status"), Pstatus, NULL }, 97 { DT_REG, N("note"), Pnote, NULL }, 98 { DT_REG, N("notepg"), Pnotepg, NULL }, 99 { DT_REG, N("map"), Pmap, procfs_validmap }, 100 { DT_REG, N("maps"), Pmaps, procfs_validmap }, 101 { DT_REG, N("cmdline"), Pcmdline, NULL }, 102 { DT_REG, N("exe"), Pfile, procfs_validfile_linux }, 103 #undef N 104 }; 105 static int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]); 106 107 /* 108 * List of files in the root directory. Note: the validate function will 109 * be called with p == NULL for these ones. 110 */ 111 struct proc_target proc_root_targets[] = { 112 #define N(s) sizeof(s)-1, s 113 /* name type validp */ 114 { DT_REG, N("meminfo"), Pmeminfo, procfs_validfile_linux }, 115 { DT_REG, N("cpuinfo"), Pcpuinfo, procfs_validfile_linux }, 116 #undef N 117 }; 118 static int nproc_root_targets = 119 sizeof(proc_root_targets) / sizeof(proc_root_targets[0]); 120 121 int procfs_lookup __P((void *)); 122 #define procfs_create genfs_eopnotsupp_rele 123 #define procfs_mknod genfs_eopnotsupp_rele 124 int procfs_open __P((void *)); 125 int procfs_close __P((void *)); 126 int procfs_access __P((void *)); 127 int procfs_getattr __P((void *)); 128 int procfs_setattr __P((void *)); 129 #define procfs_read procfs_rw 130 #define procfs_write procfs_rw 131 #define procfs_fcntl genfs_fcntl 132 #define procfs_ioctl genfs_enoioctl 133 #define procfs_poll genfs_poll 134 #define procfs_revoke genfs_revoke 135 #define procfs_fsync genfs_nullop 136 #define procfs_seek genfs_nullop 137 #define procfs_remove genfs_eopnotsupp_rele 138 int procfs_link __P((void *)); 139 #define procfs_rename genfs_eopnotsupp_rele 140 #define procfs_mkdir genfs_eopnotsupp_rele 141 #define procfs_rmdir genfs_eopnotsupp_rele 142 int procfs_symlink __P((void *)); 143 int procfs_readdir __P((void *)); 144 int procfs_readlink __P((void *)); 145 #define procfs_abortop genfs_abortop 146 int procfs_inactive __P((void *)); 147 int procfs_reclaim __P((void *)); 148 #define procfs_lock genfs_lock 149 #define procfs_unlock genfs_unlock 150 #define procfs_bmap genfs_badop 151 #define procfs_strategy genfs_badop 152 int procfs_print __P((void *)); 153 int procfs_pathconf __P((void *)); 154 #define procfs_islocked genfs_islocked 155 #define procfs_advlock genfs_einval 156 #define procfs_blkatoff genfs_eopnotsupp 157 #define procfs_valloc genfs_eopnotsupp 158 #define procfs_vfree genfs_nullop 159 #define procfs_truncate genfs_eopnotsupp 160 #define procfs_update genfs_nullop 161 #define procfs_bwrite genfs_eopnotsupp 162 163 static pid_t atopid __P((const char *, u_int)); 164 165 /* 166 * procfs vnode operations. 167 */ 168 int (**procfs_vnodeop_p) __P((void *)); 169 const struct vnodeopv_entry_desc procfs_vnodeop_entries[] = { 170 { &vop_default_desc, vn_default_error }, 171 { &vop_lookup_desc, procfs_lookup }, /* lookup */ 172 { &vop_create_desc, procfs_create }, /* create */ 173 { &vop_mknod_desc, procfs_mknod }, /* mknod */ 174 { &vop_open_desc, procfs_open }, /* open */ 175 { &vop_close_desc, procfs_close }, /* close */ 176 { &vop_access_desc, procfs_access }, /* access */ 177 { &vop_getattr_desc, procfs_getattr }, /* getattr */ 178 { &vop_setattr_desc, procfs_setattr }, /* setattr */ 179 { &vop_read_desc, procfs_read }, /* read */ 180 { &vop_write_desc, procfs_write }, /* write */ 181 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */ 182 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */ 183 { &vop_poll_desc, procfs_poll }, /* poll */ 184 { &vop_revoke_desc, procfs_revoke }, /* revoke */ 185 { &vop_fsync_desc, procfs_fsync }, /* fsync */ 186 { &vop_seek_desc, procfs_seek }, /* seek */ 187 { &vop_remove_desc, procfs_remove }, /* remove */ 188 { &vop_link_desc, procfs_link }, /* link */ 189 { &vop_rename_desc, procfs_rename }, /* rename */ 190 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */ 191 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */ 192 { &vop_symlink_desc, procfs_symlink }, /* symlink */ 193 { &vop_readdir_desc, procfs_readdir }, /* readdir */ 194 { &vop_readlink_desc, procfs_readlink }, /* readlink */ 195 { &vop_abortop_desc, procfs_abortop }, /* abortop */ 196 { &vop_inactive_desc, procfs_inactive }, /* inactive */ 197 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */ 198 { &vop_lock_desc, procfs_lock }, /* lock */ 199 { &vop_unlock_desc, procfs_unlock }, /* unlock */ 200 { &vop_bmap_desc, procfs_bmap }, /* bmap */ 201 { &vop_strategy_desc, procfs_strategy }, /* strategy */ 202 { &vop_print_desc, procfs_print }, /* print */ 203 { &vop_islocked_desc, procfs_islocked }, /* islocked */ 204 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */ 205 { &vop_advlock_desc, procfs_advlock }, /* advlock */ 206 { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */ 207 { &vop_valloc_desc, procfs_valloc }, /* valloc */ 208 { &vop_vfree_desc, procfs_vfree }, /* vfree */ 209 { &vop_truncate_desc, procfs_truncate }, /* truncate */ 210 { &vop_update_desc, procfs_update }, /* update */ 211 { NULL, NULL } 212 }; 213 const struct vnodeopv_desc procfs_vnodeop_opv_desc = 214 { &procfs_vnodeop_p, procfs_vnodeop_entries }; 215 /* 216 * set things up for doing i/o on 217 * the pfsnode (vp). (vp) is locked 218 * on entry, and should be left locked 219 * on exit. 220 * 221 * for procfs we don't need to do anything 222 * in particular for i/o. all that is done 223 * is to support exclusive open on process 224 * memory images. 225 */ 226 int 227 procfs_open(v) 228 void *v; 229 { 230 struct vop_open_args /* { 231 struct vnode *a_vp; 232 int a_mode; 233 struct ucred *a_cred; 234 struct proc *a_p; 235 } */ *ap = v; 236 struct pfsnode *pfs = VTOPFS(ap->a_vp); 237 struct proc *p1, *p2; 238 int error; 239 240 p1 = ap->a_p; /* tracer */ 241 p2 = PFIND(pfs->pfs_pid); /* traced */ 242 243 if (p2 == NULL) 244 return (ENOENT); /* was ESRCH, jsp */ 245 246 switch (pfs->pfs_type) { 247 case Pmem: 248 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) || 249 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) 250 return (EBUSY); 251 252 if ((error = procfs_checkioperm(p1, p2)) != 0) 253 return (EPERM); 254 255 if (ap->a_mode & FWRITE) 256 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); 257 258 return (0); 259 260 default: 261 break; 262 } 263 264 return (0); 265 } 266 267 /* 268 * close the pfsnode (vp) after doing i/o. 269 * (vp) is not locked on entry or exit. 270 * 271 * nothing to do for procfs other than undo 272 * any exclusive open flag (see _open above). 273 */ 274 int 275 procfs_close(v) 276 void *v; 277 { 278 struct vop_close_args /* { 279 struct vnode *a_vp; 280 int a_fflag; 281 struct ucred *a_cred; 282 struct proc *a_p; 283 } */ *ap = v; 284 struct pfsnode *pfs = VTOPFS(ap->a_vp); 285 286 switch (pfs->pfs_type) { 287 case Pmem: 288 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) 289 pfs->pfs_flags &= ~(FWRITE|O_EXCL); 290 break; 291 292 default: 293 break; 294 } 295 296 return (0); 297 } 298 299 /* 300 * _inactive is called when the pfsnode 301 * is vrele'd and the reference count goes 302 * to zero. (vp) will be on the vnode free 303 * list, so to get it back vget() must be 304 * used. 305 * 306 * for procfs, check if the process is still 307 * alive and if it isn't then just throw away 308 * the vnode by calling vgone(). this may 309 * be overkill and a waste of time since the 310 * chances are that the process will still be 311 * there and PFIND is not free. 312 * 313 * (vp) is locked on entry, but must be unlocked on exit. 314 */ 315 int 316 procfs_inactive(v) 317 void *v; 318 { 319 struct vop_inactive_args /* { 320 struct vnode *a_vp; 321 struct proc *a_p; 322 } */ *ap = v; 323 struct pfsnode *pfs = VTOPFS(ap->a_vp); 324 325 VOP_UNLOCK(ap->a_vp, 0); 326 if (PFIND(pfs->pfs_pid) == 0) 327 vgone(ap->a_vp); 328 329 return (0); 330 } 331 332 /* 333 * _reclaim is called when getnewvnode() 334 * wants to make use of an entry on the vnode 335 * free list. at this time the filesystem needs 336 * to free any private data and remove the node 337 * from any private lists. 338 */ 339 int 340 procfs_reclaim(v) 341 void *v; 342 { 343 struct vop_reclaim_args /* { 344 struct vnode *a_vp; 345 } */ *ap = v; 346 347 return (procfs_freevp(ap->a_vp)); 348 } 349 350 /* 351 * Return POSIX pathconf information applicable to special devices. 352 */ 353 int 354 procfs_pathconf(v) 355 void *v; 356 { 357 struct vop_pathconf_args /* { 358 struct vnode *a_vp; 359 int a_name; 360 register_t *a_retval; 361 } */ *ap = v; 362 363 switch (ap->a_name) { 364 case _PC_LINK_MAX: 365 *ap->a_retval = LINK_MAX; 366 return (0); 367 case _PC_MAX_CANON: 368 *ap->a_retval = MAX_CANON; 369 return (0); 370 case _PC_MAX_INPUT: 371 *ap->a_retval = MAX_INPUT; 372 return (0); 373 case _PC_PIPE_BUF: 374 *ap->a_retval = PIPE_BUF; 375 return (0); 376 case _PC_CHOWN_RESTRICTED: 377 *ap->a_retval = 1; 378 return (0); 379 case _PC_VDISABLE: 380 *ap->a_retval = _POSIX_VDISABLE; 381 return (0); 382 case _PC_SYNC_IO: 383 *ap->a_retval = 1; 384 return (0); 385 default: 386 return (EINVAL); 387 } 388 /* NOTREACHED */ 389 } 390 391 /* 392 * _print is used for debugging. 393 * just print a readable description 394 * of (vp). 395 */ 396 int 397 procfs_print(v) 398 void *v; 399 { 400 struct vop_print_args /* { 401 struct vnode *a_vp; 402 } */ *ap = v; 403 struct pfsnode *pfs = VTOPFS(ap->a_vp); 404 405 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n", 406 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags); 407 return 0; 408 } 409 410 int 411 procfs_link(v) 412 void *v; 413 { 414 struct vop_link_args /* { 415 struct vnode *a_dvp; 416 struct vnode *a_vp; 417 struct componentname *a_cnp; 418 } */ *ap = v; 419 420 VOP_ABORTOP(ap->a_dvp, ap->a_cnp); 421 vput(ap->a_dvp); 422 return (EROFS); 423 } 424 425 int 426 procfs_symlink(v) 427 void *v; 428 { 429 struct vop_symlink_args /* { 430 struct vnode *a_dvp; 431 struct vnode **a_vpp; 432 struct componentname *a_cnp; 433 struct vattr *a_vap; 434 char *a_target; 435 } */ *ap = v; 436 437 VOP_ABORTOP(ap->a_dvp, ap->a_cnp); 438 vput(ap->a_dvp); 439 return (EROFS); 440 } 441 442 /* 443 * Invent attributes for pfsnode (vp) and store 444 * them in (vap). 445 * Directories lengths are returned as zero since 446 * any real length would require the genuine size 447 * to be computed, and nothing cares anyway. 448 * 449 * this is relatively minimal for procfs. 450 */ 451 int 452 procfs_getattr(v) 453 void *v; 454 { 455 struct vop_getattr_args /* { 456 struct vnode *a_vp; 457 struct vattr *a_vap; 458 struct ucred *a_cred; 459 struct proc *a_p; 460 } */ *ap = v; 461 struct pfsnode *pfs = VTOPFS(ap->a_vp); 462 struct vattr *vap = ap->a_vap; 463 struct proc *procp; 464 struct timeval tv; 465 int error; 466 467 /* first check the process still exists */ 468 switch (pfs->pfs_type) { 469 case Proot: 470 case Pcurproc: 471 case Pself: 472 procp = 0; 473 break; 474 475 default: 476 procp = PFIND(pfs->pfs_pid); 477 if (procp == 0) 478 return (ENOENT); 479 break; 480 } 481 482 error = 0; 483 484 /* start by zeroing out the attributes */ 485 VATTR_NULL(vap); 486 487 /* next do all the common fields */ 488 vap->va_type = ap->a_vp->v_type; 489 vap->va_mode = pfs->pfs_mode; 490 vap->va_fileid = pfs->pfs_fileno; 491 vap->va_flags = 0; 492 vap->va_blocksize = PAGE_SIZE; 493 494 /* 495 * Make all times be current TOD. 496 * It would be possible to get the process start 497 * time from the p_stat structure, but there's 498 * no "file creation" time stamp anyway, and the 499 * p_stat structure is not addressible if u. gets 500 * swapped out for that process. 501 */ 502 microtime(&tv); 503 TIMEVAL_TO_TIMESPEC(&tv, &vap->va_ctime); 504 vap->va_atime = vap->va_mtime = vap->va_ctime; 505 506 switch (pfs->pfs_type) { 507 case Pmem: 508 case Pregs: 509 case Pfpregs: 510 /* 511 * If the process has exercised some setuid or setgid 512 * privilege, then rip away read/write permission so 513 * that only root can gain access. 514 */ 515 if (procp->p_flag & P_SUGID) 516 vap->va_mode &= ~(S_IRUSR|S_IWUSR); 517 /* FALLTHROUGH */ 518 case Pctl: 519 case Pstatus: 520 case Pnote: 521 case Pnotepg: 522 case Pmap: 523 case Pmaps: 524 case Pcmdline: 525 vap->va_nlink = 1; 526 vap->va_uid = procp->p_ucred->cr_uid; 527 vap->va_gid = procp->p_ucred->cr_gid; 528 break; 529 case Pmeminfo: 530 case Pcpuinfo: 531 vap->va_nlink = 1; 532 vap->va_uid = vap->va_gid = 0; 533 break; 534 535 default: 536 break; 537 } 538 539 /* 540 * now do the object specific fields 541 * 542 * The size could be set from struct reg, but it's hardly 543 * worth the trouble, and it puts some (potentially) machine 544 * dependent data into this machine-independent code. If it 545 * becomes important then this function should break out into 546 * a per-file stat function in the corresponding .c file. 547 */ 548 549 switch (pfs->pfs_type) { 550 case Proot: 551 /* 552 * Set nlink to 1 to tell fts(3) we don't actually know. 553 */ 554 vap->va_nlink = 1; 555 vap->va_uid = 0; 556 vap->va_gid = 0; 557 vap->va_bytes = vap->va_size = DEV_BSIZE; 558 break; 559 560 case Pcurproc: { 561 char buf[16]; /* should be enough */ 562 vap->va_nlink = 1; 563 vap->va_uid = 0; 564 vap->va_gid = 0; 565 vap->va_bytes = vap->va_size = 566 sprintf(buf, "%ld", (long)curproc->p_pid); 567 break; 568 } 569 570 case Pself: 571 vap->va_nlink = 1; 572 vap->va_uid = 0; 573 vap->va_gid = 0; 574 vap->va_bytes = vap->va_size = sizeof("curproc"); 575 break; 576 577 case Pproc: 578 vap->va_nlink = 2; 579 vap->va_uid = procp->p_ucred->cr_uid; 580 vap->va_gid = procp->p_ucred->cr_gid; 581 vap->va_bytes = vap->va_size = DEV_BSIZE; 582 break; 583 584 case Pfile: 585 error = EOPNOTSUPP; 586 break; 587 588 case Pmem: 589 vap->va_bytes = vap->va_size = 590 ctob(procp->p_vmspace->vm_tsize + 591 procp->p_vmspace->vm_dsize + 592 procp->p_vmspace->vm_ssize); 593 break; 594 595 #if defined(PT_GETREGS) || defined(PT_SETREGS) 596 case Pregs: 597 vap->va_bytes = vap->va_size = sizeof(struct reg); 598 break; 599 #endif 600 601 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS) 602 case Pfpregs: 603 vap->va_bytes = vap->va_size = sizeof(struct fpreg); 604 break; 605 #endif 606 607 case Pctl: 608 case Pstatus: 609 case Pnote: 610 case Pnotepg: 611 case Pcmdline: 612 case Pmeminfo: 613 case Pcpuinfo: 614 vap->va_bytes = vap->va_size = 0; 615 break; 616 case Pmap: 617 case Pmaps: 618 /* 619 * Advise a larger blocksize for the map files, so that 620 * they may be read in one pass. 621 */ 622 vap->va_blocksize = 4 * PAGE_SIZE; 623 break; 624 625 default: 626 panic("procfs_getattr"); 627 } 628 629 return (error); 630 } 631 632 /*ARGSUSED*/ 633 int 634 procfs_setattr(v) 635 void *v; 636 { 637 /* 638 * just fake out attribute setting 639 * it's not good to generate an error 640 * return, otherwise things like creat() 641 * will fail when they try to set the 642 * file length to 0. worse, this means 643 * that echo $note > /proc/$pid/note will fail. 644 */ 645 646 return (0); 647 } 648 649 /* 650 * implement access checking. 651 * 652 * actually, the check for super-user is slightly 653 * broken since it will allow read access to write-only 654 * objects. this doesn't cause any particular trouble 655 * but does mean that the i/o entry points need to check 656 * that the operation really does make sense. 657 */ 658 int 659 procfs_access(v) 660 void *v; 661 { 662 struct vop_access_args /* { 663 struct vnode *a_vp; 664 int a_mode; 665 struct ucred *a_cred; 666 struct proc *a_p; 667 } */ *ap = v; 668 struct vattr va; 669 int error; 670 671 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) != 0) 672 return (error); 673 674 return (vaccess(va.va_type, va.va_mode, 675 va.va_uid, va.va_gid, ap->a_mode, ap->a_cred)); 676 } 677 678 /* 679 * lookup. this is incredibly complicated in the 680 * general case, however for most pseudo-filesystems 681 * very little needs to be done. 682 * 683 * Locking isn't hard here, just poorly documented. 684 * 685 * If we're looking up ".", just vref the parent & return it. 686 * 687 * If we're looking up "..", unlock the parent, and lock "..". If everything 688 * went ok, and we're on the last component and the caller requested the 689 * parent locked, try to re-lock the parent. We do this to prevent lock 690 * races. 691 * 692 * For anything else, get the needed node. Then unlock the parent if not 693 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the 694 * parent in the .. case). 695 * 696 * We try to exit with the parent locked in error cases. 697 */ 698 int 699 procfs_lookup(v) 700 void *v; 701 { 702 struct vop_lookup_args /* { 703 struct vnode * a_dvp; 704 struct vnode ** a_vpp; 705 struct componentname * a_cnp; 706 } */ *ap = v; 707 struct componentname *cnp = ap->a_cnp; 708 struct vnode **vpp = ap->a_vpp; 709 struct vnode *dvp = ap->a_dvp; 710 const char *pname = cnp->cn_nameptr; 711 const struct proc_target *pt = NULL; 712 struct vnode *fvp; 713 pid_t pid; 714 struct pfsnode *pfs; 715 struct proc *p = NULL; 716 int i, error, wantpunlock, iscurproc = 0, isself = 0; 717 718 *vpp = NULL; 719 cnp->cn_flags &= ~PDIRUNLOCK; 720 721 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) 722 return (EROFS); 723 724 if (cnp->cn_namelen == 1 && *pname == '.') { 725 *vpp = dvp; 726 VREF(dvp); 727 return (0); 728 } 729 730 wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN)); 731 pfs = VTOPFS(dvp); 732 switch (pfs->pfs_type) { 733 case Proot: 734 /* 735 * Shouldn't get here with .. in the root node. 736 */ 737 if (cnp->cn_flags & ISDOTDOT) 738 return (EIO); 739 740 iscurproc = CNEQ(cnp, "curproc", 7); 741 isself = CNEQ(cnp, "self", 4); 742 743 if (iscurproc || isself) { 744 error = procfs_allocvp(dvp->v_mount, vpp, 0, 745 iscurproc ? Pcurproc : Pself); 746 if ((error == 0) && (wantpunlock)) { 747 VOP_UNLOCK(dvp, 0); 748 cnp->cn_flags |= PDIRUNLOCK; 749 } 750 return (error); 751 } 752 753 for (i = 0; i < nproc_root_targets; i++) { 754 pt = &proc_root_targets[i]; 755 if (cnp->cn_namelen == pt->pt_namlen && 756 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 757 (pt->pt_valid == NULL || 758 (*pt->pt_valid)(p, dvp->v_mount))) 759 break; 760 } 761 762 if (i != nproc_root_targets) { 763 error = procfs_allocvp(dvp->v_mount, vpp, 0, 764 pt->pt_pfstype); 765 if ((error == 0) && (wantpunlock)) { 766 VOP_UNLOCK(dvp, 0); 767 cnp->cn_flags |= PDIRUNLOCK; 768 } 769 return (error); 770 } 771 772 pid = atopid(pname, cnp->cn_namelen); 773 if (pid == NO_PID) 774 break; 775 776 p = PFIND(pid); 777 if (p == 0) 778 break; 779 780 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc); 781 if ((error == 0) && (wantpunlock)) { 782 VOP_UNLOCK(dvp, 0); 783 cnp->cn_flags |= PDIRUNLOCK; 784 } 785 return (error); 786 787 case Pproc: 788 /* 789 * do the .. dance. We unlock the directory, and then 790 * get the root dir. That will automatically return .. 791 * locked. Then if the caller wanted dvp locked, we 792 * re-lock. 793 */ 794 if (cnp->cn_flags & ISDOTDOT) { 795 VOP_UNLOCK(dvp, 0); 796 cnp->cn_flags |= PDIRUNLOCK; 797 error = procfs_root(dvp->v_mount, vpp); 798 if ((error == 0) && (wantpunlock == 0) && 799 ((error = vn_lock(dvp, LK_EXCLUSIVE)) == 0)) 800 cnp->cn_flags &= ~PDIRUNLOCK; 801 return (error); 802 } 803 804 p = PFIND(pfs->pfs_pid); 805 if (p == 0) 806 break; 807 808 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) { 809 if (cnp->cn_namelen == pt->pt_namlen && 810 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 811 (pt->pt_valid == NULL || 812 (*pt->pt_valid)(p, dvp->v_mount))) 813 goto found; 814 } 815 break; 816 817 found: 818 if (pt->pt_pfstype == Pfile) { 819 fvp = p->p_textvp; 820 /* We already checked that it exists. */ 821 VREF(fvp); 822 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY); 823 if (wantpunlock) { 824 VOP_UNLOCK(dvp, 0); 825 cnp->cn_flags |= PDIRUNLOCK; 826 } 827 *vpp = fvp; 828 return (0); 829 } 830 831 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 832 pt->pt_pfstype); 833 if ((error == 0) && (wantpunlock)) { 834 VOP_UNLOCK(dvp, 0); 835 cnp->cn_flags |= PDIRUNLOCK; 836 } 837 return (error); 838 839 default: 840 return (ENOTDIR); 841 } 842 843 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS); 844 } 845 846 int 847 procfs_validfile(p, mp) 848 struct proc *p; 849 struct mount *mp; 850 { 851 return (p->p_textvp != NULL); 852 } 853 854 static int 855 procfs_validfile_linux(p, mp) 856 struct proc *p; 857 struct mount *mp; 858 { 859 int flags; 860 861 flags = VFSTOPROC(mp)->pmnt_flags; 862 return ((flags & PROCFSMNT_LINUXCOMPAT) && 863 (p == NULL || procfs_validfile(p, mp))); 864 } 865 866 /* 867 * readdir returns directory entries from pfsnode (vp). 868 * 869 * the strategy here with procfs is to generate a single 870 * directory entry at a time (struct dirent) and then 871 * copy that out to userland using uiomove. a more efficent 872 * though more complex implementation, would try to minimize 873 * the number of calls to uiomove(). for procfs, this is 874 * hardly worth the added code complexity. 875 * 876 * this should just be done through read() 877 */ 878 int 879 procfs_readdir(v) 880 void *v; 881 { 882 struct vop_readdir_args /* { 883 struct vnode *a_vp; 884 struct uio *a_uio; 885 struct ucred *a_cred; 886 int *a_eofflag; 887 off_t **a_cookies; 888 int *a_ncookies; 889 } */ *ap = v; 890 struct uio *uio = ap->a_uio; 891 struct dirent d; 892 struct pfsnode *pfs; 893 off_t i; 894 int error; 895 off_t *cookies = NULL; 896 int ncookies, left, skip, j; 897 struct vnode *vp; 898 const struct proc_target *pt; 899 900 vp = ap->a_vp; 901 pfs = VTOPFS(vp); 902 903 if (uio->uio_resid < UIO_MX) 904 return (EINVAL); 905 if (uio->uio_offset < 0) 906 return (EINVAL); 907 908 error = 0; 909 i = uio->uio_offset; 910 memset((caddr_t)&d, 0, UIO_MX); 911 d.d_reclen = UIO_MX; 912 ncookies = uio->uio_resid / UIO_MX; 913 914 switch (pfs->pfs_type) { 915 /* 916 * this is for the process-specific sub-directories. 917 * all that is needed to is copy out all the entries 918 * from the procent[] table (top of this file). 919 */ 920 case Pproc: { 921 struct proc *p; 922 923 if (i >= nproc_targets) 924 return 0; 925 926 p = PFIND(pfs->pfs_pid); 927 if (p == NULL) 928 break; 929 930 if (ap->a_ncookies) { 931 ncookies = min(ncookies, (nproc_targets - i)); 932 cookies = malloc(ncookies * sizeof (off_t), 933 M_TEMP, M_WAITOK); 934 *ap->a_cookies = cookies; 935 } 936 937 for (pt = &proc_targets[i]; 938 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) { 939 if (pt->pt_valid && 940 (*pt->pt_valid)(p, vp->v_mount) == 0) 941 continue; 942 943 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype); 944 d.d_namlen = pt->pt_namlen; 945 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 946 d.d_type = pt->pt_type; 947 948 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 949 break; 950 if (cookies) 951 *cookies++ = i + 1; 952 } 953 954 break; 955 } 956 957 /* 958 * this is for the root of the procfs filesystem 959 * what is needed are special entries for "curproc" 960 * and "self" followed by an entry for each process 961 * on allproc 962 #ifdef PROCFS_ZOMBIE 963 * and deadproc and zombproc. 964 #endif 965 */ 966 967 case Proot: { 968 int pcnt = i, nc = 0; 969 const struct proclist_desc *pd; 970 volatile struct proc *p; 971 972 if (pcnt > 3) 973 pcnt = 3; 974 if (ap->a_ncookies) { 975 /* 976 * XXX Potentially allocating too much space here, 977 * but I'm lazy. This loop needs some work. 978 */ 979 cookies = malloc(ncookies * sizeof (off_t), 980 M_TEMP, M_WAITOK); 981 *ap->a_cookies = cookies; 982 } 983 /* 984 * XXX: THIS LOOP ASSUMES THAT allproc IS THE FIRST 985 * PROCLIST IN THE proclists! 986 */ 987 proclist_lock_read(); 988 pd = proclists; 989 #ifdef PROCFS_ZOMBIE 990 again: 991 #endif 992 for (p = LIST_FIRST(pd->pd_list); 993 p != NULL && uio->uio_resid >= UIO_MX; i++, pcnt++) { 994 switch (i) { 995 case 0: /* `.' */ 996 case 1: /* `..' */ 997 d.d_fileno = PROCFS_FILENO(0, Proot); 998 d.d_namlen = i + 1; 999 memcpy(d.d_name, "..", d.d_namlen); 1000 d.d_name[i + 1] = '\0'; 1001 d.d_type = DT_DIR; 1002 break; 1003 1004 case 2: 1005 d.d_fileno = PROCFS_FILENO(0, Pcurproc); 1006 d.d_namlen = sizeof("curproc") - 1; 1007 memcpy(d.d_name, "curproc", sizeof("curproc")); 1008 d.d_type = DT_LNK; 1009 break; 1010 1011 case 3: 1012 d.d_fileno = PROCFS_FILENO(0, Pself); 1013 d.d_namlen = sizeof("self") - 1; 1014 memcpy(d.d_name, "self", sizeof("self")); 1015 d.d_type = DT_LNK; 1016 break; 1017 1018 default: 1019 while (pcnt < i) { 1020 pcnt++; 1021 p = LIST_NEXT(p, p_list); 1022 if (!p) 1023 goto done; 1024 } 1025 d.d_fileno = PROCFS_FILENO(p->p_pid, Pproc); 1026 d.d_namlen = sprintf(d.d_name, "%ld", 1027 (long)p->p_pid); 1028 d.d_type = DT_DIR; 1029 p = p->p_list.le_next; 1030 break; 1031 } 1032 1033 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 1034 break; 1035 nc++; 1036 if (cookies) 1037 *cookies++ = i + 1; 1038 } 1039 done: 1040 1041 #ifdef PROCFS_ZOMBIE 1042 pd++; 1043 if (p == NULL && pd->pd_list != NULL) 1044 goto again; 1045 #endif 1046 proclist_unlock_read(); 1047 1048 skip = i - pcnt; 1049 if (skip >= nproc_root_targets) 1050 break; 1051 left = nproc_root_targets - skip; 1052 for (j = 0, pt = &proc_root_targets[0]; 1053 uio->uio_resid >= UIO_MX && j < left; 1054 pt++, j++, i++) { 1055 if (pt->pt_valid && 1056 (*pt->pt_valid)(NULL, vp->v_mount) == 0) 1057 continue; 1058 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype); 1059 d.d_namlen = pt->pt_namlen; 1060 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1061 d.d_type = pt->pt_type; 1062 1063 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 1064 break; 1065 nc++; 1066 if (cookies) 1067 *cookies++ = i + 1; 1068 } 1069 1070 ncookies = nc; 1071 break; 1072 } 1073 1074 default: 1075 error = ENOTDIR; 1076 break; 1077 } 1078 1079 if (ap->a_ncookies) { 1080 if (error) { 1081 if (cookies) 1082 free(*ap->a_cookies, M_TEMP); 1083 *ap->a_ncookies = 0; 1084 *ap->a_cookies = NULL; 1085 } else 1086 *ap->a_ncookies = ncookies; 1087 } 1088 uio->uio_offset = i; 1089 return (error); 1090 } 1091 1092 /* 1093 * readlink reads the link of `curproc' 1094 */ 1095 int 1096 procfs_readlink(v) 1097 void *v; 1098 { 1099 struct vop_readlink_args *ap = v; 1100 char buf[16]; /* should be enough */ 1101 int len; 1102 1103 if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pcurproc)) 1104 len = sprintf(buf, "%ld", (long)curproc->p_pid); 1105 else if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pself)) 1106 len = sprintf(buf, "%s", "curproc"); 1107 else 1108 return (EINVAL); 1109 1110 return (uiomove((caddr_t)buf, len, ap->a_uio)); 1111 } 1112 1113 /* 1114 * convert decimal ascii to pid_t 1115 */ 1116 static pid_t 1117 atopid(b, len) 1118 const char *b; 1119 u_int len; 1120 { 1121 pid_t p = 0; 1122 1123 while (len--) { 1124 char c = *b++; 1125 if (c < '0' || c > '9') 1126 return (NO_PID); 1127 p = 10 * p + (c - '0'); 1128 if (p > PID_MAX) 1129 return (NO_PID); 1130 } 1131 1132 return (p); 1133 } 1134