1 /* $NetBSD: procfs_vnops.c,v 1.83 2001/08/31 04:56:26 chs Exp $ */ 2 3 /* 4 * Copyright (c) 1993 Jan-Simon Pendry 5 * Copyright (c) 1993, 1995 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Jan-Simon Pendry. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 40 */ 41 42 /* 43 * procfs vnode interface 44 */ 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/time.h> 49 #include <sys/kernel.h> 50 #include <sys/file.h> 51 #include <sys/proc.h> 52 #include <sys/vnode.h> 53 #include <sys/namei.h> 54 #include <sys/malloc.h> 55 #include <sys/mount.h> 56 #include <sys/dirent.h> 57 #include <sys/resourcevar.h> 58 #include <sys/ptrace.h> 59 #include <sys/stat.h> 60 61 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */ 62 63 #include <machine/reg.h> 64 65 #include <miscfs/genfs/genfs.h> 66 #include <miscfs/procfs/procfs.h> 67 68 /* 69 * Vnode Operations. 70 * 71 */ 72 73 static int procfs_validfile_linux __P((struct proc *, struct mount *)); 74 75 /* 76 * This is a list of the valid names in the 77 * process-specific sub-directories. It is 78 * used in procfs_lookup and procfs_readdir 79 */ 80 const struct proc_target { 81 u_char pt_type; 82 u_char pt_namlen; 83 char *pt_name; 84 pfstype pt_pfstype; 85 int (*pt_valid) __P((struct proc *, struct mount *)); 86 } proc_targets[] = { 87 #define N(s) sizeof(s)-1, s 88 /* name type validp */ 89 { DT_DIR, N("."), Pproc, NULL }, 90 { DT_DIR, N(".."), Proot, NULL }, 91 { DT_REG, N("file"), Pfile, procfs_validfile }, 92 { DT_REG, N("mem"), Pmem, NULL }, 93 { DT_REG, N("regs"), Pregs, procfs_validregs }, 94 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs }, 95 { DT_REG, N("ctl"), Pctl, NULL }, 96 { DT_REG, N("status"), Pstatus, NULL }, 97 { DT_REG, N("note"), Pnote, NULL }, 98 { DT_REG, N("notepg"), Pnotepg, NULL }, 99 { DT_REG, N("map"), Pmap, procfs_validmap }, 100 { DT_REG, N("maps"), Pmaps, procfs_validmap }, 101 { DT_REG, N("cmdline"), Pcmdline, NULL }, 102 { DT_REG, N("exe"), Pfile, procfs_validfile_linux }, 103 #undef N 104 }; 105 static int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]); 106 107 /* 108 * List of files in the root directory. Note: the validate function will 109 * be called with p == NULL for these ones. 110 */ 111 struct proc_target proc_root_targets[] = { 112 #define N(s) sizeof(s)-1, s 113 /* name type validp */ 114 { DT_REG, N("meminfo"), Pmeminfo, procfs_validfile_linux }, 115 { DT_REG, N("cpuinfo"), Pcpuinfo, procfs_validfile_linux }, 116 #undef N 117 }; 118 static int nproc_root_targets = 119 sizeof(proc_root_targets) / sizeof(proc_root_targets[0]); 120 121 int procfs_lookup __P((void *)); 122 #define procfs_create genfs_eopnotsupp_rele 123 #define procfs_mknod genfs_eopnotsupp_rele 124 int procfs_open __P((void *)); 125 int procfs_close __P((void *)); 126 int procfs_access __P((void *)); 127 int procfs_getattr __P((void *)); 128 int procfs_setattr __P((void *)); 129 #define procfs_read procfs_rw 130 #define procfs_write procfs_rw 131 #define procfs_fcntl genfs_fcntl 132 #define procfs_ioctl genfs_enoioctl 133 #define procfs_poll genfs_poll 134 #define procfs_revoke genfs_revoke 135 #define procfs_fsync genfs_nullop 136 #define procfs_seek genfs_nullop 137 #define procfs_remove genfs_eopnotsupp_rele 138 int procfs_link __P((void *)); 139 #define procfs_rename genfs_eopnotsupp_rele 140 #define procfs_mkdir genfs_eopnotsupp_rele 141 #define procfs_rmdir genfs_eopnotsupp_rele 142 int procfs_symlink __P((void *)); 143 int procfs_readdir __P((void *)); 144 int procfs_readlink __P((void *)); 145 #define procfs_abortop genfs_abortop 146 int procfs_inactive __P((void *)); 147 int procfs_reclaim __P((void *)); 148 #define procfs_lock genfs_lock 149 #define procfs_unlock genfs_unlock 150 #define procfs_bmap genfs_badop 151 #define procfs_strategy genfs_badop 152 int procfs_print __P((void *)); 153 int procfs_pathconf __P((void *)); 154 #define procfs_islocked genfs_islocked 155 #define procfs_advlock genfs_einval 156 #define procfs_blkatoff genfs_eopnotsupp 157 #define procfs_valloc genfs_eopnotsupp 158 #define procfs_vfree genfs_nullop 159 #define procfs_truncate genfs_eopnotsupp 160 #define procfs_update genfs_nullop 161 #define procfs_bwrite genfs_eopnotsupp 162 163 static pid_t atopid __P((const char *, u_int)); 164 165 /* 166 * procfs vnode operations. 167 */ 168 int (**procfs_vnodeop_p) __P((void *)); 169 const struct vnodeopv_entry_desc procfs_vnodeop_entries[] = { 170 { &vop_default_desc, vn_default_error }, 171 { &vop_lookup_desc, procfs_lookup }, /* lookup */ 172 { &vop_create_desc, procfs_create }, /* create */ 173 { &vop_mknod_desc, procfs_mknod }, /* mknod */ 174 { &vop_open_desc, procfs_open }, /* open */ 175 { &vop_close_desc, procfs_close }, /* close */ 176 { &vop_access_desc, procfs_access }, /* access */ 177 { &vop_getattr_desc, procfs_getattr }, /* getattr */ 178 { &vop_setattr_desc, procfs_setattr }, /* setattr */ 179 { &vop_read_desc, procfs_read }, /* read */ 180 { &vop_write_desc, procfs_write }, /* write */ 181 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */ 182 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */ 183 { &vop_poll_desc, procfs_poll }, /* poll */ 184 { &vop_revoke_desc, procfs_revoke }, /* revoke */ 185 { &vop_fsync_desc, procfs_fsync }, /* fsync */ 186 { &vop_seek_desc, procfs_seek }, /* seek */ 187 { &vop_remove_desc, procfs_remove }, /* remove */ 188 { &vop_link_desc, procfs_link }, /* link */ 189 { &vop_rename_desc, procfs_rename }, /* rename */ 190 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */ 191 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */ 192 { &vop_symlink_desc, procfs_symlink }, /* symlink */ 193 { &vop_readdir_desc, procfs_readdir }, /* readdir */ 194 { &vop_readlink_desc, procfs_readlink }, /* readlink */ 195 { &vop_abortop_desc, procfs_abortop }, /* abortop */ 196 { &vop_inactive_desc, procfs_inactive }, /* inactive */ 197 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */ 198 { &vop_lock_desc, procfs_lock }, /* lock */ 199 { &vop_unlock_desc, procfs_unlock }, /* unlock */ 200 { &vop_bmap_desc, procfs_bmap }, /* bmap */ 201 { &vop_strategy_desc, procfs_strategy }, /* strategy */ 202 { &vop_print_desc, procfs_print }, /* print */ 203 { &vop_islocked_desc, procfs_islocked }, /* islocked */ 204 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */ 205 { &vop_advlock_desc, procfs_advlock }, /* advlock */ 206 { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */ 207 { &vop_valloc_desc, procfs_valloc }, /* valloc */ 208 { &vop_vfree_desc, procfs_vfree }, /* vfree */ 209 { &vop_truncate_desc, procfs_truncate }, /* truncate */ 210 { &vop_update_desc, procfs_update }, /* update */ 211 { NULL, NULL } 212 }; 213 const struct vnodeopv_desc procfs_vnodeop_opv_desc = 214 { &procfs_vnodeop_p, procfs_vnodeop_entries }; 215 /* 216 * set things up for doing i/o on 217 * the pfsnode (vp). (vp) is locked 218 * on entry, and should be left locked 219 * on exit. 220 * 221 * for procfs we don't need to do anything 222 * in particular for i/o. all that is done 223 * is to support exclusive open on process 224 * memory images. 225 */ 226 int 227 procfs_open(v) 228 void *v; 229 { 230 struct vop_open_args /* { 231 struct vnode *a_vp; 232 int a_mode; 233 struct ucred *a_cred; 234 struct proc *a_p; 235 } */ *ap = v; 236 struct pfsnode *pfs = VTOPFS(ap->a_vp); 237 struct proc *p1, *p2; 238 int error; 239 240 p1 = ap->a_p; /* tracer */ 241 p2 = PFIND(pfs->pfs_pid); /* traced */ 242 243 if (p2 == NULL) 244 return (ENOENT); /* was ESRCH, jsp */ 245 246 switch (pfs->pfs_type) { 247 case Pmem: 248 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) || 249 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) 250 return (EBUSY); 251 252 if ((error = procfs_checkioperm(p1, p2)) != 0) 253 return (EPERM); 254 255 if (ap->a_mode & FWRITE) 256 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); 257 258 return (0); 259 260 default: 261 break; 262 } 263 264 return (0); 265 } 266 267 /* 268 * close the pfsnode (vp) after doing i/o. 269 * (vp) is not locked on entry or exit. 270 * 271 * nothing to do for procfs other than undo 272 * any exclusive open flag (see _open above). 273 */ 274 int 275 procfs_close(v) 276 void *v; 277 { 278 struct vop_close_args /* { 279 struct vnode *a_vp; 280 int a_fflag; 281 struct ucred *a_cred; 282 struct proc *a_p; 283 } */ *ap = v; 284 struct pfsnode *pfs = VTOPFS(ap->a_vp); 285 286 switch (pfs->pfs_type) { 287 case Pmem: 288 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) 289 pfs->pfs_flags &= ~(FWRITE|O_EXCL); 290 break; 291 292 default: 293 break; 294 } 295 296 return (0); 297 } 298 299 /* 300 * _inactive is called when the pfsnode 301 * is vrele'd and the reference count goes 302 * to zero. (vp) will be on the vnode free 303 * list, so to get it back vget() must be 304 * used. 305 * 306 * for procfs, check if the process is still 307 * alive and if it isn't then just throw away 308 * the vnode by calling vgone(). this may 309 * be overkill and a waste of time since the 310 * chances are that the process will still be 311 * there and PFIND is not free. 312 * 313 * (vp) is locked on entry, but must be unlocked on exit. 314 */ 315 int 316 procfs_inactive(v) 317 void *v; 318 { 319 struct vop_inactive_args /* { 320 struct vnode *a_vp; 321 struct proc *a_p; 322 } */ *ap = v; 323 struct pfsnode *pfs = VTOPFS(ap->a_vp); 324 325 VOP_UNLOCK(ap->a_vp, 0); 326 if (PFIND(pfs->pfs_pid) == 0) 327 vgone(ap->a_vp); 328 329 return (0); 330 } 331 332 /* 333 * _reclaim is called when getnewvnode() 334 * wants to make use of an entry on the vnode 335 * free list. at this time the filesystem needs 336 * to free any private data and remove the node 337 * from any private lists. 338 */ 339 int 340 procfs_reclaim(v) 341 void *v; 342 { 343 struct vop_reclaim_args /* { 344 struct vnode *a_vp; 345 } */ *ap = v; 346 347 return (procfs_freevp(ap->a_vp)); 348 } 349 350 /* 351 * Return POSIX pathconf information applicable to special devices. 352 */ 353 int 354 procfs_pathconf(v) 355 void *v; 356 { 357 struct vop_pathconf_args /* { 358 struct vnode *a_vp; 359 int a_name; 360 register_t *a_retval; 361 } */ *ap = v; 362 363 switch (ap->a_name) { 364 case _PC_LINK_MAX: 365 *ap->a_retval = LINK_MAX; 366 return (0); 367 case _PC_MAX_CANON: 368 *ap->a_retval = MAX_CANON; 369 return (0); 370 case _PC_MAX_INPUT: 371 *ap->a_retval = MAX_INPUT; 372 return (0); 373 case _PC_PIPE_BUF: 374 *ap->a_retval = PIPE_BUF; 375 return (0); 376 case _PC_CHOWN_RESTRICTED: 377 *ap->a_retval = 1; 378 return (0); 379 case _PC_VDISABLE: 380 *ap->a_retval = _POSIX_VDISABLE; 381 return (0); 382 case _PC_SYNC_IO: 383 *ap->a_retval = 1; 384 return (0); 385 default: 386 return (EINVAL); 387 } 388 /* NOTREACHED */ 389 } 390 391 /* 392 * _print is used for debugging. 393 * just print a readable description 394 * of (vp). 395 */ 396 int 397 procfs_print(v) 398 void *v; 399 { 400 struct vop_print_args /* { 401 struct vnode *a_vp; 402 } */ *ap = v; 403 struct pfsnode *pfs = VTOPFS(ap->a_vp); 404 405 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n", 406 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags); 407 return 0; 408 } 409 410 int 411 procfs_link(v) 412 void *v; 413 { 414 struct vop_link_args /* { 415 struct vnode *a_dvp; 416 struct vnode *a_vp; 417 struct componentname *a_cnp; 418 } */ *ap = v; 419 420 VOP_ABORTOP(ap->a_dvp, ap->a_cnp); 421 vput(ap->a_dvp); 422 return (EROFS); 423 } 424 425 int 426 procfs_symlink(v) 427 void *v; 428 { 429 struct vop_symlink_args /* { 430 struct vnode *a_dvp; 431 struct vnode **a_vpp; 432 struct componentname *a_cnp; 433 struct vattr *a_vap; 434 char *a_target; 435 } */ *ap = v; 436 437 VOP_ABORTOP(ap->a_dvp, ap->a_cnp); 438 vput(ap->a_dvp); 439 return (EROFS); 440 } 441 442 /* 443 * Invent attributes for pfsnode (vp) and store 444 * them in (vap). 445 * Directories lengths are returned as zero since 446 * any real length would require the genuine size 447 * to be computed, and nothing cares anyway. 448 * 449 * this is relatively minimal for procfs. 450 */ 451 int 452 procfs_getattr(v) 453 void *v; 454 { 455 struct vop_getattr_args /* { 456 struct vnode *a_vp; 457 struct vattr *a_vap; 458 struct ucred *a_cred; 459 struct proc *a_p; 460 } */ *ap = v; 461 struct pfsnode *pfs = VTOPFS(ap->a_vp); 462 struct vattr *vap = ap->a_vap; 463 struct proc *procp; 464 struct timeval tv; 465 int error; 466 467 /* first check the process still exists */ 468 switch (pfs->pfs_type) { 469 case Proot: 470 case Pcurproc: 471 case Pself: 472 procp = 0; 473 break; 474 475 default: 476 procp = PFIND(pfs->pfs_pid); 477 if (procp == 0) 478 return (ENOENT); 479 break; 480 } 481 482 error = 0; 483 484 /* start by zeroing out the attributes */ 485 VATTR_NULL(vap); 486 487 /* next do all the common fields */ 488 vap->va_type = ap->a_vp->v_type; 489 vap->va_mode = pfs->pfs_mode; 490 vap->va_fileid = pfs->pfs_fileno; 491 vap->va_flags = 0; 492 vap->va_blocksize = PAGE_SIZE; 493 494 /* 495 * Make all times be current TOD. 496 * It would be possible to get the process start 497 * time from the p_stat structure, but there's 498 * no "file creation" time stamp anyway, and the 499 * p_stat structure is not addressible if u. gets 500 * swapped out for that process. 501 */ 502 microtime(&tv); 503 TIMEVAL_TO_TIMESPEC(&tv, &vap->va_ctime); 504 vap->va_atime = vap->va_mtime = vap->va_ctime; 505 506 switch (pfs->pfs_type) { 507 case Pmem: 508 case Pregs: 509 case Pfpregs: 510 /* 511 * If the process has exercised some setuid or setgid 512 * privilege, then rip away read/write permission so 513 * that only root can gain access. 514 */ 515 if (procp->p_flag & P_SUGID) 516 vap->va_mode &= ~(S_IRUSR|S_IWUSR); 517 /* FALLTHROUGH */ 518 case Pctl: 519 case Pstatus: 520 case Pnote: 521 case Pnotepg: 522 case Pmap: 523 case Pmaps: 524 case Pcmdline: 525 vap->va_nlink = 1; 526 vap->va_uid = procp->p_ucred->cr_uid; 527 vap->va_gid = procp->p_ucred->cr_gid; 528 break; 529 case Pmeminfo: 530 case Pcpuinfo: 531 vap->va_nlink = 1; 532 vap->va_uid = vap->va_gid = 0; 533 break; 534 535 default: 536 break; 537 } 538 539 /* 540 * now do the object specific fields 541 * 542 * The size could be set from struct reg, but it's hardly 543 * worth the trouble, and it puts some (potentially) machine 544 * dependent data into this machine-independent code. If it 545 * becomes important then this function should break out into 546 * a per-file stat function in the corresponding .c file. 547 */ 548 549 switch (pfs->pfs_type) { 550 case Proot: 551 /* 552 * Set nlink to 1 to tell fts(3) we don't actually know. 553 */ 554 vap->va_nlink = 1; 555 vap->va_uid = 0; 556 vap->va_gid = 0; 557 vap->va_bytes = vap->va_size = DEV_BSIZE; 558 break; 559 560 case Pcurproc: { 561 char buf[16]; /* should be enough */ 562 vap->va_nlink = 1; 563 vap->va_uid = 0; 564 vap->va_gid = 0; 565 vap->va_bytes = vap->va_size = 566 sprintf(buf, "%ld", (long)curproc->p_pid); 567 break; 568 } 569 570 case Pself: 571 vap->va_nlink = 1; 572 vap->va_uid = 0; 573 vap->va_gid = 0; 574 vap->va_bytes = vap->va_size = sizeof("curproc"); 575 break; 576 577 case Pproc: 578 vap->va_nlink = 2; 579 vap->va_uid = procp->p_ucred->cr_uid; 580 vap->va_gid = procp->p_ucred->cr_gid; 581 vap->va_bytes = vap->va_size = DEV_BSIZE; 582 break; 583 584 case Pfile: 585 error = EOPNOTSUPP; 586 break; 587 588 case Pmem: 589 vap->va_bytes = vap->va_size = 590 ctob(procp->p_vmspace->vm_tsize + 591 procp->p_vmspace->vm_dsize + 592 procp->p_vmspace->vm_ssize); 593 break; 594 595 #if defined(PT_GETREGS) || defined(PT_SETREGS) 596 case Pregs: 597 vap->va_bytes = vap->va_size = sizeof(struct reg); 598 break; 599 #endif 600 601 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS) 602 case Pfpregs: 603 vap->va_bytes = vap->va_size = sizeof(struct fpreg); 604 break; 605 #endif 606 607 case Pctl: 608 case Pstatus: 609 case Pnote: 610 case Pnotepg: 611 case Pcmdline: 612 case Pmeminfo: 613 case Pcpuinfo: 614 vap->va_bytes = vap->va_size = 0; 615 break; 616 case Pmap: 617 case Pmaps: 618 /* 619 * Advise a larger blocksize for the map files, so that 620 * they may be read in one pass. 621 */ 622 vap->va_blocksize = 4 * PAGE_SIZE; 623 vap->va_bytes = vap->va_size = 0; 624 break; 625 626 default: 627 panic("procfs_getattr"); 628 } 629 630 return (error); 631 } 632 633 /*ARGSUSED*/ 634 int 635 procfs_setattr(v) 636 void *v; 637 { 638 /* 639 * just fake out attribute setting 640 * it's not good to generate an error 641 * return, otherwise things like creat() 642 * will fail when they try to set the 643 * file length to 0. worse, this means 644 * that echo $note > /proc/$pid/note will fail. 645 */ 646 647 return (0); 648 } 649 650 /* 651 * implement access checking. 652 * 653 * actually, the check for super-user is slightly 654 * broken since it will allow read access to write-only 655 * objects. this doesn't cause any particular trouble 656 * but does mean that the i/o entry points need to check 657 * that the operation really does make sense. 658 */ 659 int 660 procfs_access(v) 661 void *v; 662 { 663 struct vop_access_args /* { 664 struct vnode *a_vp; 665 int a_mode; 666 struct ucred *a_cred; 667 struct proc *a_p; 668 } */ *ap = v; 669 struct vattr va; 670 int error; 671 672 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) != 0) 673 return (error); 674 675 return (vaccess(va.va_type, va.va_mode, 676 va.va_uid, va.va_gid, ap->a_mode, ap->a_cred)); 677 } 678 679 /* 680 * lookup. this is incredibly complicated in the 681 * general case, however for most pseudo-filesystems 682 * very little needs to be done. 683 * 684 * Locking isn't hard here, just poorly documented. 685 * 686 * If we're looking up ".", just vref the parent & return it. 687 * 688 * If we're looking up "..", unlock the parent, and lock "..". If everything 689 * went ok, and we're on the last component and the caller requested the 690 * parent locked, try to re-lock the parent. We do this to prevent lock 691 * races. 692 * 693 * For anything else, get the needed node. Then unlock the parent if not 694 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the 695 * parent in the .. case). 696 * 697 * We try to exit with the parent locked in error cases. 698 */ 699 int 700 procfs_lookup(v) 701 void *v; 702 { 703 struct vop_lookup_args /* { 704 struct vnode * a_dvp; 705 struct vnode ** a_vpp; 706 struct componentname * a_cnp; 707 } */ *ap = v; 708 struct componentname *cnp = ap->a_cnp; 709 struct vnode **vpp = ap->a_vpp; 710 struct vnode *dvp = ap->a_dvp; 711 const char *pname = cnp->cn_nameptr; 712 const struct proc_target *pt = NULL; 713 struct vnode *fvp; 714 pid_t pid; 715 struct pfsnode *pfs; 716 struct proc *p = NULL; 717 int i, error, wantpunlock, iscurproc = 0, isself = 0; 718 719 *vpp = NULL; 720 cnp->cn_flags &= ~PDIRUNLOCK; 721 722 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) 723 return (EROFS); 724 725 if (cnp->cn_namelen == 1 && *pname == '.') { 726 *vpp = dvp; 727 VREF(dvp); 728 return (0); 729 } 730 731 wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN)); 732 pfs = VTOPFS(dvp); 733 switch (pfs->pfs_type) { 734 case Proot: 735 /* 736 * Shouldn't get here with .. in the root node. 737 */ 738 if (cnp->cn_flags & ISDOTDOT) 739 return (EIO); 740 741 iscurproc = CNEQ(cnp, "curproc", 7); 742 isself = CNEQ(cnp, "self", 4); 743 744 if (iscurproc || isself) { 745 error = procfs_allocvp(dvp->v_mount, vpp, 0, 746 iscurproc ? Pcurproc : Pself); 747 if ((error == 0) && (wantpunlock)) { 748 VOP_UNLOCK(dvp, 0); 749 cnp->cn_flags |= PDIRUNLOCK; 750 } 751 return (error); 752 } 753 754 for (i = 0; i < nproc_root_targets; i++) { 755 pt = &proc_root_targets[i]; 756 if (cnp->cn_namelen == pt->pt_namlen && 757 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 758 (pt->pt_valid == NULL || 759 (*pt->pt_valid)(p, dvp->v_mount))) 760 break; 761 } 762 763 if (i != nproc_root_targets) { 764 error = procfs_allocvp(dvp->v_mount, vpp, 0, 765 pt->pt_pfstype); 766 if ((error == 0) && (wantpunlock)) { 767 VOP_UNLOCK(dvp, 0); 768 cnp->cn_flags |= PDIRUNLOCK; 769 } 770 return (error); 771 } 772 773 pid = atopid(pname, cnp->cn_namelen); 774 if (pid == NO_PID) 775 break; 776 777 p = PFIND(pid); 778 if (p == 0) 779 break; 780 781 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc); 782 if ((error == 0) && (wantpunlock)) { 783 VOP_UNLOCK(dvp, 0); 784 cnp->cn_flags |= PDIRUNLOCK; 785 } 786 return (error); 787 788 case Pproc: 789 /* 790 * do the .. dance. We unlock the directory, and then 791 * get the root dir. That will automatically return .. 792 * locked. Then if the caller wanted dvp locked, we 793 * re-lock. 794 */ 795 if (cnp->cn_flags & ISDOTDOT) { 796 VOP_UNLOCK(dvp, 0); 797 cnp->cn_flags |= PDIRUNLOCK; 798 error = procfs_root(dvp->v_mount, vpp); 799 if ((error == 0) && (wantpunlock == 0) && 800 ((error = vn_lock(dvp, LK_EXCLUSIVE)) == 0)) 801 cnp->cn_flags &= ~PDIRUNLOCK; 802 return (error); 803 } 804 805 p = PFIND(pfs->pfs_pid); 806 if (p == 0) 807 break; 808 809 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) { 810 if (cnp->cn_namelen == pt->pt_namlen && 811 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 812 (pt->pt_valid == NULL || 813 (*pt->pt_valid)(p, dvp->v_mount))) 814 goto found; 815 } 816 break; 817 818 found: 819 if (pt->pt_pfstype == Pfile) { 820 fvp = p->p_textvp; 821 /* We already checked that it exists. */ 822 VREF(fvp); 823 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY); 824 if (wantpunlock) { 825 VOP_UNLOCK(dvp, 0); 826 cnp->cn_flags |= PDIRUNLOCK; 827 } 828 *vpp = fvp; 829 return (0); 830 } 831 832 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 833 pt->pt_pfstype); 834 if ((error == 0) && (wantpunlock)) { 835 VOP_UNLOCK(dvp, 0); 836 cnp->cn_flags |= PDIRUNLOCK; 837 } 838 return (error); 839 840 default: 841 return (ENOTDIR); 842 } 843 844 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS); 845 } 846 847 int 848 procfs_validfile(p, mp) 849 struct proc *p; 850 struct mount *mp; 851 { 852 return (p->p_textvp != NULL); 853 } 854 855 static int 856 procfs_validfile_linux(p, mp) 857 struct proc *p; 858 struct mount *mp; 859 { 860 int flags; 861 862 flags = VFSTOPROC(mp)->pmnt_flags; 863 return ((flags & PROCFSMNT_LINUXCOMPAT) && 864 (p == NULL || procfs_validfile(p, mp))); 865 } 866 867 /* 868 * readdir returns directory entries from pfsnode (vp). 869 * 870 * the strategy here with procfs is to generate a single 871 * directory entry at a time (struct dirent) and then 872 * copy that out to userland using uiomove. a more efficent 873 * though more complex implementation, would try to minimize 874 * the number of calls to uiomove(). for procfs, this is 875 * hardly worth the added code complexity. 876 * 877 * this should just be done through read() 878 */ 879 int 880 procfs_readdir(v) 881 void *v; 882 { 883 struct vop_readdir_args /* { 884 struct vnode *a_vp; 885 struct uio *a_uio; 886 struct ucred *a_cred; 887 int *a_eofflag; 888 off_t **a_cookies; 889 int *a_ncookies; 890 } */ *ap = v; 891 struct uio *uio = ap->a_uio; 892 struct dirent d; 893 struct pfsnode *pfs; 894 off_t i; 895 int error; 896 off_t *cookies = NULL; 897 int ncookies, left, skip, j; 898 struct vnode *vp; 899 const struct proc_target *pt; 900 901 vp = ap->a_vp; 902 pfs = VTOPFS(vp); 903 904 if (uio->uio_resid < UIO_MX) 905 return (EINVAL); 906 if (uio->uio_offset < 0) 907 return (EINVAL); 908 909 error = 0; 910 i = uio->uio_offset; 911 memset((caddr_t)&d, 0, UIO_MX); 912 d.d_reclen = UIO_MX; 913 ncookies = uio->uio_resid / UIO_MX; 914 915 switch (pfs->pfs_type) { 916 /* 917 * this is for the process-specific sub-directories. 918 * all that is needed to is copy out all the entries 919 * from the procent[] table (top of this file). 920 */ 921 case Pproc: { 922 struct proc *p; 923 924 if (i >= nproc_targets) 925 return 0; 926 927 p = PFIND(pfs->pfs_pid); 928 if (p == NULL) 929 break; 930 931 if (ap->a_ncookies) { 932 ncookies = min(ncookies, (nproc_targets - i)); 933 cookies = malloc(ncookies * sizeof (off_t), 934 M_TEMP, M_WAITOK); 935 *ap->a_cookies = cookies; 936 } 937 938 for (pt = &proc_targets[i]; 939 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) { 940 if (pt->pt_valid && 941 (*pt->pt_valid)(p, vp->v_mount) == 0) 942 continue; 943 944 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype); 945 d.d_namlen = pt->pt_namlen; 946 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 947 d.d_type = pt->pt_type; 948 949 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 950 break; 951 if (cookies) 952 *cookies++ = i + 1; 953 } 954 955 break; 956 } 957 958 /* 959 * this is for the root of the procfs filesystem 960 * what is needed are special entries for "curproc" 961 * and "self" followed by an entry for each process 962 * on allproc 963 #ifdef PROCFS_ZOMBIE 964 * and deadproc and zombproc. 965 #endif 966 */ 967 968 case Proot: { 969 int pcnt = i, nc = 0; 970 const struct proclist_desc *pd; 971 volatile struct proc *p; 972 973 if (pcnt > 3) 974 pcnt = 3; 975 if (ap->a_ncookies) { 976 /* 977 * XXX Potentially allocating too much space here, 978 * but I'm lazy. This loop needs some work. 979 */ 980 cookies = malloc(ncookies * sizeof (off_t), 981 M_TEMP, M_WAITOK); 982 *ap->a_cookies = cookies; 983 } 984 /* 985 * XXX: THIS LOOP ASSUMES THAT allproc IS THE FIRST 986 * PROCLIST IN THE proclists! 987 */ 988 proclist_lock_read(); 989 pd = proclists; 990 #ifdef PROCFS_ZOMBIE 991 again: 992 #endif 993 for (p = LIST_FIRST(pd->pd_list); 994 p != NULL && uio->uio_resid >= UIO_MX; i++, pcnt++) { 995 switch (i) { 996 case 0: /* `.' */ 997 case 1: /* `..' */ 998 d.d_fileno = PROCFS_FILENO(0, Proot); 999 d.d_namlen = i + 1; 1000 memcpy(d.d_name, "..", d.d_namlen); 1001 d.d_name[i + 1] = '\0'; 1002 d.d_type = DT_DIR; 1003 break; 1004 1005 case 2: 1006 d.d_fileno = PROCFS_FILENO(0, Pcurproc); 1007 d.d_namlen = sizeof("curproc") - 1; 1008 memcpy(d.d_name, "curproc", sizeof("curproc")); 1009 d.d_type = DT_LNK; 1010 break; 1011 1012 case 3: 1013 d.d_fileno = PROCFS_FILENO(0, Pself); 1014 d.d_namlen = sizeof("self") - 1; 1015 memcpy(d.d_name, "self", sizeof("self")); 1016 d.d_type = DT_LNK; 1017 break; 1018 1019 default: 1020 while (pcnt < i) { 1021 pcnt++; 1022 p = LIST_NEXT(p, p_list); 1023 if (!p) 1024 goto done; 1025 } 1026 d.d_fileno = PROCFS_FILENO(p->p_pid, Pproc); 1027 d.d_namlen = sprintf(d.d_name, "%ld", 1028 (long)p->p_pid); 1029 d.d_type = DT_DIR; 1030 p = p->p_list.le_next; 1031 break; 1032 } 1033 1034 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 1035 break; 1036 nc++; 1037 if (cookies) 1038 *cookies++ = i + 1; 1039 } 1040 done: 1041 1042 #ifdef PROCFS_ZOMBIE 1043 pd++; 1044 if (p == NULL && pd->pd_list != NULL) 1045 goto again; 1046 #endif 1047 proclist_unlock_read(); 1048 1049 skip = i - pcnt; 1050 if (skip >= nproc_root_targets) 1051 break; 1052 left = nproc_root_targets - skip; 1053 for (j = 0, pt = &proc_root_targets[0]; 1054 uio->uio_resid >= UIO_MX && j < left; 1055 pt++, j++, i++) { 1056 if (pt->pt_valid && 1057 (*pt->pt_valid)(NULL, vp->v_mount) == 0) 1058 continue; 1059 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype); 1060 d.d_namlen = pt->pt_namlen; 1061 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1062 d.d_type = pt->pt_type; 1063 1064 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 1065 break; 1066 nc++; 1067 if (cookies) 1068 *cookies++ = i + 1; 1069 } 1070 1071 ncookies = nc; 1072 break; 1073 } 1074 1075 default: 1076 error = ENOTDIR; 1077 break; 1078 } 1079 1080 if (ap->a_ncookies) { 1081 if (error) { 1082 if (cookies) 1083 free(*ap->a_cookies, M_TEMP); 1084 *ap->a_ncookies = 0; 1085 *ap->a_cookies = NULL; 1086 } else 1087 *ap->a_ncookies = ncookies; 1088 } 1089 uio->uio_offset = i; 1090 return (error); 1091 } 1092 1093 /* 1094 * readlink reads the link of `curproc' 1095 */ 1096 int 1097 procfs_readlink(v) 1098 void *v; 1099 { 1100 struct vop_readlink_args *ap = v; 1101 char buf[16]; /* should be enough */ 1102 int len; 1103 1104 if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pcurproc)) 1105 len = sprintf(buf, "%ld", (long)curproc->p_pid); 1106 else if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pself)) 1107 len = sprintf(buf, "%s", "curproc"); 1108 else 1109 return (EINVAL); 1110 1111 return (uiomove((caddr_t)buf, len, ap->a_uio)); 1112 } 1113 1114 /* 1115 * convert decimal ascii to pid_t 1116 */ 1117 static pid_t 1118 atopid(b, len) 1119 const char *b; 1120 u_int len; 1121 { 1122 pid_t p = 0; 1123 1124 while (len--) { 1125 char c = *b++; 1126 if (c < '0' || c > '9') 1127 return (NO_PID); 1128 p = 10 * p + (c - '0'); 1129 if (p > PID_MAX) 1130 return (NO_PID); 1131 } 1132 1133 return (p); 1134 } 1135