1 /* $NetBSD: procfs_vnops.c,v 1.85 2001/11/10 13:33:44 lukem Exp $ */ 2 3 /* 4 * Copyright (c) 1993 Jan-Simon Pendry 5 * Copyright (c) 1993, 1995 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Jan-Simon Pendry. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 40 */ 41 42 /* 43 * procfs vnode interface 44 */ 45 46 #include <sys/cdefs.h> 47 __KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.85 2001/11/10 13:33:44 lukem Exp $"); 48 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/time.h> 52 #include <sys/kernel.h> 53 #include <sys/file.h> 54 #include <sys/proc.h> 55 #include <sys/vnode.h> 56 #include <sys/namei.h> 57 #include <sys/malloc.h> 58 #include <sys/mount.h> 59 #include <sys/dirent.h> 60 #include <sys/resourcevar.h> 61 #include <sys/ptrace.h> 62 #include <sys/stat.h> 63 64 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */ 65 66 #include <machine/reg.h> 67 68 #include <miscfs/genfs/genfs.h> 69 #include <miscfs/procfs/procfs.h> 70 71 /* 72 * Vnode Operations. 73 * 74 */ 75 76 static int procfs_validfile_linux __P((struct proc *, struct mount *)); 77 78 /* 79 * This is a list of the valid names in the 80 * process-specific sub-directories. It is 81 * used in procfs_lookup and procfs_readdir 82 */ 83 const struct proc_target { 84 u_char pt_type; 85 u_char pt_namlen; 86 char *pt_name; 87 pfstype pt_pfstype; 88 int (*pt_valid) __P((struct proc *, struct mount *)); 89 } proc_targets[] = { 90 #define N(s) sizeof(s)-1, s 91 /* name type validp */ 92 { DT_DIR, N("."), Pproc, NULL }, 93 { DT_DIR, N(".."), Proot, NULL }, 94 { DT_REG, N("file"), Pfile, procfs_validfile }, 95 { DT_REG, N("mem"), Pmem, NULL }, 96 { DT_REG, N("regs"), Pregs, procfs_validregs }, 97 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs }, 98 { DT_REG, N("ctl"), Pctl, NULL }, 99 { DT_REG, N("status"), Pstatus, NULL }, 100 { DT_REG, N("note"), Pnote, NULL }, 101 { DT_REG, N("notepg"), Pnotepg, NULL }, 102 { DT_REG, N("map"), Pmap, procfs_validmap }, 103 { DT_REG, N("maps"), Pmaps, procfs_validmap }, 104 { DT_REG, N("cmdline"), Pcmdline, NULL }, 105 { DT_REG, N("exe"), Pfile, procfs_validfile_linux }, 106 #undef N 107 }; 108 static int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]); 109 110 /* 111 * List of files in the root directory. Note: the validate function will 112 * be called with p == NULL for these ones. 113 */ 114 struct proc_target proc_root_targets[] = { 115 #define N(s) sizeof(s)-1, s 116 /* name type validp */ 117 { DT_REG, N("meminfo"), Pmeminfo, procfs_validfile_linux }, 118 { DT_REG, N("cpuinfo"), Pcpuinfo, procfs_validfile_linux }, 119 #undef N 120 }; 121 static int nproc_root_targets = 122 sizeof(proc_root_targets) / sizeof(proc_root_targets[0]); 123 124 int procfs_lookup __P((void *)); 125 #define procfs_create genfs_eopnotsupp_rele 126 #define procfs_mknod genfs_eopnotsupp_rele 127 int procfs_open __P((void *)); 128 int procfs_close __P((void *)); 129 int procfs_access __P((void *)); 130 int procfs_getattr __P((void *)); 131 int procfs_setattr __P((void *)); 132 #define procfs_read procfs_rw 133 #define procfs_write procfs_rw 134 #define procfs_fcntl genfs_fcntl 135 #define procfs_ioctl genfs_enoioctl 136 #define procfs_poll genfs_poll 137 #define procfs_revoke genfs_revoke 138 #define procfs_fsync genfs_nullop 139 #define procfs_seek genfs_nullop 140 #define procfs_remove genfs_eopnotsupp_rele 141 int procfs_link __P((void *)); 142 #define procfs_rename genfs_eopnotsupp_rele 143 #define procfs_mkdir genfs_eopnotsupp_rele 144 #define procfs_rmdir genfs_eopnotsupp_rele 145 int procfs_symlink __P((void *)); 146 int procfs_readdir __P((void *)); 147 int procfs_readlink __P((void *)); 148 #define procfs_abortop genfs_abortop 149 int procfs_inactive __P((void *)); 150 int procfs_reclaim __P((void *)); 151 #define procfs_lock genfs_lock 152 #define procfs_unlock genfs_unlock 153 #define procfs_bmap genfs_badop 154 #define procfs_strategy genfs_badop 155 int procfs_print __P((void *)); 156 int procfs_pathconf __P((void *)); 157 #define procfs_islocked genfs_islocked 158 #define procfs_advlock genfs_einval 159 #define procfs_blkatoff genfs_eopnotsupp 160 #define procfs_valloc genfs_eopnotsupp 161 #define procfs_vfree genfs_nullop 162 #define procfs_truncate genfs_eopnotsupp 163 #define procfs_update genfs_nullop 164 #define procfs_bwrite genfs_eopnotsupp 165 166 static pid_t atopid __P((const char *, u_int)); 167 168 /* 169 * procfs vnode operations. 170 */ 171 int (**procfs_vnodeop_p) __P((void *)); 172 const struct vnodeopv_entry_desc procfs_vnodeop_entries[] = { 173 { &vop_default_desc, vn_default_error }, 174 { &vop_lookup_desc, procfs_lookup }, /* lookup */ 175 { &vop_create_desc, procfs_create }, /* create */ 176 { &vop_mknod_desc, procfs_mknod }, /* mknod */ 177 { &vop_open_desc, procfs_open }, /* open */ 178 { &vop_close_desc, procfs_close }, /* close */ 179 { &vop_access_desc, procfs_access }, /* access */ 180 { &vop_getattr_desc, procfs_getattr }, /* getattr */ 181 { &vop_setattr_desc, procfs_setattr }, /* setattr */ 182 { &vop_read_desc, procfs_read }, /* read */ 183 { &vop_write_desc, procfs_write }, /* write */ 184 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */ 185 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */ 186 { &vop_poll_desc, procfs_poll }, /* poll */ 187 { &vop_revoke_desc, procfs_revoke }, /* revoke */ 188 { &vop_fsync_desc, procfs_fsync }, /* fsync */ 189 { &vop_seek_desc, procfs_seek }, /* seek */ 190 { &vop_remove_desc, procfs_remove }, /* remove */ 191 { &vop_link_desc, procfs_link }, /* link */ 192 { &vop_rename_desc, procfs_rename }, /* rename */ 193 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */ 194 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */ 195 { &vop_symlink_desc, procfs_symlink }, /* symlink */ 196 { &vop_readdir_desc, procfs_readdir }, /* readdir */ 197 { &vop_readlink_desc, procfs_readlink }, /* readlink */ 198 { &vop_abortop_desc, procfs_abortop }, /* abortop */ 199 { &vop_inactive_desc, procfs_inactive }, /* inactive */ 200 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */ 201 { &vop_lock_desc, procfs_lock }, /* lock */ 202 { &vop_unlock_desc, procfs_unlock }, /* unlock */ 203 { &vop_bmap_desc, procfs_bmap }, /* bmap */ 204 { &vop_strategy_desc, procfs_strategy }, /* strategy */ 205 { &vop_print_desc, procfs_print }, /* print */ 206 { &vop_islocked_desc, procfs_islocked }, /* islocked */ 207 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */ 208 { &vop_advlock_desc, procfs_advlock }, /* advlock */ 209 { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */ 210 { &vop_valloc_desc, procfs_valloc }, /* valloc */ 211 { &vop_vfree_desc, procfs_vfree }, /* vfree */ 212 { &vop_truncate_desc, procfs_truncate }, /* truncate */ 213 { &vop_update_desc, procfs_update }, /* update */ 214 { NULL, NULL } 215 }; 216 const struct vnodeopv_desc procfs_vnodeop_opv_desc = 217 { &procfs_vnodeop_p, procfs_vnodeop_entries }; 218 /* 219 * set things up for doing i/o on 220 * the pfsnode (vp). (vp) is locked 221 * on entry, and should be left locked 222 * on exit. 223 * 224 * for procfs we don't need to do anything 225 * in particular for i/o. all that is done 226 * is to support exclusive open on process 227 * memory images. 228 */ 229 int 230 procfs_open(v) 231 void *v; 232 { 233 struct vop_open_args /* { 234 struct vnode *a_vp; 235 int a_mode; 236 struct ucred *a_cred; 237 struct proc *a_p; 238 } */ *ap = v; 239 struct pfsnode *pfs = VTOPFS(ap->a_vp); 240 struct proc *p1, *p2; 241 242 p1 = ap->a_p; /* tracer */ 243 p2 = PFIND(pfs->pfs_pid); /* traced */ 244 245 if (p2 == NULL) 246 return (ENOENT); /* was ESRCH, jsp */ 247 248 switch (pfs->pfs_type) { 249 case Pmem: 250 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) || 251 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) 252 return (EBUSY); 253 254 if (procfs_checkioperm(p1, p2) != 0) 255 return (EPERM); 256 257 if (ap->a_mode & FWRITE) 258 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); 259 260 return (0); 261 262 default: 263 break; 264 } 265 266 return (0); 267 } 268 269 /* 270 * close the pfsnode (vp) after doing i/o. 271 * (vp) is not locked on entry or exit. 272 * 273 * nothing to do for procfs other than undo 274 * any exclusive open flag (see _open above). 275 */ 276 int 277 procfs_close(v) 278 void *v; 279 { 280 struct vop_close_args /* { 281 struct vnode *a_vp; 282 int a_fflag; 283 struct ucred *a_cred; 284 struct proc *a_p; 285 } */ *ap = v; 286 struct pfsnode *pfs = VTOPFS(ap->a_vp); 287 288 switch (pfs->pfs_type) { 289 case Pmem: 290 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) 291 pfs->pfs_flags &= ~(FWRITE|O_EXCL); 292 break; 293 294 default: 295 break; 296 } 297 298 return (0); 299 } 300 301 /* 302 * _inactive is called when the pfsnode 303 * is vrele'd and the reference count goes 304 * to zero. (vp) will be on the vnode free 305 * list, so to get it back vget() must be 306 * used. 307 * 308 * for procfs, check if the process is still 309 * alive and if it isn't then just throw away 310 * the vnode by calling vgone(). this may 311 * be overkill and a waste of time since the 312 * chances are that the process will still be 313 * there and PFIND is not free. 314 * 315 * (vp) is locked on entry, but must be unlocked on exit. 316 */ 317 int 318 procfs_inactive(v) 319 void *v; 320 { 321 struct vop_inactive_args /* { 322 struct vnode *a_vp; 323 struct proc *a_p; 324 } */ *ap = v; 325 struct pfsnode *pfs = VTOPFS(ap->a_vp); 326 327 VOP_UNLOCK(ap->a_vp, 0); 328 if (PFIND(pfs->pfs_pid) == 0) 329 vgone(ap->a_vp); 330 331 return (0); 332 } 333 334 /* 335 * _reclaim is called when getnewvnode() 336 * wants to make use of an entry on the vnode 337 * free list. at this time the filesystem needs 338 * to free any private data and remove the node 339 * from any private lists. 340 */ 341 int 342 procfs_reclaim(v) 343 void *v; 344 { 345 struct vop_reclaim_args /* { 346 struct vnode *a_vp; 347 } */ *ap = v; 348 349 return (procfs_freevp(ap->a_vp)); 350 } 351 352 /* 353 * Return POSIX pathconf information applicable to special devices. 354 */ 355 int 356 procfs_pathconf(v) 357 void *v; 358 { 359 struct vop_pathconf_args /* { 360 struct vnode *a_vp; 361 int a_name; 362 register_t *a_retval; 363 } */ *ap = v; 364 365 switch (ap->a_name) { 366 case _PC_LINK_MAX: 367 *ap->a_retval = LINK_MAX; 368 return (0); 369 case _PC_MAX_CANON: 370 *ap->a_retval = MAX_CANON; 371 return (0); 372 case _PC_MAX_INPUT: 373 *ap->a_retval = MAX_INPUT; 374 return (0); 375 case _PC_PIPE_BUF: 376 *ap->a_retval = PIPE_BUF; 377 return (0); 378 case _PC_CHOWN_RESTRICTED: 379 *ap->a_retval = 1; 380 return (0); 381 case _PC_VDISABLE: 382 *ap->a_retval = _POSIX_VDISABLE; 383 return (0); 384 case _PC_SYNC_IO: 385 *ap->a_retval = 1; 386 return (0); 387 default: 388 return (EINVAL); 389 } 390 /* NOTREACHED */ 391 } 392 393 /* 394 * _print is used for debugging. 395 * just print a readable description 396 * of (vp). 397 */ 398 int 399 procfs_print(v) 400 void *v; 401 { 402 struct vop_print_args /* { 403 struct vnode *a_vp; 404 } */ *ap = v; 405 struct pfsnode *pfs = VTOPFS(ap->a_vp); 406 407 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n", 408 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags); 409 return 0; 410 } 411 412 int 413 procfs_link(v) 414 void *v; 415 { 416 struct vop_link_args /* { 417 struct vnode *a_dvp; 418 struct vnode *a_vp; 419 struct componentname *a_cnp; 420 } */ *ap = v; 421 422 VOP_ABORTOP(ap->a_dvp, ap->a_cnp); 423 vput(ap->a_dvp); 424 return (EROFS); 425 } 426 427 int 428 procfs_symlink(v) 429 void *v; 430 { 431 struct vop_symlink_args /* { 432 struct vnode *a_dvp; 433 struct vnode **a_vpp; 434 struct componentname *a_cnp; 435 struct vattr *a_vap; 436 char *a_target; 437 } */ *ap = v; 438 439 VOP_ABORTOP(ap->a_dvp, ap->a_cnp); 440 vput(ap->a_dvp); 441 return (EROFS); 442 } 443 444 /* 445 * Invent attributes for pfsnode (vp) and store 446 * them in (vap). 447 * Directories lengths are returned as zero since 448 * any real length would require the genuine size 449 * to be computed, and nothing cares anyway. 450 * 451 * this is relatively minimal for procfs. 452 */ 453 int 454 procfs_getattr(v) 455 void *v; 456 { 457 struct vop_getattr_args /* { 458 struct vnode *a_vp; 459 struct vattr *a_vap; 460 struct ucred *a_cred; 461 struct proc *a_p; 462 } */ *ap = v; 463 struct pfsnode *pfs = VTOPFS(ap->a_vp); 464 struct vattr *vap = ap->a_vap; 465 struct proc *procp; 466 struct timeval tv; 467 int error; 468 469 /* first check the process still exists */ 470 switch (pfs->pfs_type) { 471 case Proot: 472 case Pcurproc: 473 case Pself: 474 procp = 0; 475 break; 476 477 default: 478 procp = PFIND(pfs->pfs_pid); 479 if (procp == 0) 480 return (ENOENT); 481 break; 482 } 483 484 error = 0; 485 486 /* start by zeroing out the attributes */ 487 VATTR_NULL(vap); 488 489 /* next do all the common fields */ 490 vap->va_type = ap->a_vp->v_type; 491 vap->va_mode = pfs->pfs_mode; 492 vap->va_fileid = pfs->pfs_fileno; 493 vap->va_flags = 0; 494 vap->va_blocksize = PAGE_SIZE; 495 496 /* 497 * Make all times be current TOD. 498 * It would be possible to get the process start 499 * time from the p_stat structure, but there's 500 * no "file creation" time stamp anyway, and the 501 * p_stat structure is not addressible if u. gets 502 * swapped out for that process. 503 */ 504 microtime(&tv); 505 TIMEVAL_TO_TIMESPEC(&tv, &vap->va_ctime); 506 vap->va_atime = vap->va_mtime = vap->va_ctime; 507 508 switch (pfs->pfs_type) { 509 case Pmem: 510 case Pregs: 511 case Pfpregs: 512 /* 513 * If the process has exercised some setuid or setgid 514 * privilege, then rip away read/write permission so 515 * that only root can gain access. 516 */ 517 if (procp->p_flag & P_SUGID) 518 vap->va_mode &= ~(S_IRUSR|S_IWUSR); 519 /* FALLTHROUGH */ 520 case Pctl: 521 case Pstatus: 522 case Pnote: 523 case Pnotepg: 524 case Pmap: 525 case Pmaps: 526 case Pcmdline: 527 vap->va_nlink = 1; 528 vap->va_uid = procp->p_ucred->cr_uid; 529 vap->va_gid = procp->p_ucred->cr_gid; 530 break; 531 case Pmeminfo: 532 case Pcpuinfo: 533 vap->va_nlink = 1; 534 vap->va_uid = vap->va_gid = 0; 535 break; 536 537 default: 538 break; 539 } 540 541 /* 542 * now do the object specific fields 543 * 544 * The size could be set from struct reg, but it's hardly 545 * worth the trouble, and it puts some (potentially) machine 546 * dependent data into this machine-independent code. If it 547 * becomes important then this function should break out into 548 * a per-file stat function in the corresponding .c file. 549 */ 550 551 switch (pfs->pfs_type) { 552 case Proot: 553 /* 554 * Set nlink to 1 to tell fts(3) we don't actually know. 555 */ 556 vap->va_nlink = 1; 557 vap->va_uid = 0; 558 vap->va_gid = 0; 559 vap->va_bytes = vap->va_size = DEV_BSIZE; 560 break; 561 562 case Pcurproc: { 563 char buf[16]; /* should be enough */ 564 vap->va_nlink = 1; 565 vap->va_uid = 0; 566 vap->va_gid = 0; 567 vap->va_bytes = vap->va_size = 568 sprintf(buf, "%ld", (long)curproc->p_pid); 569 break; 570 } 571 572 case Pself: 573 vap->va_nlink = 1; 574 vap->va_uid = 0; 575 vap->va_gid = 0; 576 vap->va_bytes = vap->va_size = sizeof("curproc"); 577 break; 578 579 case Pproc: 580 vap->va_nlink = 2; 581 vap->va_uid = procp->p_ucred->cr_uid; 582 vap->va_gid = procp->p_ucred->cr_gid; 583 vap->va_bytes = vap->va_size = DEV_BSIZE; 584 break; 585 586 case Pfile: 587 error = EOPNOTSUPP; 588 break; 589 590 case Pmem: 591 vap->va_bytes = vap->va_size = 592 ctob(procp->p_vmspace->vm_tsize + 593 procp->p_vmspace->vm_dsize + 594 procp->p_vmspace->vm_ssize); 595 break; 596 597 #if defined(PT_GETREGS) || defined(PT_SETREGS) 598 case Pregs: 599 vap->va_bytes = vap->va_size = sizeof(struct reg); 600 break; 601 #endif 602 603 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS) 604 case Pfpregs: 605 vap->va_bytes = vap->va_size = sizeof(struct fpreg); 606 break; 607 #endif 608 609 case Pctl: 610 case Pstatus: 611 case Pnote: 612 case Pnotepg: 613 case Pcmdline: 614 case Pmeminfo: 615 case Pcpuinfo: 616 vap->va_bytes = vap->va_size = 0; 617 break; 618 case Pmap: 619 case Pmaps: 620 /* 621 * Advise a larger blocksize for the map files, so that 622 * they may be read in one pass. 623 */ 624 vap->va_blocksize = 4 * PAGE_SIZE; 625 vap->va_bytes = vap->va_size = 0; 626 break; 627 628 default: 629 panic("procfs_getattr"); 630 } 631 632 return (error); 633 } 634 635 /*ARGSUSED*/ 636 int 637 procfs_setattr(v) 638 void *v; 639 { 640 /* 641 * just fake out attribute setting 642 * it's not good to generate an error 643 * return, otherwise things like creat() 644 * will fail when they try to set the 645 * file length to 0. worse, this means 646 * that echo $note > /proc/$pid/note will fail. 647 */ 648 649 return (0); 650 } 651 652 /* 653 * implement access checking. 654 * 655 * actually, the check for super-user is slightly 656 * broken since it will allow read access to write-only 657 * objects. this doesn't cause any particular trouble 658 * but does mean that the i/o entry points need to check 659 * that the operation really does make sense. 660 */ 661 int 662 procfs_access(v) 663 void *v; 664 { 665 struct vop_access_args /* { 666 struct vnode *a_vp; 667 int a_mode; 668 struct ucred *a_cred; 669 struct proc *a_p; 670 } */ *ap = v; 671 struct vattr va; 672 int error; 673 674 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) != 0) 675 return (error); 676 677 return (vaccess(va.va_type, va.va_mode, 678 va.va_uid, va.va_gid, ap->a_mode, ap->a_cred)); 679 } 680 681 /* 682 * lookup. this is incredibly complicated in the 683 * general case, however for most pseudo-filesystems 684 * very little needs to be done. 685 * 686 * Locking isn't hard here, just poorly documented. 687 * 688 * If we're looking up ".", just vref the parent & return it. 689 * 690 * If we're looking up "..", unlock the parent, and lock "..". If everything 691 * went ok, and we're on the last component and the caller requested the 692 * parent locked, try to re-lock the parent. We do this to prevent lock 693 * races. 694 * 695 * For anything else, get the needed node. Then unlock the parent if not 696 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the 697 * parent in the .. case). 698 * 699 * We try to exit with the parent locked in error cases. 700 */ 701 int 702 procfs_lookup(v) 703 void *v; 704 { 705 struct vop_lookup_args /* { 706 struct vnode * a_dvp; 707 struct vnode ** a_vpp; 708 struct componentname * a_cnp; 709 } */ *ap = v; 710 struct componentname *cnp = ap->a_cnp; 711 struct vnode **vpp = ap->a_vpp; 712 struct vnode *dvp = ap->a_dvp; 713 const char *pname = cnp->cn_nameptr; 714 const struct proc_target *pt = NULL; 715 struct vnode *fvp; 716 pid_t pid; 717 struct pfsnode *pfs; 718 struct proc *p = NULL; 719 int i, error, wantpunlock, iscurproc = 0, isself = 0; 720 721 *vpp = NULL; 722 cnp->cn_flags &= ~PDIRUNLOCK; 723 724 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) 725 return (EROFS); 726 727 if (cnp->cn_namelen == 1 && *pname == '.') { 728 *vpp = dvp; 729 VREF(dvp); 730 return (0); 731 } 732 733 wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN)); 734 pfs = VTOPFS(dvp); 735 switch (pfs->pfs_type) { 736 case Proot: 737 /* 738 * Shouldn't get here with .. in the root node. 739 */ 740 if (cnp->cn_flags & ISDOTDOT) 741 return (EIO); 742 743 iscurproc = CNEQ(cnp, "curproc", 7); 744 isself = CNEQ(cnp, "self", 4); 745 746 if (iscurproc || isself) { 747 error = procfs_allocvp(dvp->v_mount, vpp, 0, 748 iscurproc ? Pcurproc : Pself); 749 if ((error == 0) && (wantpunlock)) { 750 VOP_UNLOCK(dvp, 0); 751 cnp->cn_flags |= PDIRUNLOCK; 752 } 753 return (error); 754 } 755 756 for (i = 0; i < nproc_root_targets; i++) { 757 pt = &proc_root_targets[i]; 758 if (cnp->cn_namelen == pt->pt_namlen && 759 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 760 (pt->pt_valid == NULL || 761 (*pt->pt_valid)(p, dvp->v_mount))) 762 break; 763 } 764 765 if (i != nproc_root_targets) { 766 error = procfs_allocvp(dvp->v_mount, vpp, 0, 767 pt->pt_pfstype); 768 if ((error == 0) && (wantpunlock)) { 769 VOP_UNLOCK(dvp, 0); 770 cnp->cn_flags |= PDIRUNLOCK; 771 } 772 return (error); 773 } 774 775 pid = atopid(pname, cnp->cn_namelen); 776 if (pid == NO_PID) 777 break; 778 779 p = PFIND(pid); 780 if (p == 0) 781 break; 782 783 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc); 784 if ((error == 0) && (wantpunlock)) { 785 VOP_UNLOCK(dvp, 0); 786 cnp->cn_flags |= PDIRUNLOCK; 787 } 788 return (error); 789 790 case Pproc: 791 /* 792 * do the .. dance. We unlock the directory, and then 793 * get the root dir. That will automatically return .. 794 * locked. Then if the caller wanted dvp locked, we 795 * re-lock. 796 */ 797 if (cnp->cn_flags & ISDOTDOT) { 798 VOP_UNLOCK(dvp, 0); 799 cnp->cn_flags |= PDIRUNLOCK; 800 error = procfs_root(dvp->v_mount, vpp); 801 if ((error == 0) && (wantpunlock == 0) && 802 ((error = vn_lock(dvp, LK_EXCLUSIVE)) == 0)) 803 cnp->cn_flags &= ~PDIRUNLOCK; 804 return (error); 805 } 806 807 p = PFIND(pfs->pfs_pid); 808 if (p == 0) 809 break; 810 811 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) { 812 if (cnp->cn_namelen == pt->pt_namlen && 813 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 814 (pt->pt_valid == NULL || 815 (*pt->pt_valid)(p, dvp->v_mount))) 816 goto found; 817 } 818 break; 819 820 found: 821 if (pt->pt_pfstype == Pfile) { 822 fvp = p->p_textvp; 823 /* We already checked that it exists. */ 824 VREF(fvp); 825 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY); 826 if (wantpunlock) { 827 VOP_UNLOCK(dvp, 0); 828 cnp->cn_flags |= PDIRUNLOCK; 829 } 830 *vpp = fvp; 831 return (0); 832 } 833 834 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 835 pt->pt_pfstype); 836 if ((error == 0) && (wantpunlock)) { 837 VOP_UNLOCK(dvp, 0); 838 cnp->cn_flags |= PDIRUNLOCK; 839 } 840 return (error); 841 842 default: 843 return (ENOTDIR); 844 } 845 846 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS); 847 } 848 849 int 850 procfs_validfile(p, mp) 851 struct proc *p; 852 struct mount *mp; 853 { 854 return (p->p_textvp != NULL); 855 } 856 857 static int 858 procfs_validfile_linux(p, mp) 859 struct proc *p; 860 struct mount *mp; 861 { 862 int flags; 863 864 flags = VFSTOPROC(mp)->pmnt_flags; 865 return ((flags & PROCFSMNT_LINUXCOMPAT) && 866 (p == NULL || procfs_validfile(p, mp))); 867 } 868 869 /* 870 * readdir returns directory entries from pfsnode (vp). 871 * 872 * the strategy here with procfs is to generate a single 873 * directory entry at a time (struct dirent) and then 874 * copy that out to userland using uiomove. a more efficent 875 * though more complex implementation, would try to minimize 876 * the number of calls to uiomove(). for procfs, this is 877 * hardly worth the added code complexity. 878 * 879 * this should just be done through read() 880 */ 881 int 882 procfs_readdir(v) 883 void *v; 884 { 885 struct vop_readdir_args /* { 886 struct vnode *a_vp; 887 struct uio *a_uio; 888 struct ucred *a_cred; 889 int *a_eofflag; 890 off_t **a_cookies; 891 int *a_ncookies; 892 } */ *ap = v; 893 struct uio *uio = ap->a_uio; 894 struct dirent d; 895 struct pfsnode *pfs; 896 off_t i; 897 int error; 898 off_t *cookies = NULL; 899 int ncookies, left, skip, j; 900 struct vnode *vp; 901 const struct proc_target *pt; 902 903 vp = ap->a_vp; 904 pfs = VTOPFS(vp); 905 906 if (uio->uio_resid < UIO_MX) 907 return (EINVAL); 908 if (uio->uio_offset < 0) 909 return (EINVAL); 910 911 error = 0; 912 i = uio->uio_offset; 913 memset((caddr_t)&d, 0, UIO_MX); 914 d.d_reclen = UIO_MX; 915 ncookies = uio->uio_resid / UIO_MX; 916 917 switch (pfs->pfs_type) { 918 /* 919 * this is for the process-specific sub-directories. 920 * all that is needed to is copy out all the entries 921 * from the procent[] table (top of this file). 922 */ 923 case Pproc: { 924 struct proc *p; 925 926 if (i >= nproc_targets) 927 return 0; 928 929 p = PFIND(pfs->pfs_pid); 930 if (p == NULL) 931 break; 932 933 if (ap->a_ncookies) { 934 ncookies = min(ncookies, (nproc_targets - i)); 935 cookies = malloc(ncookies * sizeof (off_t), 936 M_TEMP, M_WAITOK); 937 *ap->a_cookies = cookies; 938 } 939 940 for (pt = &proc_targets[i]; 941 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) { 942 if (pt->pt_valid && 943 (*pt->pt_valid)(p, vp->v_mount) == 0) 944 continue; 945 946 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype); 947 d.d_namlen = pt->pt_namlen; 948 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 949 d.d_type = pt->pt_type; 950 951 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 952 break; 953 if (cookies) 954 *cookies++ = i + 1; 955 } 956 957 break; 958 } 959 960 /* 961 * this is for the root of the procfs filesystem 962 * what is needed are special entries for "curproc" 963 * and "self" followed by an entry for each process 964 * on allproc 965 #ifdef PROCFS_ZOMBIE 966 * and deadproc and zombproc. 967 #endif 968 */ 969 970 case Proot: { 971 int pcnt = i, nc = 0; 972 const struct proclist_desc *pd; 973 volatile struct proc *p; 974 975 if (pcnt > 3) 976 pcnt = 3; 977 if (ap->a_ncookies) { 978 /* 979 * XXX Potentially allocating too much space here, 980 * but I'm lazy. This loop needs some work. 981 */ 982 cookies = malloc(ncookies * sizeof (off_t), 983 M_TEMP, M_WAITOK); 984 *ap->a_cookies = cookies; 985 } 986 /* 987 * XXX: THIS LOOP ASSUMES THAT allproc IS THE FIRST 988 * PROCLIST IN THE proclists! 989 */ 990 proclist_lock_read(); 991 pd = proclists; 992 #ifdef PROCFS_ZOMBIE 993 again: 994 #endif 995 for (p = LIST_FIRST(pd->pd_list); 996 p != NULL && uio->uio_resid >= UIO_MX; i++, pcnt++) { 997 switch (i) { 998 case 0: /* `.' */ 999 case 1: /* `..' */ 1000 d.d_fileno = PROCFS_FILENO(0, Proot); 1001 d.d_namlen = i + 1; 1002 memcpy(d.d_name, "..", d.d_namlen); 1003 d.d_name[i + 1] = '\0'; 1004 d.d_type = DT_DIR; 1005 break; 1006 1007 case 2: 1008 d.d_fileno = PROCFS_FILENO(0, Pcurproc); 1009 d.d_namlen = sizeof("curproc") - 1; 1010 memcpy(d.d_name, "curproc", sizeof("curproc")); 1011 d.d_type = DT_LNK; 1012 break; 1013 1014 case 3: 1015 d.d_fileno = PROCFS_FILENO(0, Pself); 1016 d.d_namlen = sizeof("self") - 1; 1017 memcpy(d.d_name, "self", sizeof("self")); 1018 d.d_type = DT_LNK; 1019 break; 1020 1021 default: 1022 while (pcnt < i) { 1023 pcnt++; 1024 p = LIST_NEXT(p, p_list); 1025 if (!p) 1026 goto done; 1027 } 1028 d.d_fileno = PROCFS_FILENO(p->p_pid, Pproc); 1029 d.d_namlen = sprintf(d.d_name, "%ld", 1030 (long)p->p_pid); 1031 d.d_type = DT_DIR; 1032 p = p->p_list.le_next; 1033 break; 1034 } 1035 1036 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 1037 break; 1038 nc++; 1039 if (cookies) 1040 *cookies++ = i + 1; 1041 } 1042 done: 1043 1044 #ifdef PROCFS_ZOMBIE 1045 pd++; 1046 if (p == NULL && pd->pd_list != NULL) 1047 goto again; 1048 #endif 1049 proclist_unlock_read(); 1050 1051 skip = i - pcnt; 1052 if (skip >= nproc_root_targets) 1053 break; 1054 left = nproc_root_targets - skip; 1055 for (j = 0, pt = &proc_root_targets[0]; 1056 uio->uio_resid >= UIO_MX && j < left; 1057 pt++, j++, i++) { 1058 if (pt->pt_valid && 1059 (*pt->pt_valid)(NULL, vp->v_mount) == 0) 1060 continue; 1061 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype); 1062 d.d_namlen = pt->pt_namlen; 1063 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1064 d.d_type = pt->pt_type; 1065 1066 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 1067 break; 1068 nc++; 1069 if (cookies) 1070 *cookies++ = i + 1; 1071 } 1072 1073 ncookies = nc; 1074 break; 1075 } 1076 1077 default: 1078 error = ENOTDIR; 1079 break; 1080 } 1081 1082 if (ap->a_ncookies) { 1083 if (error) { 1084 if (cookies) 1085 free(*ap->a_cookies, M_TEMP); 1086 *ap->a_ncookies = 0; 1087 *ap->a_cookies = NULL; 1088 } else 1089 *ap->a_ncookies = ncookies; 1090 } 1091 uio->uio_offset = i; 1092 return (error); 1093 } 1094 1095 /* 1096 * readlink reads the link of `curproc' 1097 */ 1098 int 1099 procfs_readlink(v) 1100 void *v; 1101 { 1102 struct vop_readlink_args *ap = v; 1103 char buf[16]; /* should be enough */ 1104 int len; 1105 1106 if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pcurproc)) 1107 len = sprintf(buf, "%ld", (long)curproc->p_pid); 1108 else if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pself)) 1109 len = sprintf(buf, "%s", "curproc"); 1110 else 1111 return (EINVAL); 1112 1113 return (uiomove((caddr_t)buf, len, ap->a_uio)); 1114 } 1115 1116 /* 1117 * convert decimal ascii to pid_t 1118 */ 1119 static pid_t 1120 atopid(b, len) 1121 const char *b; 1122 u_int len; 1123 { 1124 pid_t p = 0; 1125 1126 while (len--) { 1127 char c = *b++; 1128 if (c < '0' || c > '9') 1129 return (NO_PID); 1130 p = 10 * p + (c - '0'); 1131 if (p > PID_MAX) 1132 return (NO_PID); 1133 } 1134 1135 return (p); 1136 } 1137