1 /* $NetBSD: procfs_vnops.c,v 1.78 2001/02/21 21:39:58 jdolecek Exp $ */ 2 3 /* 4 * Copyright (c) 1993 Jan-Simon Pendry 5 * Copyright (c) 1993, 1995 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Jan-Simon Pendry. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 40 */ 41 42 /* 43 * procfs vnode interface 44 */ 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/time.h> 49 #include <sys/kernel.h> 50 #include <sys/file.h> 51 #include <sys/proc.h> 52 #include <sys/vnode.h> 53 #include <sys/namei.h> 54 #include <sys/malloc.h> 55 #include <sys/mount.h> 56 #include <sys/dirent.h> 57 #include <sys/resourcevar.h> 58 #include <sys/ptrace.h> 59 #include <sys/stat.h> 60 61 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */ 62 63 #include <machine/reg.h> 64 65 #include <miscfs/genfs/genfs.h> 66 #include <miscfs/procfs/procfs.h> 67 68 /* 69 * Vnode Operations. 70 * 71 */ 72 73 static int procfs_validfile_linux __P((struct proc *, struct mount *)); 74 75 /* 76 * This is a list of the valid names in the 77 * process-specific sub-directories. It is 78 * used in procfs_lookup and procfs_readdir 79 */ 80 const struct proc_target { 81 u_char pt_type; 82 u_char pt_namlen; 83 char *pt_name; 84 pfstype pt_pfstype; 85 int (*pt_valid) __P((struct proc *, struct mount *)); 86 } proc_targets[] = { 87 #define N(s) sizeof(s)-1, s 88 /* name type validp */ 89 { DT_DIR, N("."), Pproc, NULL }, 90 { DT_DIR, N(".."), Proot, NULL }, 91 { DT_REG, N("file"), Pfile, procfs_validfile }, 92 { DT_REG, N("mem"), Pmem, NULL }, 93 { DT_REG, N("regs"), Pregs, procfs_validregs }, 94 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs }, 95 { DT_REG, N("ctl"), Pctl, NULL }, 96 { DT_REG, N("status"), Pstatus, NULL }, 97 { DT_REG, N("note"), Pnote, NULL }, 98 { DT_REG, N("notepg"), Pnotepg, NULL }, 99 { DT_REG, N("map"), Pmap, procfs_validmap }, 100 { DT_REG, N("cmdline"), Pcmdline, NULL }, 101 { DT_REG, N("exe"), Pfile, procfs_validfile_linux }, 102 #undef N 103 }; 104 static int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]); 105 106 /* 107 * List of files in the root directory. Note: the validate function will 108 * be called with p == NULL for these ones. 109 */ 110 struct proc_target proc_root_targets[] = { 111 #define N(s) sizeof(s)-1, s 112 /* name type validp */ 113 { DT_REG, N("meminfo"), Pmeminfo, procfs_validfile_linux }, 114 { DT_REG, N("cpuinfo"), Pcpuinfo, procfs_validfile_linux }, 115 #undef N 116 }; 117 static int nproc_root_targets = 118 sizeof(proc_root_targets) / sizeof(proc_root_targets[0]); 119 120 int procfs_lookup __P((void *)); 121 #define procfs_create genfs_eopnotsupp_rele 122 #define procfs_mknod genfs_eopnotsupp_rele 123 int procfs_open __P((void *)); 124 int procfs_close __P((void *)); 125 int procfs_access __P((void *)); 126 int procfs_getattr __P((void *)); 127 int procfs_setattr __P((void *)); 128 #define procfs_read procfs_rw 129 #define procfs_write procfs_rw 130 #define procfs_fcntl genfs_fcntl 131 #define procfs_ioctl genfs_enoioctl 132 #define procfs_poll genfs_poll 133 #define procfs_revoke genfs_revoke 134 #define procfs_mmap genfs_eopnotsupp 135 #define procfs_fsync genfs_nullop 136 #define procfs_seek genfs_nullop 137 #define procfs_remove genfs_eopnotsupp_rele 138 int procfs_link __P((void *)); 139 #define procfs_rename genfs_eopnotsupp_rele 140 #define procfs_mkdir genfs_eopnotsupp_rele 141 #define procfs_rmdir genfs_eopnotsupp_rele 142 int procfs_symlink __P((void *)); 143 int procfs_readdir __P((void *)); 144 int procfs_readlink __P((void *)); 145 #define procfs_abortop genfs_abortop 146 int procfs_inactive __P((void *)); 147 int procfs_reclaim __P((void *)); 148 #define procfs_lock genfs_lock 149 #define procfs_unlock genfs_unlock 150 int procfs_bmap __P((void *)); 151 #define procfs_strategy genfs_badop 152 int procfs_print __P((void *)); 153 int procfs_pathconf __P((void *)); 154 #define procfs_islocked genfs_islocked 155 #define procfs_advlock genfs_einval 156 #define procfs_blkatoff genfs_eopnotsupp 157 #define procfs_valloc genfs_eopnotsupp 158 #define procfs_vfree genfs_nullop 159 #define procfs_truncate genfs_eopnotsupp 160 #define procfs_update genfs_nullop 161 #define procfs_bwrite genfs_eopnotsupp 162 163 static pid_t atopid __P((const char *, u_int)); 164 165 /* 166 * procfs vnode operations. 167 */ 168 int (**procfs_vnodeop_p) __P((void *)); 169 const struct vnodeopv_entry_desc procfs_vnodeop_entries[] = { 170 { &vop_default_desc, vn_default_error }, 171 { &vop_lookup_desc, procfs_lookup }, /* lookup */ 172 { &vop_create_desc, procfs_create }, /* create */ 173 { &vop_mknod_desc, procfs_mknod }, /* mknod */ 174 { &vop_open_desc, procfs_open }, /* open */ 175 { &vop_close_desc, procfs_close }, /* close */ 176 { &vop_access_desc, procfs_access }, /* access */ 177 { &vop_getattr_desc, procfs_getattr }, /* getattr */ 178 { &vop_setattr_desc, procfs_setattr }, /* setattr */ 179 { &vop_read_desc, procfs_read }, /* read */ 180 { &vop_write_desc, procfs_write }, /* write */ 181 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */ 182 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */ 183 { &vop_poll_desc, procfs_poll }, /* poll */ 184 { &vop_revoke_desc, procfs_revoke }, /* revoke */ 185 { &vop_mmap_desc, procfs_mmap }, /* mmap */ 186 { &vop_fsync_desc, procfs_fsync }, /* fsync */ 187 { &vop_seek_desc, procfs_seek }, /* seek */ 188 { &vop_remove_desc, procfs_remove }, /* remove */ 189 { &vop_link_desc, procfs_link }, /* link */ 190 { &vop_rename_desc, procfs_rename }, /* rename */ 191 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */ 192 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */ 193 { &vop_symlink_desc, procfs_symlink }, /* symlink */ 194 { &vop_readdir_desc, procfs_readdir }, /* readdir */ 195 { &vop_readlink_desc, procfs_readlink }, /* readlink */ 196 { &vop_abortop_desc, procfs_abortop }, /* abortop */ 197 { &vop_inactive_desc, procfs_inactive }, /* inactive */ 198 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */ 199 { &vop_lock_desc, procfs_lock }, /* lock */ 200 { &vop_unlock_desc, procfs_unlock }, /* unlock */ 201 { &vop_bmap_desc, procfs_bmap }, /* bmap */ 202 { &vop_strategy_desc, procfs_strategy }, /* strategy */ 203 { &vop_print_desc, procfs_print }, /* print */ 204 { &vop_islocked_desc, procfs_islocked }, /* islocked */ 205 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */ 206 { &vop_advlock_desc, procfs_advlock }, /* advlock */ 207 { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */ 208 { &vop_valloc_desc, procfs_valloc }, /* valloc */ 209 { &vop_vfree_desc, procfs_vfree }, /* vfree */ 210 { &vop_truncate_desc, procfs_truncate }, /* truncate */ 211 { &vop_update_desc, procfs_update }, /* update */ 212 { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } 213 }; 214 const struct vnodeopv_desc procfs_vnodeop_opv_desc = 215 { &procfs_vnodeop_p, procfs_vnodeop_entries }; 216 /* 217 * set things up for doing i/o on 218 * the pfsnode (vp). (vp) is locked 219 * on entry, and should be left locked 220 * on exit. 221 * 222 * for procfs we don't need to do anything 223 * in particular for i/o. all that is done 224 * is to support exclusive open on process 225 * memory images. 226 */ 227 int 228 procfs_open(v) 229 void *v; 230 { 231 struct vop_open_args /* { 232 struct vnode *a_vp; 233 int a_mode; 234 struct ucred *a_cred; 235 struct proc *a_p; 236 } */ *ap = v; 237 struct pfsnode *pfs = VTOPFS(ap->a_vp); 238 struct proc *p1, *p2; 239 int error; 240 241 p1 = ap->a_p; /* tracer */ 242 p2 = PFIND(pfs->pfs_pid); /* traced */ 243 244 if (p2 == NULL) 245 return (ENOENT); /* was ESRCH, jsp */ 246 247 switch (pfs->pfs_type) { 248 case Pmem: 249 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) || 250 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) 251 return (EBUSY); 252 253 if ((error = procfs_checkioperm(p1, p2)) != 0) 254 return (EPERM); 255 256 if (ap->a_mode & FWRITE) 257 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); 258 259 return (0); 260 261 default: 262 break; 263 } 264 265 return (0); 266 } 267 268 /* 269 * close the pfsnode (vp) after doing i/o. 270 * (vp) is not locked on entry or exit. 271 * 272 * nothing to do for procfs other than undo 273 * any exclusive open flag (see _open above). 274 */ 275 int 276 procfs_close(v) 277 void *v; 278 { 279 struct vop_close_args /* { 280 struct vnode *a_vp; 281 int a_fflag; 282 struct ucred *a_cred; 283 struct proc *a_p; 284 } */ *ap = v; 285 struct pfsnode *pfs = VTOPFS(ap->a_vp); 286 287 switch (pfs->pfs_type) { 288 case Pmem: 289 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) 290 pfs->pfs_flags &= ~(FWRITE|O_EXCL); 291 break; 292 293 default: 294 break; 295 } 296 297 return (0); 298 } 299 300 /* 301 * do block mapping for pfsnode (vp). 302 * since we don't use the buffer cache 303 * for procfs this function should never 304 * be called. in any case, it's not clear 305 * what part of the kernel ever makes use 306 * of this function. for sanity, this is the 307 * usual no-op bmap, although returning 308 * (EIO) would be a reasonable alternative. 309 */ 310 int 311 procfs_bmap(v) 312 void *v; 313 { 314 struct vop_bmap_args /* { 315 struct vnode *a_vp; 316 daddr_t a_bn; 317 struct vnode **a_vpp; 318 daddr_t *a_bnp; 319 int * a_runp; 320 } */ *ap = v; 321 322 if (ap->a_vpp != NULL) 323 *ap->a_vpp = ap->a_vp; 324 if (ap->a_bnp != NULL) 325 *ap->a_bnp = ap->a_bn; 326 if (ap->a_runp != NULL) 327 *ap->a_runp = 0; 328 return (0); 329 } 330 331 /* 332 * _inactive is called when the pfsnode 333 * is vrele'd and the reference count goes 334 * to zero. (vp) will be on the vnode free 335 * list, so to get it back vget() must be 336 * used. 337 * 338 * for procfs, check if the process is still 339 * alive and if it isn't then just throw away 340 * the vnode by calling vgone(). this may 341 * be overkill and a waste of time since the 342 * chances are that the process will still be 343 * there and PFIND is not free. 344 * 345 * (vp) is locked on entry, but must be unlocked on exit. 346 */ 347 int 348 procfs_inactive(v) 349 void *v; 350 { 351 struct vop_inactive_args /* { 352 struct vnode *a_vp; 353 struct proc *a_p; 354 } */ *ap = v; 355 struct pfsnode *pfs = VTOPFS(ap->a_vp); 356 357 VOP_UNLOCK(ap->a_vp, 0); 358 if (PFIND(pfs->pfs_pid) == 0) 359 vgone(ap->a_vp); 360 361 return (0); 362 } 363 364 /* 365 * _reclaim is called when getnewvnode() 366 * wants to make use of an entry on the vnode 367 * free list. at this time the filesystem needs 368 * to free any private data and remove the node 369 * from any private lists. 370 */ 371 int 372 procfs_reclaim(v) 373 void *v; 374 { 375 struct vop_reclaim_args /* { 376 struct vnode *a_vp; 377 } */ *ap = v; 378 379 return (procfs_freevp(ap->a_vp)); 380 } 381 382 /* 383 * Return POSIX pathconf information applicable to special devices. 384 */ 385 int 386 procfs_pathconf(v) 387 void *v; 388 { 389 struct vop_pathconf_args /* { 390 struct vnode *a_vp; 391 int a_name; 392 register_t *a_retval; 393 } */ *ap = v; 394 395 switch (ap->a_name) { 396 case _PC_LINK_MAX: 397 *ap->a_retval = LINK_MAX; 398 return (0); 399 case _PC_MAX_CANON: 400 *ap->a_retval = MAX_CANON; 401 return (0); 402 case _PC_MAX_INPUT: 403 *ap->a_retval = MAX_INPUT; 404 return (0); 405 case _PC_PIPE_BUF: 406 *ap->a_retval = PIPE_BUF; 407 return (0); 408 case _PC_CHOWN_RESTRICTED: 409 *ap->a_retval = 1; 410 return (0); 411 case _PC_VDISABLE: 412 *ap->a_retval = _POSIX_VDISABLE; 413 return (0); 414 case _PC_SYNC_IO: 415 *ap->a_retval = 1; 416 return (0); 417 default: 418 return (EINVAL); 419 } 420 /* NOTREACHED */ 421 } 422 423 /* 424 * _print is used for debugging. 425 * just print a readable description 426 * of (vp). 427 */ 428 int 429 procfs_print(v) 430 void *v; 431 { 432 struct vop_print_args /* { 433 struct vnode *a_vp; 434 } */ *ap = v; 435 struct pfsnode *pfs = VTOPFS(ap->a_vp); 436 437 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n", 438 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags); 439 return 0; 440 } 441 442 int 443 procfs_link(v) 444 void *v; 445 { 446 struct vop_link_args /* { 447 struct vnode *a_dvp; 448 struct vnode *a_vp; 449 struct componentname *a_cnp; 450 } */ *ap = v; 451 452 VOP_ABORTOP(ap->a_dvp, ap->a_cnp); 453 vput(ap->a_dvp); 454 return (EROFS); 455 } 456 457 int 458 procfs_symlink(v) 459 void *v; 460 { 461 struct vop_symlink_args /* { 462 struct vnode *a_dvp; 463 struct vnode **a_vpp; 464 struct componentname *a_cnp; 465 struct vattr *a_vap; 466 char *a_target; 467 } */ *ap = v; 468 469 VOP_ABORTOP(ap->a_dvp, ap->a_cnp); 470 vput(ap->a_dvp); 471 return (EROFS); 472 } 473 474 /* 475 * Invent attributes for pfsnode (vp) and store 476 * them in (vap). 477 * Directories lengths are returned as zero since 478 * any real length would require the genuine size 479 * to be computed, and nothing cares anyway. 480 * 481 * this is relatively minimal for procfs. 482 */ 483 int 484 procfs_getattr(v) 485 void *v; 486 { 487 struct vop_getattr_args /* { 488 struct vnode *a_vp; 489 struct vattr *a_vap; 490 struct ucred *a_cred; 491 struct proc *a_p; 492 } */ *ap = v; 493 struct pfsnode *pfs = VTOPFS(ap->a_vp); 494 struct vattr *vap = ap->a_vap; 495 struct proc *procp; 496 struct timeval tv; 497 int error; 498 499 /* first check the process still exists */ 500 switch (pfs->pfs_type) { 501 case Proot: 502 case Pcurproc: 503 case Pself: 504 procp = 0; 505 break; 506 507 default: 508 procp = PFIND(pfs->pfs_pid); 509 if (procp == 0) 510 return (ENOENT); 511 break; 512 } 513 514 error = 0; 515 516 /* start by zeroing out the attributes */ 517 VATTR_NULL(vap); 518 519 /* next do all the common fields */ 520 vap->va_type = ap->a_vp->v_type; 521 vap->va_mode = pfs->pfs_mode; 522 vap->va_fileid = pfs->pfs_fileno; 523 vap->va_flags = 0; 524 vap->va_blocksize = PAGE_SIZE; 525 526 /* 527 * Make all times be current TOD. 528 * It would be possible to get the process start 529 * time from the p_stat structure, but there's 530 * no "file creation" time stamp anyway, and the 531 * p_stat structure is not addressible if u. gets 532 * swapped out for that process. 533 */ 534 microtime(&tv); 535 TIMEVAL_TO_TIMESPEC(&tv, &vap->va_ctime); 536 vap->va_atime = vap->va_mtime = vap->va_ctime; 537 538 switch (pfs->pfs_type) { 539 case Pmem: 540 case Pregs: 541 case Pfpregs: 542 /* 543 * If the process has exercised some setuid or setgid 544 * privilege, then rip away read/write permission so 545 * that only root can gain access. 546 */ 547 if (procp->p_flag & P_SUGID) 548 vap->va_mode &= ~(S_IRUSR|S_IWUSR); 549 /* FALLTHROUGH */ 550 case Pctl: 551 case Pstatus: 552 case Pnote: 553 case Pnotepg: 554 case Pmap: 555 case Pcmdline: 556 vap->va_nlink = 1; 557 vap->va_uid = procp->p_ucred->cr_uid; 558 vap->va_gid = procp->p_ucred->cr_gid; 559 break; 560 case Pmeminfo: 561 case Pcpuinfo: 562 vap->va_nlink = 1; 563 vap->va_uid = vap->va_gid = 0; 564 break; 565 566 default: 567 break; 568 } 569 570 /* 571 * now do the object specific fields 572 * 573 * The size could be set from struct reg, but it's hardly 574 * worth the trouble, and it puts some (potentially) machine 575 * dependent data into this machine-independent code. If it 576 * becomes important then this function should break out into 577 * a per-file stat function in the corresponding .c file. 578 */ 579 580 switch (pfs->pfs_type) { 581 case Proot: 582 /* 583 * Set nlink to 1 to tell fts(3) we don't actually know. 584 */ 585 vap->va_nlink = 1; 586 vap->va_uid = 0; 587 vap->va_gid = 0; 588 vap->va_bytes = vap->va_size = DEV_BSIZE; 589 break; 590 591 case Pcurproc: { 592 char buf[16]; /* should be enough */ 593 vap->va_nlink = 1; 594 vap->va_uid = 0; 595 vap->va_gid = 0; 596 vap->va_bytes = vap->va_size = 597 sprintf(buf, "%ld", (long)curproc->p_pid); 598 break; 599 } 600 601 case Pself: 602 vap->va_nlink = 1; 603 vap->va_uid = 0; 604 vap->va_gid = 0; 605 vap->va_bytes = vap->va_size = sizeof("curproc"); 606 break; 607 608 case Pproc: 609 vap->va_nlink = 2; 610 vap->va_uid = procp->p_ucred->cr_uid; 611 vap->va_gid = procp->p_ucred->cr_gid; 612 vap->va_bytes = vap->va_size = DEV_BSIZE; 613 break; 614 615 case Pfile: 616 error = EOPNOTSUPP; 617 break; 618 619 case Pmem: 620 vap->va_bytes = vap->va_size = 621 ctob(procp->p_vmspace->vm_tsize + 622 procp->p_vmspace->vm_dsize + 623 procp->p_vmspace->vm_ssize); 624 break; 625 626 #if defined(PT_GETREGS) || defined(PT_SETREGS) 627 case Pregs: 628 vap->va_bytes = vap->va_size = sizeof(struct reg); 629 break; 630 #endif 631 632 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS) 633 case Pfpregs: 634 vap->va_bytes = vap->va_size = sizeof(struct fpreg); 635 break; 636 #endif 637 638 case Pctl: 639 case Pstatus: 640 case Pnote: 641 case Pnotepg: 642 case Pmap: 643 case Pcmdline: 644 case Pmeminfo: 645 case Pcpuinfo: 646 vap->va_bytes = vap->va_size = 0; 647 break; 648 649 default: 650 panic("procfs_getattr"); 651 } 652 653 return (error); 654 } 655 656 /*ARGSUSED*/ 657 int 658 procfs_setattr(v) 659 void *v; 660 { 661 /* 662 * just fake out attribute setting 663 * it's not good to generate an error 664 * return, otherwise things like creat() 665 * will fail when they try to set the 666 * file length to 0. worse, this means 667 * that echo $note > /proc/$pid/note will fail. 668 */ 669 670 return (0); 671 } 672 673 /* 674 * implement access checking. 675 * 676 * actually, the check for super-user is slightly 677 * broken since it will allow read access to write-only 678 * objects. this doesn't cause any particular trouble 679 * but does mean that the i/o entry points need to check 680 * that the operation really does make sense. 681 */ 682 int 683 procfs_access(v) 684 void *v; 685 { 686 struct vop_access_args /* { 687 struct vnode *a_vp; 688 int a_mode; 689 struct ucred *a_cred; 690 struct proc *a_p; 691 } */ *ap = v; 692 struct vattr va; 693 int error; 694 695 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) != 0) 696 return (error); 697 698 return (vaccess(va.va_type, va.va_mode, 699 va.va_uid, va.va_gid, ap->a_mode, ap->a_cred)); 700 } 701 702 /* 703 * lookup. this is incredibly complicated in the 704 * general case, however for most pseudo-filesystems 705 * very little needs to be done. 706 * 707 * Locking isn't hard here, just poorly documented. 708 * 709 * If we're looking up ".", just vref the parent & return it. 710 * 711 * If we're looking up "..", unlock the parent, and lock "..". If everything 712 * went ok, and we're on the last component and the caller requested the 713 * parent locked, try to re-lock the parent. We do this to prevent lock 714 * races. 715 * 716 * For anything else, get the needed node. Then unlock the parent if not 717 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the 718 * parent in the .. case). 719 * 720 * We try to exit with the parent locked in error cases. 721 */ 722 int 723 procfs_lookup(v) 724 void *v; 725 { 726 struct vop_lookup_args /* { 727 struct vnode * a_dvp; 728 struct vnode ** a_vpp; 729 struct componentname * a_cnp; 730 } */ *ap = v; 731 struct componentname *cnp = ap->a_cnp; 732 struct vnode **vpp = ap->a_vpp; 733 struct vnode *dvp = ap->a_dvp; 734 const char *pname = cnp->cn_nameptr; 735 const struct proc_target *pt = NULL; 736 struct vnode *fvp; 737 pid_t pid; 738 struct pfsnode *pfs; 739 struct proc *p = NULL; 740 int i, error, wantpunlock, iscurproc = 0, isself = 0; 741 742 *vpp = NULL; 743 cnp->cn_flags &= ~PDIRUNLOCK; 744 745 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) 746 return (EROFS); 747 748 if (cnp->cn_namelen == 1 && *pname == '.') { 749 *vpp = dvp; 750 VREF(dvp); 751 return (0); 752 } 753 754 wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN)); 755 pfs = VTOPFS(dvp); 756 switch (pfs->pfs_type) { 757 case Proot: 758 /* 759 * Shouldn't get here with .. in the root node. 760 */ 761 if (cnp->cn_flags & ISDOTDOT) 762 return (EIO); 763 764 iscurproc = CNEQ(cnp, "curproc", 7); 765 isself = CNEQ(cnp, "self", 4); 766 767 if (iscurproc || isself) { 768 error = procfs_allocvp(dvp->v_mount, vpp, 0, 769 iscurproc ? Pcurproc : Pself); 770 if ((error == 0) && (wantpunlock)) { 771 VOP_UNLOCK(dvp, 0); 772 cnp->cn_flags |= PDIRUNLOCK; 773 } 774 return (error); 775 } 776 777 for (i = 0; i < nproc_root_targets; i++) { 778 pt = &proc_root_targets[i]; 779 if (cnp->cn_namelen == pt->pt_namlen && 780 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 781 (pt->pt_valid == NULL || 782 (*pt->pt_valid)(p, dvp->v_mount))) 783 break; 784 } 785 786 if (i != nproc_root_targets) { 787 error = procfs_allocvp(dvp->v_mount, vpp, 0, 788 pt->pt_pfstype); 789 if ((error == 0) && (wantpunlock)) { 790 VOP_UNLOCK(dvp, 0); 791 cnp->cn_flags |= PDIRUNLOCK; 792 } 793 return (error); 794 } 795 796 pid = atopid(pname, cnp->cn_namelen); 797 if (pid == NO_PID) 798 break; 799 800 p = PFIND(pid); 801 if (p == 0) 802 break; 803 804 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc); 805 if ((error == 0) && (wantpunlock)) { 806 VOP_UNLOCK(dvp, 0); 807 cnp->cn_flags |= PDIRUNLOCK; 808 } 809 return (error); 810 811 case Pproc: 812 /* 813 * do the .. dance. We unlock the directory, and then 814 * get the root dir. That will automatically return .. 815 * locked. Then if the caller wanted dvp locked, we 816 * re-lock. 817 */ 818 if (cnp->cn_flags & ISDOTDOT) { 819 VOP_UNLOCK(dvp, 0); 820 cnp->cn_flags |= PDIRUNLOCK; 821 error = procfs_root(dvp->v_mount, vpp); 822 if ((error == 0) && (wantpunlock == 0) && 823 ((error = vn_lock(dvp, LK_EXCLUSIVE)) == 0)) 824 cnp->cn_flags &= ~PDIRUNLOCK; 825 return (error); 826 } 827 828 p = PFIND(pfs->pfs_pid); 829 if (p == 0) 830 break; 831 832 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) { 833 if (cnp->cn_namelen == pt->pt_namlen && 834 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 835 (pt->pt_valid == NULL || 836 (*pt->pt_valid)(p, dvp->v_mount))) 837 goto found; 838 } 839 break; 840 841 found: 842 if (pt->pt_pfstype == Pfile) { 843 fvp = p->p_textvp; 844 /* We already checked that it exists. */ 845 VREF(fvp); 846 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY); 847 if (wantpunlock) { 848 VOP_UNLOCK(dvp, 0); 849 cnp->cn_flags |= PDIRUNLOCK; 850 } 851 *vpp = fvp; 852 return (0); 853 } 854 855 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 856 pt->pt_pfstype); 857 if ((error == 0) && (wantpunlock)) { 858 VOP_UNLOCK(dvp, 0); 859 cnp->cn_flags |= PDIRUNLOCK; 860 } 861 return (error); 862 863 default: 864 return (ENOTDIR); 865 } 866 867 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS); 868 } 869 870 int 871 procfs_validfile(p, mp) 872 struct proc *p; 873 struct mount *mp; 874 { 875 return (p->p_textvp != NULL); 876 } 877 878 static int 879 procfs_validfile_linux(p, mp) 880 struct proc *p; 881 struct mount *mp; 882 { 883 int flags; 884 885 flags = VFSTOPROC(mp)->pmnt_flags; 886 return ((flags & PROCFSMNT_LINUXCOMPAT) && 887 (p == NULL || procfs_validfile(p, mp))); 888 } 889 890 /* 891 * readdir returns directory entries from pfsnode (vp). 892 * 893 * the strategy here with procfs is to generate a single 894 * directory entry at a time (struct dirent) and then 895 * copy that out to userland using uiomove. a more efficent 896 * though more complex implementation, would try to minimize 897 * the number of calls to uiomove(). for procfs, this is 898 * hardly worth the added code complexity. 899 * 900 * this should just be done through read() 901 */ 902 int 903 procfs_readdir(v) 904 void *v; 905 { 906 struct vop_readdir_args /* { 907 struct vnode *a_vp; 908 struct uio *a_uio; 909 struct ucred *a_cred; 910 int *a_eofflag; 911 off_t **a_cookies; 912 int *a_ncookies; 913 } */ *ap = v; 914 struct uio *uio = ap->a_uio; 915 struct dirent d; 916 struct pfsnode *pfs; 917 off_t i; 918 int error; 919 off_t *cookies = NULL; 920 int ncookies, left, skip, j; 921 struct vnode *vp; 922 const struct proc_target *pt; 923 924 vp = ap->a_vp; 925 pfs = VTOPFS(vp); 926 927 if (uio->uio_resid < UIO_MX) 928 return (EINVAL); 929 if (uio->uio_offset < 0) 930 return (EINVAL); 931 932 error = 0; 933 i = uio->uio_offset; 934 memset((caddr_t)&d, 0, UIO_MX); 935 d.d_reclen = UIO_MX; 936 ncookies = uio->uio_resid / UIO_MX; 937 938 switch (pfs->pfs_type) { 939 /* 940 * this is for the process-specific sub-directories. 941 * all that is needed to is copy out all the entries 942 * from the procent[] table (top of this file). 943 */ 944 case Pproc: { 945 struct proc *p; 946 947 if (i >= nproc_targets) 948 return 0; 949 950 p = PFIND(pfs->pfs_pid); 951 if (p == NULL) 952 break; 953 954 if (ap->a_ncookies) { 955 ncookies = min(ncookies, (nproc_targets - i)); 956 cookies = malloc(ncookies * sizeof (off_t), 957 M_TEMP, M_WAITOK); 958 *ap->a_cookies = cookies; 959 } 960 961 for (pt = &proc_targets[i]; 962 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) { 963 if (pt->pt_valid && 964 (*pt->pt_valid)(p, vp->v_mount) == 0) 965 continue; 966 967 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype); 968 d.d_namlen = pt->pt_namlen; 969 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 970 d.d_type = pt->pt_type; 971 972 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 973 break; 974 if (cookies) 975 *cookies++ = i + 1; 976 } 977 978 break; 979 } 980 981 /* 982 * this is for the root of the procfs filesystem 983 * what is needed are special entries for "curproc" 984 * and "self" followed by an entry for each process 985 * on allproc 986 #ifdef PROCFS_ZOMBIE 987 * and deadproc and zombproc. 988 #endif 989 */ 990 991 case Proot: { 992 int pcnt = i, nc = 0; 993 const struct proclist_desc *pd; 994 volatile struct proc *p; 995 996 if (pcnt > 3) 997 pcnt = 3; 998 if (ap->a_ncookies) { 999 /* 1000 * XXX Potentially allocating too much space here, 1001 * but I'm lazy. This loop needs some work. 1002 */ 1003 cookies = malloc(ncookies * sizeof (off_t), 1004 M_TEMP, M_WAITOK); 1005 *ap->a_cookies = cookies; 1006 } 1007 /* 1008 * XXX: THIS LOOP ASSUMES THAT allproc IS THE FIRST 1009 * PROCLIST IN THE proclists! 1010 */ 1011 proclist_lock_read(); 1012 pd = proclists; 1013 #ifdef PROCFS_ZOMBIE 1014 again: 1015 #endif 1016 for (p = LIST_FIRST(pd->pd_list); 1017 p != NULL && uio->uio_resid >= UIO_MX; i++, pcnt++) { 1018 switch (i) { 1019 case 0: /* `.' */ 1020 case 1: /* `..' */ 1021 d.d_fileno = PROCFS_FILENO(0, Proot); 1022 d.d_namlen = i + 1; 1023 memcpy(d.d_name, "..", d.d_namlen); 1024 d.d_name[i + 1] = '\0'; 1025 d.d_type = DT_DIR; 1026 break; 1027 1028 case 2: 1029 d.d_fileno = PROCFS_FILENO(0, Pcurproc); 1030 d.d_namlen = sizeof("curproc") - 1; 1031 memcpy(d.d_name, "curproc", sizeof("curproc")); 1032 d.d_type = DT_LNK; 1033 break; 1034 1035 case 3: 1036 d.d_fileno = PROCFS_FILENO(0, Pself); 1037 d.d_namlen = sizeof("self") - 1; 1038 memcpy(d.d_name, "self", sizeof("self")); 1039 d.d_type = DT_LNK; 1040 break; 1041 1042 default: 1043 while (pcnt < i) { 1044 pcnt++; 1045 p = LIST_NEXT(p, p_list); 1046 if (!p) 1047 goto done; 1048 } 1049 d.d_fileno = PROCFS_FILENO(p->p_pid, Pproc); 1050 d.d_namlen = sprintf(d.d_name, "%ld", 1051 (long)p->p_pid); 1052 d.d_type = DT_REG; 1053 p = p->p_list.le_next; 1054 break; 1055 } 1056 1057 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 1058 break; 1059 nc++; 1060 if (cookies) 1061 *cookies++ = i + 1; 1062 } 1063 done: 1064 1065 #ifdef PROCFS_ZOMBIE 1066 pd++; 1067 if (p == NULL && pd->pd_list != NULL) 1068 goto again; 1069 #endif 1070 proclist_unlock_read(); 1071 1072 skip = i - pcnt; 1073 if (skip >= nproc_root_targets) 1074 break; 1075 left = nproc_root_targets - skip; 1076 for (j = 0, pt = &proc_root_targets[0]; 1077 uio->uio_resid >= UIO_MX && j < left; 1078 pt++, j++, i++) { 1079 if (pt->pt_valid && 1080 (*pt->pt_valid)(NULL, vp->v_mount) == 0) 1081 continue; 1082 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype); 1083 d.d_namlen = pt->pt_namlen; 1084 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1085 d.d_type = pt->pt_type; 1086 1087 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 1088 break; 1089 nc++; 1090 if (cookies) 1091 *cookies++ = i + 1; 1092 } 1093 1094 ncookies = nc; 1095 break; 1096 } 1097 1098 default: 1099 error = ENOTDIR; 1100 break; 1101 } 1102 1103 if (ap->a_ncookies) { 1104 if (error) { 1105 if (cookies) 1106 free(*ap->a_cookies, M_TEMP); 1107 *ap->a_ncookies = 0; 1108 *ap->a_cookies = NULL; 1109 } else 1110 *ap->a_ncookies = ncookies; 1111 } 1112 uio->uio_offset = i; 1113 return (error); 1114 } 1115 1116 /* 1117 * readlink reads the link of `curproc' 1118 */ 1119 int 1120 procfs_readlink(v) 1121 void *v; 1122 { 1123 struct vop_readlink_args *ap = v; 1124 char buf[16]; /* should be enough */ 1125 int len; 1126 1127 if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pcurproc)) 1128 len = sprintf(buf, "%ld", (long)curproc->p_pid); 1129 else if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pself)) 1130 len = sprintf(buf, "%s", "curproc"); 1131 else 1132 return (EINVAL); 1133 1134 return (uiomove((caddr_t)buf, len, ap->a_uio)); 1135 } 1136 1137 /* 1138 * convert decimal ascii to pid_t 1139 */ 1140 static pid_t 1141 atopid(b, len) 1142 const char *b; 1143 u_int len; 1144 { 1145 pid_t p = 0; 1146 1147 while (len--) { 1148 char c = *b++; 1149 if (c < '0' || c > '9') 1150 return (NO_PID); 1151 p = 10 * p + (c - '0'); 1152 if (p > PID_MAX) 1153 return (NO_PID); 1154 } 1155 1156 return (p); 1157 } 1158