1 /* $NetBSD: procfs_vnops.c,v 1.75 2000/11/24 18:58:37 chs Exp $ */ 2 3 /* 4 * Copyright (c) 1993 Jan-Simon Pendry 5 * Copyright (c) 1993, 1995 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Jan-Simon Pendry. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 40 */ 41 42 /* 43 * procfs vnode interface 44 */ 45 46 #if defined(_KERNEL) && !defined(_LKM) 47 #include "opt_compat_linux.h" 48 #endif 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/time.h> 53 #include <sys/kernel.h> 54 #include <sys/file.h> 55 #include <sys/proc.h> 56 #include <sys/vnode.h> 57 #include <sys/namei.h> 58 #include <sys/malloc.h> 59 #include <sys/dirent.h> 60 #include <sys/resourcevar.h> 61 #include <sys/ptrace.h> 62 #include <sys/stat.h> 63 64 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */ 65 66 #include <machine/reg.h> 67 68 #include <miscfs/genfs/genfs.h> 69 #include <miscfs/procfs/procfs.h> 70 71 /* 72 * Vnode Operations. 73 * 74 */ 75 76 #ifdef COMPAT_LINUX 77 static int procfs_validfile_linux __P((struct proc *)); 78 #endif 79 80 /* 81 * This is a list of the valid names in the 82 * process-specific sub-directories. It is 83 * used in procfs_lookup and procfs_readdir 84 */ 85 struct proc_target { 86 u_char pt_type; 87 u_char pt_namlen; 88 char *pt_name; 89 pfstype pt_pfstype; 90 int (*pt_valid) __P((struct proc *p)); 91 } proc_targets[] = { 92 #define N(s) sizeof(s)-1, s 93 /* name type validp */ 94 { DT_DIR, N("."), Pproc, NULL }, 95 { DT_DIR, N(".."), Proot, NULL }, 96 { DT_REG, N("file"), Pfile, procfs_validfile }, 97 { DT_REG, N("mem"), Pmem, NULL }, 98 { DT_REG, N("regs"), Pregs, procfs_validregs }, 99 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs }, 100 { DT_REG, N("ctl"), Pctl, NULL }, 101 { DT_REG, N("status"), Pstatus, NULL }, 102 { DT_REG, N("note"), Pnote, NULL }, 103 { DT_REG, N("notepg"), Pnotepg, NULL }, 104 { DT_REG, N("map"), Pmap, procfs_validmap }, 105 { DT_REG, N("cmdline"), Pcmdline, NULL }, 106 #ifdef COMPAT_LINUX 107 { DT_REG, N("exe"), Pfile, procfs_validfile_linux }, 108 #endif 109 #undef N 110 }; 111 static int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]); 112 113 int procfs_lookup __P((void *)); 114 #define procfs_create genfs_eopnotsupp_rele 115 #define procfs_mknod genfs_eopnotsupp_rele 116 int procfs_open __P((void *)); 117 int procfs_close __P((void *)); 118 int procfs_access __P((void *)); 119 int procfs_getattr __P((void *)); 120 int procfs_setattr __P((void *)); 121 #define procfs_read procfs_rw 122 #define procfs_write procfs_rw 123 #define procfs_fcntl genfs_fcntl 124 #define procfs_ioctl genfs_enoioctl 125 #define procfs_poll genfs_poll 126 #define procfs_revoke genfs_revoke 127 #define procfs_mmap genfs_eopnotsupp 128 #define procfs_fsync genfs_nullop 129 #define procfs_seek genfs_nullop 130 #define procfs_remove genfs_eopnotsupp_rele 131 int procfs_link __P((void *)); 132 #define procfs_rename genfs_eopnotsupp_rele 133 #define procfs_mkdir genfs_eopnotsupp_rele 134 #define procfs_rmdir genfs_eopnotsupp_rele 135 int procfs_symlink __P((void *)); 136 int procfs_readdir __P((void *)); 137 int procfs_readlink __P((void *)); 138 #define procfs_abortop genfs_abortop 139 int procfs_inactive __P((void *)); 140 int procfs_reclaim __P((void *)); 141 #define procfs_lock genfs_lock 142 #define procfs_unlock genfs_unlock 143 int procfs_bmap __P((void *)); 144 #define procfs_strategy genfs_badop 145 int procfs_print __P((void *)); 146 int procfs_pathconf __P((void *)); 147 #define procfs_islocked genfs_islocked 148 #define procfs_advlock genfs_einval 149 #define procfs_blkatoff genfs_eopnotsupp 150 #define procfs_valloc genfs_eopnotsupp 151 #define procfs_vfree genfs_nullop 152 #define procfs_truncate genfs_eopnotsupp 153 #define procfs_update genfs_nullop 154 #define procfs_bwrite genfs_eopnotsupp 155 156 static pid_t atopid __P((const char *, u_int)); 157 158 /* 159 * procfs vnode operations. 160 */ 161 int (**procfs_vnodeop_p) __P((void *)); 162 struct vnodeopv_entry_desc procfs_vnodeop_entries[] = { 163 { &vop_default_desc, vn_default_error }, 164 { &vop_lookup_desc, procfs_lookup }, /* lookup */ 165 { &vop_create_desc, procfs_create }, /* create */ 166 { &vop_mknod_desc, procfs_mknod }, /* mknod */ 167 { &vop_open_desc, procfs_open }, /* open */ 168 { &vop_close_desc, procfs_close }, /* close */ 169 { &vop_access_desc, procfs_access }, /* access */ 170 { &vop_getattr_desc, procfs_getattr }, /* getattr */ 171 { &vop_setattr_desc, procfs_setattr }, /* setattr */ 172 { &vop_read_desc, procfs_read }, /* read */ 173 { &vop_write_desc, procfs_write }, /* write */ 174 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */ 175 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */ 176 { &vop_poll_desc, procfs_poll }, /* poll */ 177 { &vop_revoke_desc, procfs_revoke }, /* revoke */ 178 { &vop_mmap_desc, procfs_mmap }, /* mmap */ 179 { &vop_fsync_desc, procfs_fsync }, /* fsync */ 180 { &vop_seek_desc, procfs_seek }, /* seek */ 181 { &vop_remove_desc, procfs_remove }, /* remove */ 182 { &vop_link_desc, procfs_link }, /* link */ 183 { &vop_rename_desc, procfs_rename }, /* rename */ 184 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */ 185 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */ 186 { &vop_symlink_desc, procfs_symlink }, /* symlink */ 187 { &vop_readdir_desc, procfs_readdir }, /* readdir */ 188 { &vop_readlink_desc, procfs_readlink }, /* readlink */ 189 { &vop_abortop_desc, procfs_abortop }, /* abortop */ 190 { &vop_inactive_desc, procfs_inactive }, /* inactive */ 191 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */ 192 { &vop_lock_desc, procfs_lock }, /* lock */ 193 { &vop_unlock_desc, procfs_unlock }, /* unlock */ 194 { &vop_bmap_desc, procfs_bmap }, /* bmap */ 195 { &vop_strategy_desc, procfs_strategy }, /* strategy */ 196 { &vop_print_desc, procfs_print }, /* print */ 197 { &vop_islocked_desc, procfs_islocked }, /* islocked */ 198 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */ 199 { &vop_advlock_desc, procfs_advlock }, /* advlock */ 200 { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */ 201 { &vop_valloc_desc, procfs_valloc }, /* valloc */ 202 { &vop_vfree_desc, procfs_vfree }, /* vfree */ 203 { &vop_truncate_desc, procfs_truncate }, /* truncate */ 204 { &vop_update_desc, procfs_update }, /* update */ 205 { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } 206 }; 207 struct vnodeopv_desc procfs_vnodeop_opv_desc = 208 { &procfs_vnodeop_p, procfs_vnodeop_entries }; 209 /* 210 * set things up for doing i/o on 211 * the pfsnode (vp). (vp) is locked 212 * on entry, and should be left locked 213 * on exit. 214 * 215 * for procfs we don't need to do anything 216 * in particular for i/o. all that is done 217 * is to support exclusive open on process 218 * memory images. 219 */ 220 int 221 procfs_open(v) 222 void *v; 223 { 224 struct vop_open_args /* { 225 struct vnode *a_vp; 226 int a_mode; 227 struct ucred *a_cred; 228 struct proc *a_p; 229 } */ *ap = v; 230 struct pfsnode *pfs = VTOPFS(ap->a_vp); 231 struct proc *p1, *p2; 232 int error; 233 234 p1 = ap->a_p; /* tracer */ 235 p2 = PFIND(pfs->pfs_pid); /* traced */ 236 237 if (p2 == NULL) 238 return (ENOENT); /* was ESRCH, jsp */ 239 240 switch (pfs->pfs_type) { 241 case Pmem: 242 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) || 243 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) 244 return (EBUSY); 245 246 if ((error = procfs_checkioperm(p1, p2)) != 0) 247 return (EPERM); 248 249 if (ap->a_mode & FWRITE) 250 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); 251 252 return (0); 253 254 default: 255 break; 256 } 257 258 return (0); 259 } 260 261 /* 262 * close the pfsnode (vp) after doing i/o. 263 * (vp) is not locked on entry or exit. 264 * 265 * nothing to do for procfs other than undo 266 * any exclusive open flag (see _open above). 267 */ 268 int 269 procfs_close(v) 270 void *v; 271 { 272 struct vop_close_args /* { 273 struct vnode *a_vp; 274 int a_fflag; 275 struct ucred *a_cred; 276 struct proc *a_p; 277 } */ *ap = v; 278 struct pfsnode *pfs = VTOPFS(ap->a_vp); 279 280 switch (pfs->pfs_type) { 281 case Pmem: 282 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) 283 pfs->pfs_flags &= ~(FWRITE|O_EXCL); 284 break; 285 286 default: 287 break; 288 } 289 290 return (0); 291 } 292 293 /* 294 * do block mapping for pfsnode (vp). 295 * since we don't use the buffer cache 296 * for procfs this function should never 297 * be called. in any case, it's not clear 298 * what part of the kernel ever makes use 299 * of this function. for sanity, this is the 300 * usual no-op bmap, although returning 301 * (EIO) would be a reasonable alternative. 302 */ 303 int 304 procfs_bmap(v) 305 void *v; 306 { 307 struct vop_bmap_args /* { 308 struct vnode *a_vp; 309 daddr_t a_bn; 310 struct vnode **a_vpp; 311 daddr_t *a_bnp; 312 int * a_runp; 313 } */ *ap = v; 314 315 if (ap->a_vpp != NULL) 316 *ap->a_vpp = ap->a_vp; 317 if (ap->a_bnp != NULL) 318 *ap->a_bnp = ap->a_bn; 319 if (ap->a_runp != NULL) 320 *ap->a_runp = 0; 321 return (0); 322 } 323 324 /* 325 * _inactive is called when the pfsnode 326 * is vrele'd and the reference count goes 327 * to zero. (vp) will be on the vnode free 328 * list, so to get it back vget() must be 329 * used. 330 * 331 * for procfs, check if the process is still 332 * alive and if it isn't then just throw away 333 * the vnode by calling vgone(). this may 334 * be overkill and a waste of time since the 335 * chances are that the process will still be 336 * there and PFIND is not free. 337 * 338 * (vp) is locked on entry, but must be unlocked on exit. 339 */ 340 int 341 procfs_inactive(v) 342 void *v; 343 { 344 struct vop_inactive_args /* { 345 struct vnode *a_vp; 346 struct proc *a_p; 347 } */ *ap = v; 348 struct pfsnode *pfs = VTOPFS(ap->a_vp); 349 350 VOP_UNLOCK(ap->a_vp, 0); 351 if (PFIND(pfs->pfs_pid) == 0) 352 vgone(ap->a_vp); 353 354 return (0); 355 } 356 357 /* 358 * _reclaim is called when getnewvnode() 359 * wants to make use of an entry on the vnode 360 * free list. at this time the filesystem needs 361 * to free any private data and remove the node 362 * from any private lists. 363 */ 364 int 365 procfs_reclaim(v) 366 void *v; 367 { 368 struct vop_reclaim_args /* { 369 struct vnode *a_vp; 370 } */ *ap = v; 371 372 return (procfs_freevp(ap->a_vp)); 373 } 374 375 /* 376 * Return POSIX pathconf information applicable to special devices. 377 */ 378 int 379 procfs_pathconf(v) 380 void *v; 381 { 382 struct vop_pathconf_args /* { 383 struct vnode *a_vp; 384 int a_name; 385 register_t *a_retval; 386 } */ *ap = v; 387 388 switch (ap->a_name) { 389 case _PC_LINK_MAX: 390 *ap->a_retval = LINK_MAX; 391 return (0); 392 case _PC_MAX_CANON: 393 *ap->a_retval = MAX_CANON; 394 return (0); 395 case _PC_MAX_INPUT: 396 *ap->a_retval = MAX_INPUT; 397 return (0); 398 case _PC_PIPE_BUF: 399 *ap->a_retval = PIPE_BUF; 400 return (0); 401 case _PC_CHOWN_RESTRICTED: 402 *ap->a_retval = 1; 403 return (0); 404 case _PC_VDISABLE: 405 *ap->a_retval = _POSIX_VDISABLE; 406 return (0); 407 case _PC_SYNC_IO: 408 *ap->a_retval = 1; 409 return (0); 410 default: 411 return (EINVAL); 412 } 413 /* NOTREACHED */ 414 } 415 416 /* 417 * _print is used for debugging. 418 * just print a readable description 419 * of (vp). 420 */ 421 int 422 procfs_print(v) 423 void *v; 424 { 425 struct vop_print_args /* { 426 struct vnode *a_vp; 427 } */ *ap = v; 428 struct pfsnode *pfs = VTOPFS(ap->a_vp); 429 430 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n", 431 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags); 432 return 0; 433 } 434 435 int 436 procfs_link(v) 437 void *v; 438 { 439 struct vop_link_args /* { 440 struct vnode *a_dvp; 441 struct vnode *a_vp; 442 struct componentname *a_cnp; 443 } */ *ap = v; 444 445 VOP_ABORTOP(ap->a_dvp, ap->a_cnp); 446 vput(ap->a_dvp); 447 return (EROFS); 448 } 449 450 int 451 procfs_symlink(v) 452 void *v; 453 { 454 struct vop_symlink_args /* { 455 struct vnode *a_dvp; 456 struct vnode **a_vpp; 457 struct componentname *a_cnp; 458 struct vattr *a_vap; 459 char *a_target; 460 } */ *ap = v; 461 462 VOP_ABORTOP(ap->a_dvp, ap->a_cnp); 463 vput(ap->a_dvp); 464 return (EROFS); 465 } 466 467 /* 468 * Invent attributes for pfsnode (vp) and store 469 * them in (vap). 470 * Directories lengths are returned as zero since 471 * any real length would require the genuine size 472 * to be computed, and nothing cares anyway. 473 * 474 * this is relatively minimal for procfs. 475 */ 476 int 477 procfs_getattr(v) 478 void *v; 479 { 480 struct vop_getattr_args /* { 481 struct vnode *a_vp; 482 struct vattr *a_vap; 483 struct ucred *a_cred; 484 struct proc *a_p; 485 } */ *ap = v; 486 struct pfsnode *pfs = VTOPFS(ap->a_vp); 487 struct vattr *vap = ap->a_vap; 488 struct proc *procp; 489 struct timeval tv; 490 int error; 491 492 /* first check the process still exists */ 493 switch (pfs->pfs_type) { 494 case Proot: 495 case Pcurproc: 496 case Pself: 497 procp = 0; 498 break; 499 500 default: 501 procp = PFIND(pfs->pfs_pid); 502 if (procp == 0) 503 return (ENOENT); 504 break; 505 } 506 507 error = 0; 508 509 /* start by zeroing out the attributes */ 510 VATTR_NULL(vap); 511 512 /* next do all the common fields */ 513 vap->va_type = ap->a_vp->v_type; 514 vap->va_mode = pfs->pfs_mode; 515 vap->va_fileid = pfs->pfs_fileno; 516 vap->va_flags = 0; 517 vap->va_blocksize = PAGE_SIZE; 518 519 /* 520 * Make all times be current TOD. 521 * It would be possible to get the process start 522 * time from the p_stat structure, but there's 523 * no "file creation" time stamp anyway, and the 524 * p_stat structure is not addressible if u. gets 525 * swapped out for that process. 526 */ 527 microtime(&tv); 528 TIMEVAL_TO_TIMESPEC(&tv, &vap->va_ctime); 529 vap->va_atime = vap->va_mtime = vap->va_ctime; 530 531 switch (pfs->pfs_type) { 532 case Pmem: 533 case Pregs: 534 case Pfpregs: 535 /* 536 * If the process has exercised some setuid or setgid 537 * privilege, then rip away read/write permission so 538 * that only root can gain access. 539 */ 540 if (procp->p_flag & P_SUGID) 541 vap->va_mode &= ~(S_IRUSR|S_IWUSR); 542 /* FALLTHROUGH */ 543 case Pctl: 544 case Pstatus: 545 case Pnote: 546 case Pnotepg: 547 case Pmap: 548 case Pcmdline: 549 vap->va_nlink = 1; 550 vap->va_uid = procp->p_ucred->cr_uid; 551 vap->va_gid = procp->p_ucred->cr_gid; 552 break; 553 554 default: 555 break; 556 } 557 558 /* 559 * now do the object specific fields 560 * 561 * The size could be set from struct reg, but it's hardly 562 * worth the trouble, and it puts some (potentially) machine 563 * dependent data into this machine-independent code. If it 564 * becomes important then this function should break out into 565 * a per-file stat function in the corresponding .c file. 566 */ 567 568 switch (pfs->pfs_type) { 569 case Proot: 570 /* 571 * Set nlink to 1 to tell fts(3) we don't actually know. 572 */ 573 vap->va_nlink = 1; 574 vap->va_uid = 0; 575 vap->va_gid = 0; 576 vap->va_bytes = vap->va_size = DEV_BSIZE; 577 break; 578 579 case Pcurproc: { 580 char buf[16]; /* should be enough */ 581 vap->va_nlink = 1; 582 vap->va_uid = 0; 583 vap->va_gid = 0; 584 vap->va_bytes = vap->va_size = 585 sprintf(buf, "%ld", (long)curproc->p_pid); 586 break; 587 } 588 589 case Pself: 590 vap->va_nlink = 1; 591 vap->va_uid = 0; 592 vap->va_gid = 0; 593 vap->va_bytes = vap->va_size = sizeof("curproc"); 594 break; 595 596 case Pproc: 597 vap->va_nlink = 2; 598 vap->va_uid = procp->p_ucred->cr_uid; 599 vap->va_gid = procp->p_ucred->cr_gid; 600 vap->va_bytes = vap->va_size = DEV_BSIZE; 601 break; 602 603 case Pfile: 604 error = EOPNOTSUPP; 605 break; 606 607 case Pmem: 608 vap->va_bytes = vap->va_size = 609 ctob(procp->p_vmspace->vm_tsize + 610 procp->p_vmspace->vm_dsize + 611 procp->p_vmspace->vm_ssize); 612 break; 613 614 #if defined(PT_GETREGS) || defined(PT_SETREGS) 615 case Pregs: 616 vap->va_bytes = vap->va_size = sizeof(struct reg); 617 break; 618 #endif 619 620 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS) 621 case Pfpregs: 622 vap->va_bytes = vap->va_size = sizeof(struct fpreg); 623 break; 624 #endif 625 626 case Pctl: 627 case Pstatus: 628 case Pnote: 629 case Pnotepg: 630 case Pmap: 631 case Pcmdline: 632 vap->va_bytes = vap->va_size = 0; 633 break; 634 635 default: 636 panic("procfs_getattr"); 637 } 638 639 return (error); 640 } 641 642 /*ARGSUSED*/ 643 int 644 procfs_setattr(v) 645 void *v; 646 { 647 /* 648 * just fake out attribute setting 649 * it's not good to generate an error 650 * return, otherwise things like creat() 651 * will fail when they try to set the 652 * file length to 0. worse, this means 653 * that echo $note > /proc/$pid/note will fail. 654 */ 655 656 return (0); 657 } 658 659 /* 660 * implement access checking. 661 * 662 * actually, the check for super-user is slightly 663 * broken since it will allow read access to write-only 664 * objects. this doesn't cause any particular trouble 665 * but does mean that the i/o entry points need to check 666 * that the operation really does make sense. 667 */ 668 int 669 procfs_access(v) 670 void *v; 671 { 672 struct vop_access_args /* { 673 struct vnode *a_vp; 674 int a_mode; 675 struct ucred *a_cred; 676 struct proc *a_p; 677 } */ *ap = v; 678 struct vattr va; 679 int error; 680 681 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) != 0) 682 return (error); 683 684 return (vaccess(va.va_type, va.va_mode, 685 va.va_uid, va.va_gid, ap->a_mode, ap->a_cred)); 686 } 687 688 /* 689 * lookup. this is incredibly complicated in the 690 * general case, however for most pseudo-filesystems 691 * very little needs to be done. 692 * 693 * Locking isn't hard here, just poorly documented. 694 * 695 * If we're looking up ".", just vref the parent & return it. 696 * 697 * If we're looking up "..", unlock the parent, and lock "..". If everything 698 * went ok, and we're on the last component and the caller requested the 699 * parent locked, try to re-lock the parent. We do this to prevent lock 700 * races. 701 * 702 * For anything else, get the needed node. Then unlock the parent if not 703 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the 704 * parent in the .. case). 705 * 706 * We try to exit with the parent locked in error cases. 707 */ 708 int 709 procfs_lookup(v) 710 void *v; 711 { 712 struct vop_lookup_args /* { 713 struct vnode * a_dvp; 714 struct vnode ** a_vpp; 715 struct componentname * a_cnp; 716 } */ *ap = v; 717 struct componentname *cnp = ap->a_cnp; 718 struct vnode **vpp = ap->a_vpp; 719 struct vnode *dvp = ap->a_dvp; 720 const char *pname = cnp->cn_nameptr; 721 struct proc_target *pt; 722 struct vnode *fvp; 723 pid_t pid; 724 struct pfsnode *pfs; 725 struct proc *p; 726 int i, error, wantpunlock, iscurproc = 0, isself = 0; 727 728 *vpp = NULL; 729 cnp->cn_flags &= ~PDIRUNLOCK; 730 731 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) 732 return (EROFS); 733 734 if (cnp->cn_namelen == 1 && *pname == '.') { 735 *vpp = dvp; 736 VREF(dvp); 737 return (0); 738 } 739 740 wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN)); 741 pfs = VTOPFS(dvp); 742 switch (pfs->pfs_type) { 743 case Proot: 744 /* 745 * Shouldn't get here with .. in the root node. 746 */ 747 if (cnp->cn_flags & ISDOTDOT) 748 return (EIO); 749 750 iscurproc = CNEQ(cnp, "curproc", 7); 751 isself = CNEQ(cnp, "self", 4); 752 753 if (iscurproc || isself) { 754 error = procfs_allocvp(dvp->v_mount, vpp, 0, 755 iscurproc ? Pcurproc : Pself); 756 if ((error == 0) && (wantpunlock)) { 757 VOP_UNLOCK(dvp, 0); 758 cnp->cn_flags |= PDIRUNLOCK; 759 } 760 return (error); 761 } 762 763 pid = atopid(pname, cnp->cn_namelen); 764 if (pid == NO_PID) 765 break; 766 767 p = PFIND(pid); 768 if (p == 0) 769 break; 770 771 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc); 772 if ((error == 0) && (wantpunlock)) { 773 VOP_UNLOCK(dvp, 0); 774 cnp->cn_flags |= PDIRUNLOCK; 775 } 776 return (error); 777 778 case Pproc: 779 /* 780 * do the .. dance. We unlock the directory, and then 781 * get the root dir. That will automatically return .. 782 * locked. Then if the caller wanted dvp locked, we 783 * re-lock. 784 */ 785 if (cnp->cn_flags & ISDOTDOT) { 786 VOP_UNLOCK(dvp, 0); 787 cnp->cn_flags |= PDIRUNLOCK; 788 error = procfs_root(dvp->v_mount, vpp); 789 if ((error == 0) && (wantpunlock == 0) && 790 ((error = vn_lock(dvp, LK_EXCLUSIVE)) == 0)) 791 cnp->cn_flags &= ~PDIRUNLOCK; 792 return (error); 793 } 794 795 p = PFIND(pfs->pfs_pid); 796 if (p == 0) 797 break; 798 799 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) { 800 if (cnp->cn_namelen == pt->pt_namlen && 801 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 802 (pt->pt_valid == NULL || (*pt->pt_valid)(p))) 803 goto found; 804 } 805 break; 806 807 found: 808 if (pt->pt_pfstype == Pfile) { 809 fvp = p->p_textvp; 810 /* We already checked that it exists. */ 811 VREF(fvp); 812 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY); 813 if (wantpunlock) { 814 VOP_UNLOCK(dvp, 0); 815 cnp->cn_flags |= PDIRUNLOCK; 816 } 817 *vpp = fvp; 818 return (0); 819 } 820 821 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 822 pt->pt_pfstype); 823 if ((error == 0) && (wantpunlock)) { 824 VOP_UNLOCK(dvp, 0); 825 cnp->cn_flags |= PDIRUNLOCK; 826 } 827 return (error); 828 829 default: 830 return (ENOTDIR); 831 } 832 833 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS); 834 } 835 836 int 837 procfs_validfile(p) 838 struct proc *p; 839 { 840 return (p->p_textvp != NULL); 841 } 842 843 #ifdef COMPAT_LINUX 844 static int 845 procfs_validfile_linux(p) 846 struct proc *p; 847 { 848 return (!strcmp("linux", p->p_emul->e_name) && procfs_validfile(p)); 849 } 850 #endif 851 852 /* 853 * readdir returns directory entries from pfsnode (vp). 854 * 855 * the strategy here with procfs is to generate a single 856 * directory entry at a time (struct dirent) and then 857 * copy that out to userland using uiomove. a more efficent 858 * though more complex implementation, would try to minimize 859 * the number of calls to uiomove(). for procfs, this is 860 * hardly worth the added code complexity. 861 * 862 * this should just be done through read() 863 */ 864 int 865 procfs_readdir(v) 866 void *v; 867 { 868 struct vop_readdir_args /* { 869 struct vnode *a_vp; 870 struct uio *a_uio; 871 struct ucred *a_cred; 872 int *a_eofflag; 873 off_t **a_cookies; 874 int *a_ncookies; 875 } */ *ap = v; 876 struct uio *uio = ap->a_uio; 877 struct dirent d; 878 struct pfsnode *pfs; 879 off_t i; 880 int error; 881 off_t *cookies = NULL; 882 int ncookies; 883 884 pfs = VTOPFS(ap->a_vp); 885 886 if (uio->uio_resid < UIO_MX) 887 return (EINVAL); 888 if (uio->uio_offset < 0) 889 return (EINVAL); 890 891 error = 0; 892 i = uio->uio_offset; 893 memset((caddr_t)&d, 0, UIO_MX); 894 d.d_reclen = UIO_MX; 895 ncookies = uio->uio_resid / UIO_MX; 896 897 switch (pfs->pfs_type) { 898 /* 899 * this is for the process-specific sub-directories. 900 * all that is needed to is copy out all the entries 901 * from the procent[] table (top of this file). 902 */ 903 case Pproc: { 904 struct proc *p; 905 struct proc_target *pt; 906 907 if (i >= nproc_targets) 908 return 0; 909 910 p = PFIND(pfs->pfs_pid); 911 if (p == NULL) 912 break; 913 914 if (ap->a_ncookies) { 915 ncookies = min(ncookies, (nproc_targets - i)); 916 cookies = malloc(ncookies * sizeof (off_t), 917 M_TEMP, M_WAITOK); 918 *ap->a_cookies = cookies; 919 } 920 921 for (pt = &proc_targets[i]; 922 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) { 923 if (pt->pt_valid && (*pt->pt_valid)(p) == 0) 924 continue; 925 926 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype); 927 d.d_namlen = pt->pt_namlen; 928 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 929 d.d_type = pt->pt_type; 930 931 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 932 break; 933 if (cookies) 934 *cookies++ = i + 1; 935 } 936 937 break; 938 } 939 940 /* 941 * this is for the root of the procfs filesystem 942 * what is needed are special entries for "curproc" 943 * and "self" followed by an entry for each process 944 * on allproc 945 #ifdef PROCFS_ZOMBIE 946 * and deadproc and zombproc. 947 #endif 948 */ 949 950 case Proot: { 951 int pcnt = i, nc = 0; 952 const struct proclist_desc *pd; 953 volatile struct proc *p; 954 955 if (pcnt > 3) 956 pcnt = 3; 957 if (ap->a_ncookies) { 958 /* 959 * XXX Potentially allocating too much space here, 960 * but I'm lazy. This loop needs some work. 961 */ 962 cookies = malloc(ncookies * sizeof (off_t), 963 M_TEMP, M_WAITOK); 964 *ap->a_cookies = cookies; 965 } 966 /* 967 * XXX: THIS LOOP ASSUMES THAT allproc IS THE FIRST 968 * PROCLIST IN THE proclists! 969 */ 970 proclist_lock_read(); 971 pd = proclists; 972 #ifdef PROCFS_ZOMBIE 973 again: 974 #endif 975 for (p = LIST_FIRST(pd->pd_list); 976 p != NULL && uio->uio_resid >= UIO_MX; i++, pcnt++) { 977 switch (i) { 978 case 0: /* `.' */ 979 case 1: /* `..' */ 980 d.d_fileno = PROCFS_FILENO(0, Proot); 981 d.d_namlen = i + 1; 982 memcpy(d.d_name, "..", d.d_namlen); 983 d.d_name[i + 1] = '\0'; 984 d.d_type = DT_DIR; 985 break; 986 987 case 2: 988 d.d_fileno = PROCFS_FILENO(0, Pcurproc); 989 d.d_namlen = sizeof("curproc") - 1; 990 memcpy(d.d_name, "curproc", sizeof("curproc")); 991 d.d_type = DT_LNK; 992 break; 993 994 case 3: 995 d.d_fileno = PROCFS_FILENO(0, Pself); 996 d.d_namlen = sizeof("self") - 1; 997 memcpy(d.d_name, "self", sizeof("self")); 998 d.d_type = DT_LNK; 999 break; 1000 1001 default: 1002 while (pcnt < i) { 1003 pcnt++; 1004 p = LIST_NEXT(p, p_list); 1005 if (!p) 1006 goto done; 1007 } 1008 d.d_fileno = PROCFS_FILENO(p->p_pid, Pproc); 1009 d.d_namlen = sprintf(d.d_name, "%ld", 1010 (long)p->p_pid); 1011 d.d_type = DT_REG; 1012 p = p->p_list.le_next; 1013 break; 1014 } 1015 1016 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 1017 break; 1018 nc++; 1019 if (cookies) 1020 *cookies++ = i + 1; 1021 } 1022 done: 1023 1024 #ifdef PROCFS_ZOMBIE 1025 pd++; 1026 if (p == NULL && pd->pd_list != NULL) 1027 goto again; 1028 #endif 1029 proclist_unlock_read(); 1030 ncookies = nc; 1031 1032 break; 1033 1034 } 1035 1036 default: 1037 error = ENOTDIR; 1038 break; 1039 } 1040 1041 if (ap->a_ncookies) { 1042 if (error) { 1043 if (cookies) 1044 free(*ap->a_cookies, M_TEMP); 1045 *ap->a_ncookies = 0; 1046 *ap->a_cookies = NULL; 1047 } else 1048 *ap->a_ncookies = ncookies; 1049 } 1050 uio->uio_offset = i; 1051 return (error); 1052 } 1053 1054 /* 1055 * readlink reads the link of `curproc' 1056 */ 1057 int 1058 procfs_readlink(v) 1059 void *v; 1060 { 1061 struct vop_readlink_args *ap = v; 1062 char buf[16]; /* should be enough */ 1063 int len; 1064 1065 if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pcurproc)) 1066 len = sprintf(buf, "%ld", (long)curproc->p_pid); 1067 else if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pself)) 1068 len = sprintf(buf, "%s", "curproc"); 1069 else 1070 return (EINVAL); 1071 1072 return (uiomove((caddr_t)buf, len, ap->a_uio)); 1073 } 1074 1075 /* 1076 * convert decimal ascii to pid_t 1077 */ 1078 static pid_t 1079 atopid(b, len) 1080 const char *b; 1081 u_int len; 1082 { 1083 pid_t p = 0; 1084 1085 while (len--) { 1086 char c = *b++; 1087 if (c < '0' || c > '9') 1088 return (NO_PID); 1089 p = 10 * p + (c - '0'); 1090 if (p > PID_MAX) 1091 return (NO_PID); 1092 } 1093 1094 return (p); 1095 } 1096