1 /* $NetBSD: procfs_vnops.c,v 1.93 2003/01/04 15:42:35 martin Exp $ */ 2 3 /* 4 * Copyright (c) 1993 Jan-Simon Pendry 5 * Copyright (c) 1993, 1995 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Jan-Simon Pendry. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 40 */ 41 42 /* 43 * procfs vnode interface 44 */ 45 46 #include <sys/cdefs.h> 47 __KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.93 2003/01/04 15:42:35 martin Exp $"); 48 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/time.h> 52 #include <sys/kernel.h> 53 #include <sys/file.h> 54 #include <sys/filedesc.h> 55 #include <sys/proc.h> 56 #include <sys/vnode.h> 57 #include <sys/namei.h> 58 #include <sys/malloc.h> 59 #include <sys/mount.h> 60 #include <sys/dirent.h> 61 #include <sys/resourcevar.h> 62 #include <sys/stat.h> 63 #include <sys/ptrace.h> 64 65 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */ 66 67 #include <machine/reg.h> 68 69 #include <miscfs/genfs/genfs.h> 70 #include <miscfs/procfs/procfs.h> 71 72 /* 73 * Vnode Operations. 74 * 75 */ 76 77 static int procfs_validfile_linux __P((struct proc *, struct mount *)); 78 79 /* 80 * This is a list of the valid names in the 81 * process-specific sub-directories. It is 82 * used in procfs_lookup and procfs_readdir 83 */ 84 const struct proc_target { 85 u_char pt_type; 86 u_char pt_namlen; 87 char *pt_name; 88 pfstype pt_pfstype; 89 int (*pt_valid) __P((struct proc *, struct mount *)); 90 } proc_targets[] = { 91 #define N(s) sizeof(s)-1, s 92 /* name type validp */ 93 { DT_DIR, N("."), Pproc, NULL }, 94 { DT_DIR, N(".."), Proot, NULL }, 95 { DT_DIR, N("fd"), Pfd, NULL }, 96 { DT_REG, N("file"), Pfile, procfs_validfile }, 97 { DT_REG, N("mem"), Pmem, NULL }, 98 { DT_REG, N("regs"), Pregs, procfs_validregs }, 99 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs }, 100 { DT_REG, N("ctl"), Pctl, NULL }, 101 { DT_REG, N("status"), Pstatus, NULL }, 102 { DT_REG, N("note"), Pnote, NULL }, 103 { DT_REG, N("notepg"), Pnotepg, NULL }, 104 { DT_REG, N("map"), Pmap, procfs_validmap }, 105 { DT_REG, N("maps"), Pmaps, procfs_validmap }, 106 { DT_REG, N("cmdline"), Pcmdline, NULL }, 107 { DT_REG, N("exe"), Pfile, procfs_validfile_linux }, 108 #ifdef __HAVE_PROCFS_MACHDEP 109 PROCFS_MACHDEP_NODETYPE_DEFNS 110 #endif 111 #undef N 112 }; 113 static int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]); 114 115 /* 116 * List of files in the root directory. Note: the validate function will 117 * be called with p == NULL for these ones. 118 */ 119 struct proc_target proc_root_targets[] = { 120 #define N(s) sizeof(s)-1, s 121 /* name type validp */ 122 { DT_REG, N("meminfo"), Pmeminfo, procfs_validfile_linux }, 123 { DT_REG, N("cpuinfo"), Pcpuinfo, procfs_validfile_linux }, 124 #undef N 125 }; 126 static int nproc_root_targets = 127 sizeof(proc_root_targets) / sizeof(proc_root_targets[0]); 128 129 int procfs_lookup __P((void *)); 130 #define procfs_create genfs_eopnotsupp_rele 131 #define procfs_mknod genfs_eopnotsupp_rele 132 int procfs_open __P((void *)); 133 int procfs_close __P((void *)); 134 int procfs_access __P((void *)); 135 int procfs_getattr __P((void *)); 136 int procfs_setattr __P((void *)); 137 #define procfs_read procfs_rw 138 #define procfs_write procfs_rw 139 #define procfs_fcntl genfs_fcntl 140 #define procfs_ioctl genfs_enoioctl 141 #define procfs_poll genfs_poll 142 #define procfs_revoke genfs_revoke 143 #define procfs_fsync genfs_nullop 144 #define procfs_seek genfs_nullop 145 #define procfs_remove genfs_eopnotsupp_rele 146 int procfs_link __P((void *)); 147 #define procfs_rename genfs_eopnotsupp_rele 148 #define procfs_mkdir genfs_eopnotsupp_rele 149 #define procfs_rmdir genfs_eopnotsupp_rele 150 int procfs_symlink __P((void *)); 151 int procfs_readdir __P((void *)); 152 int procfs_readlink __P((void *)); 153 #define procfs_abortop genfs_abortop 154 int procfs_inactive __P((void *)); 155 int procfs_reclaim __P((void *)); 156 #define procfs_lock genfs_lock 157 #define procfs_unlock genfs_unlock 158 #define procfs_bmap genfs_badop 159 #define procfs_strategy genfs_badop 160 int procfs_print __P((void *)); 161 int procfs_pathconf __P((void *)); 162 #define procfs_islocked genfs_islocked 163 #define procfs_advlock genfs_einval 164 #define procfs_blkatoff genfs_eopnotsupp 165 #define procfs_valloc genfs_eopnotsupp 166 #define procfs_vfree genfs_nullop 167 #define procfs_truncate genfs_eopnotsupp 168 #define procfs_update genfs_nullop 169 #define procfs_bwrite genfs_eopnotsupp 170 #define procfs_putpages genfs_null_putpages 171 172 static int atoi __P((const char *, size_t)); 173 174 /* 175 * procfs vnode operations. 176 */ 177 int (**procfs_vnodeop_p) __P((void *)); 178 const struct vnodeopv_entry_desc procfs_vnodeop_entries[] = { 179 { &vop_default_desc, vn_default_error }, 180 { &vop_lookup_desc, procfs_lookup }, /* lookup */ 181 { &vop_create_desc, procfs_create }, /* create */ 182 { &vop_mknod_desc, procfs_mknod }, /* mknod */ 183 { &vop_open_desc, procfs_open }, /* open */ 184 { &vop_close_desc, procfs_close }, /* close */ 185 { &vop_access_desc, procfs_access }, /* access */ 186 { &vop_getattr_desc, procfs_getattr }, /* getattr */ 187 { &vop_setattr_desc, procfs_setattr }, /* setattr */ 188 { &vop_read_desc, procfs_read }, /* read */ 189 { &vop_write_desc, procfs_write }, /* write */ 190 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */ 191 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */ 192 { &vop_poll_desc, procfs_poll }, /* poll */ 193 { &vop_revoke_desc, procfs_revoke }, /* revoke */ 194 { &vop_fsync_desc, procfs_fsync }, /* fsync */ 195 { &vop_seek_desc, procfs_seek }, /* seek */ 196 { &vop_remove_desc, procfs_remove }, /* remove */ 197 { &vop_link_desc, procfs_link }, /* link */ 198 { &vop_rename_desc, procfs_rename }, /* rename */ 199 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */ 200 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */ 201 { &vop_symlink_desc, procfs_symlink }, /* symlink */ 202 { &vop_readdir_desc, procfs_readdir }, /* readdir */ 203 { &vop_readlink_desc, procfs_readlink }, /* readlink */ 204 { &vop_abortop_desc, procfs_abortop }, /* abortop */ 205 { &vop_inactive_desc, procfs_inactive }, /* inactive */ 206 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */ 207 { &vop_lock_desc, procfs_lock }, /* lock */ 208 { &vop_unlock_desc, procfs_unlock }, /* unlock */ 209 { &vop_bmap_desc, procfs_bmap }, /* bmap */ 210 { &vop_strategy_desc, procfs_strategy }, /* strategy */ 211 { &vop_print_desc, procfs_print }, /* print */ 212 { &vop_islocked_desc, procfs_islocked }, /* islocked */ 213 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */ 214 { &vop_advlock_desc, procfs_advlock }, /* advlock */ 215 { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */ 216 { &vop_valloc_desc, procfs_valloc }, /* valloc */ 217 { &vop_vfree_desc, procfs_vfree }, /* vfree */ 218 { &vop_truncate_desc, procfs_truncate }, /* truncate */ 219 { &vop_update_desc, procfs_update }, /* update */ 220 { &vop_putpages_desc, procfs_putpages }, /* putpages */ 221 { NULL, NULL } 222 }; 223 const struct vnodeopv_desc procfs_vnodeop_opv_desc = 224 { &procfs_vnodeop_p, procfs_vnodeop_entries }; 225 /* 226 * set things up for doing i/o on 227 * the pfsnode (vp). (vp) is locked 228 * on entry, and should be left locked 229 * on exit. 230 * 231 * for procfs we don't need to do anything 232 * in particular for i/o. all that is done 233 * is to support exclusive open on process 234 * memory images. 235 */ 236 int 237 procfs_open(v) 238 void *v; 239 { 240 struct vop_open_args /* { 241 struct vnode *a_vp; 242 int a_mode; 243 struct ucred *a_cred; 244 struct proc *a_p; 245 } */ *ap = v; 246 struct pfsnode *pfs = VTOPFS(ap->a_vp); 247 struct proc *p1, *p2; 248 int error; 249 250 p1 = ap->a_p; /* tracer */ 251 p2 = PFIND(pfs->pfs_pid); /* traced */ 252 253 if (p2 == NULL) 254 return (ENOENT); /* was ESRCH, jsp */ 255 256 switch (pfs->pfs_type) { 257 case Pmem: 258 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) || 259 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) 260 return (EBUSY); 261 262 if ((error = process_checkioperm(p1, p2)) != 0) 263 return (error); 264 265 if (ap->a_mode & FWRITE) 266 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); 267 268 return (0); 269 270 default: 271 break; 272 } 273 274 return (0); 275 } 276 277 /* 278 * close the pfsnode (vp) after doing i/o. 279 * (vp) is not locked on entry or exit. 280 * 281 * nothing to do for procfs other than undo 282 * any exclusive open flag (see _open above). 283 */ 284 int 285 procfs_close(v) 286 void *v; 287 { 288 struct vop_close_args /* { 289 struct vnode *a_vp; 290 int a_fflag; 291 struct ucred *a_cred; 292 struct proc *a_p; 293 } */ *ap = v; 294 struct pfsnode *pfs = VTOPFS(ap->a_vp); 295 296 switch (pfs->pfs_type) { 297 case Pmem: 298 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) 299 pfs->pfs_flags &= ~(FWRITE|O_EXCL); 300 break; 301 302 default: 303 break; 304 } 305 306 return (0); 307 } 308 309 /* 310 * _inactive is called when the pfsnode 311 * is vrele'd and the reference count goes 312 * to zero. (vp) will be on the vnode free 313 * list, so to get it back vget() must be 314 * used. 315 * 316 * for procfs, check if the process is still 317 * alive and if it isn't then just throw away 318 * the vnode by calling vgone(). this may 319 * be overkill and a waste of time since the 320 * chances are that the process will still be 321 * there and PFIND is not free. 322 * 323 * (vp) is locked on entry, but must be unlocked on exit. 324 */ 325 int 326 procfs_inactive(v) 327 void *v; 328 { 329 struct vop_inactive_args /* { 330 struct vnode *a_vp; 331 struct proc *a_p; 332 } */ *ap = v; 333 struct pfsnode *pfs = VTOPFS(ap->a_vp); 334 335 VOP_UNLOCK(ap->a_vp, 0); 336 if (PFIND(pfs->pfs_pid) == NULL) 337 vgone(ap->a_vp); 338 339 return (0); 340 } 341 342 /* 343 * _reclaim is called when getnewvnode() 344 * wants to make use of an entry on the vnode 345 * free list. at this time the filesystem needs 346 * to free any private data and remove the node 347 * from any private lists. 348 */ 349 int 350 procfs_reclaim(v) 351 void *v; 352 { 353 struct vop_reclaim_args /* { 354 struct vnode *a_vp; 355 } */ *ap = v; 356 357 return (procfs_freevp(ap->a_vp)); 358 } 359 360 /* 361 * Return POSIX pathconf information applicable to special devices. 362 */ 363 int 364 procfs_pathconf(v) 365 void *v; 366 { 367 struct vop_pathconf_args /* { 368 struct vnode *a_vp; 369 int a_name; 370 register_t *a_retval; 371 } */ *ap = v; 372 373 switch (ap->a_name) { 374 case _PC_LINK_MAX: 375 *ap->a_retval = LINK_MAX; 376 return (0); 377 case _PC_MAX_CANON: 378 *ap->a_retval = MAX_CANON; 379 return (0); 380 case _PC_MAX_INPUT: 381 *ap->a_retval = MAX_INPUT; 382 return (0); 383 case _PC_PIPE_BUF: 384 *ap->a_retval = PIPE_BUF; 385 return (0); 386 case _PC_CHOWN_RESTRICTED: 387 *ap->a_retval = 1; 388 return (0); 389 case _PC_VDISABLE: 390 *ap->a_retval = _POSIX_VDISABLE; 391 return (0); 392 case _PC_SYNC_IO: 393 *ap->a_retval = 1; 394 return (0); 395 default: 396 return (EINVAL); 397 } 398 /* NOTREACHED */ 399 } 400 401 /* 402 * _print is used for debugging. 403 * just print a readable description 404 * of (vp). 405 */ 406 int 407 procfs_print(v) 408 void *v; 409 { 410 struct vop_print_args /* { 411 struct vnode *a_vp; 412 } */ *ap = v; 413 struct pfsnode *pfs = VTOPFS(ap->a_vp); 414 415 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n", 416 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags); 417 return 0; 418 } 419 420 int 421 procfs_link(v) 422 void *v; 423 { 424 struct vop_link_args /* { 425 struct vnode *a_dvp; 426 struct vnode *a_vp; 427 struct componentname *a_cnp; 428 } */ *ap = v; 429 430 VOP_ABORTOP(ap->a_dvp, ap->a_cnp); 431 vput(ap->a_dvp); 432 return (EROFS); 433 } 434 435 int 436 procfs_symlink(v) 437 void *v; 438 { 439 struct vop_symlink_args /* { 440 struct vnode *a_dvp; 441 struct vnode **a_vpp; 442 struct componentname *a_cnp; 443 struct vattr *a_vap; 444 char *a_target; 445 } */ *ap = v; 446 447 VOP_ABORTOP(ap->a_dvp, ap->a_cnp); 448 vput(ap->a_dvp); 449 return (EROFS); 450 } 451 452 /* 453 * Invent attributes for pfsnode (vp) and store 454 * them in (vap). 455 * Directories lengths are returned as zero since 456 * any real length would require the genuine size 457 * to be computed, and nothing cares anyway. 458 * 459 * this is relatively minimal for procfs. 460 */ 461 int 462 procfs_getattr(v) 463 void *v; 464 { 465 struct vop_getattr_args /* { 466 struct vnode *a_vp; 467 struct vattr *a_vap; 468 struct ucred *a_cred; 469 struct proc *a_p; 470 } */ *ap = v; 471 struct pfsnode *pfs = VTOPFS(ap->a_vp); 472 struct vattr *vap = ap->a_vap; 473 struct proc *procp; 474 int error; 475 476 /* first check the process still exists */ 477 switch (pfs->pfs_type) { 478 case Proot: 479 case Pcurproc: 480 case Pself: 481 procp = 0; 482 break; 483 484 default: 485 procp = PFIND(pfs->pfs_pid); 486 if (procp == NULL) 487 return (ENOENT); 488 break; 489 } 490 491 error = 0; 492 493 /* start by zeroing out the attributes */ 494 VATTR_NULL(vap); 495 496 /* next do all the common fields */ 497 vap->va_type = ap->a_vp->v_type; 498 vap->va_mode = pfs->pfs_mode; 499 vap->va_fileid = pfs->pfs_fileno; 500 vap->va_flags = 0; 501 vap->va_blocksize = PAGE_SIZE; 502 503 /* 504 * Make all times be current TOD. Avoid microtime(9), it's slow. 505 * We don't guard the read from time(9) with splclock(9) since we 506 * don't actually need to be THAT sure the access is atomic. 507 * 508 * It would be possible to get the process start 509 * time from the p_stat structure, but there's 510 * no "file creation" time stamp anyway, and the 511 * p_stat structure is not addressible if u. gets 512 * swapped out for that process. 513 */ 514 TIMEVAL_TO_TIMESPEC(&time, &vap->va_ctime); 515 vap->va_atime = vap->va_mtime = vap->va_ctime; 516 517 switch (pfs->pfs_type) { 518 case Pmem: 519 case Pregs: 520 case Pfpregs: 521 #if defined(__HAVE_PROCFS_MACHDEP) && defined(PROCFS_MACHDEP_PROTECT_CASES) 522 PROCFS_MACHDEP_PROTECT_CASES 523 #endif 524 /* 525 * If the process has exercised some setuid or setgid 526 * privilege, then rip away read/write permission so 527 * that only root can gain access. 528 */ 529 if (procp->p_flag & P_SUGID) 530 vap->va_mode &= ~(S_IRUSR|S_IWUSR); 531 /* FALLTHROUGH */ 532 case Pctl: 533 case Pstatus: 534 case Pnote: 535 case Pnotepg: 536 case Pmap: 537 case Pmaps: 538 case Pcmdline: 539 vap->va_nlink = 1; 540 vap->va_uid = procp->p_ucred->cr_uid; 541 vap->va_gid = procp->p_ucred->cr_gid; 542 break; 543 case Pmeminfo: 544 case Pcpuinfo: 545 vap->va_nlink = 1; 546 vap->va_uid = vap->va_gid = 0; 547 break; 548 549 default: 550 break; 551 } 552 553 /* 554 * now do the object specific fields 555 * 556 * The size could be set from struct reg, but it's hardly 557 * worth the trouble, and it puts some (potentially) machine 558 * dependent data into this machine-independent code. If it 559 * becomes important then this function should break out into 560 * a per-file stat function in the corresponding .c file. 561 */ 562 563 switch (pfs->pfs_type) { 564 case Proot: 565 /* 566 * Set nlink to 1 to tell fts(3) we don't actually know. 567 */ 568 vap->va_nlink = 1; 569 vap->va_uid = 0; 570 vap->va_gid = 0; 571 vap->va_bytes = vap->va_size = DEV_BSIZE; 572 break; 573 574 case Pcurproc: { 575 char buf[16]; /* should be enough */ 576 vap->va_nlink = 1; 577 vap->va_uid = 0; 578 vap->va_gid = 0; 579 vap->va_bytes = vap->va_size = 580 sprintf(buf, "%ld", (long)curproc->p_pid); 581 break; 582 } 583 584 case Pself: 585 vap->va_nlink = 1; 586 vap->va_uid = 0; 587 vap->va_gid = 0; 588 vap->va_bytes = vap->va_size = sizeof("curproc"); 589 break; 590 591 case Pfd: 592 if (pfs->pfs_fd != -1) { 593 struct file *fp; 594 if ((error = procfs_getfp(pfs, &fp)) != 0) 595 return error; 596 vap->va_nlink = 1; 597 vap->va_uid = fp->f_cred->cr_uid; 598 vap->va_gid = fp->f_cred->cr_gid; 599 switch (fp->f_type) { 600 case DTYPE_VNODE: 601 vap->va_bytes = vap->va_size = 602 ((struct vnode *)fp->f_data)->v_size; 603 break; 604 default: 605 vap->va_bytes = vap->va_size = 0; 606 break; 607 } 608 break; 609 } 610 /*FALLTHROUGH*/ 611 case Pproc: 612 vap->va_nlink = 2; 613 vap->va_uid = procp->p_ucred->cr_uid; 614 vap->va_gid = procp->p_ucred->cr_gid; 615 vap->va_bytes = vap->va_size = DEV_BSIZE; 616 break; 617 618 case Pfile: 619 error = EOPNOTSUPP; 620 break; 621 622 case Pmem: 623 vap->va_bytes = vap->va_size = 624 ctob(procp->p_vmspace->vm_tsize + 625 procp->p_vmspace->vm_dsize + 626 procp->p_vmspace->vm_ssize); 627 break; 628 629 #if defined(PT_GETREGS) || defined(PT_SETREGS) 630 case Pregs: 631 vap->va_bytes = vap->va_size = sizeof(struct reg); 632 break; 633 #endif 634 635 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS) 636 case Pfpregs: 637 vap->va_bytes = vap->va_size = sizeof(struct fpreg); 638 break; 639 #endif 640 641 case Pctl: 642 case Pstatus: 643 case Pnote: 644 case Pnotepg: 645 case Pcmdline: 646 case Pmeminfo: 647 case Pcpuinfo: 648 vap->va_bytes = vap->va_size = 0; 649 break; 650 case Pmap: 651 case Pmaps: 652 /* 653 * Advise a larger blocksize for the map files, so that 654 * they may be read in one pass. 655 */ 656 vap->va_blocksize = 4 * PAGE_SIZE; 657 vap->va_bytes = vap->va_size = 0; 658 break; 659 660 #ifdef __HAVE_PROCFS_MACHDEP 661 PROCFS_MACHDEP_NODETYPE_CASES 662 error = procfs_machdep_getattr(ap->a_vp, vap, procp); 663 break; 664 #endif 665 666 default: 667 panic("procfs_getattr"); 668 } 669 670 return (error); 671 } 672 673 /*ARGSUSED*/ 674 int 675 procfs_setattr(v) 676 void *v; 677 { 678 /* 679 * just fake out attribute setting 680 * it's not good to generate an error 681 * return, otherwise things like creat() 682 * will fail when they try to set the 683 * file length to 0. worse, this means 684 * that echo $note > /proc/$pid/note will fail. 685 */ 686 687 return (0); 688 } 689 690 /* 691 * implement access checking. 692 * 693 * actually, the check for super-user is slightly 694 * broken since it will allow read access to write-only 695 * objects. this doesn't cause any particular trouble 696 * but does mean that the i/o entry points need to check 697 * that the operation really does make sense. 698 */ 699 int 700 procfs_access(v) 701 void *v; 702 { 703 struct vop_access_args /* { 704 struct vnode *a_vp; 705 int a_mode; 706 struct ucred *a_cred; 707 struct proc *a_p; 708 } */ *ap = v; 709 struct vattr va; 710 int error; 711 712 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) != 0) 713 return (error); 714 715 return (vaccess(va.va_type, va.va_mode, 716 va.va_uid, va.va_gid, ap->a_mode, ap->a_cred)); 717 } 718 719 /* 720 * lookup. this is incredibly complicated in the 721 * general case, however for most pseudo-filesystems 722 * very little needs to be done. 723 * 724 * Locking isn't hard here, just poorly documented. 725 * 726 * If we're looking up ".", just vref the parent & return it. 727 * 728 * If we're looking up "..", unlock the parent, and lock "..". If everything 729 * went ok, and we're on the last component and the caller requested the 730 * parent locked, try to re-lock the parent. We do this to prevent lock 731 * races. 732 * 733 * For anything else, get the needed node. Then unlock the parent if not 734 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the 735 * parent in the .. case). 736 * 737 * We try to exit with the parent locked in error cases. 738 */ 739 int 740 procfs_lookup(v) 741 void *v; 742 { 743 struct vop_lookup_args /* { 744 struct vnode * a_dvp; 745 struct vnode ** a_vpp; 746 struct componentname * a_cnp; 747 } */ *ap = v; 748 struct componentname *cnp = ap->a_cnp; 749 struct vnode **vpp = ap->a_vpp; 750 struct vnode *dvp = ap->a_dvp; 751 const char *pname = cnp->cn_nameptr; 752 const struct proc_target *pt = NULL; 753 struct vnode *fvp; 754 pid_t pid; 755 struct pfsnode *pfs; 756 struct proc *p = NULL; 757 int i, error, wantpunlock, iscurproc = 0, isself = 0; 758 759 *vpp = NULL; 760 cnp->cn_flags &= ~PDIRUNLOCK; 761 762 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) 763 return (EROFS); 764 765 if (cnp->cn_namelen == 1 && *pname == '.') { 766 *vpp = dvp; 767 VREF(dvp); 768 return (0); 769 } 770 771 wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN)); 772 pfs = VTOPFS(dvp); 773 switch (pfs->pfs_type) { 774 case Proot: 775 /* 776 * Shouldn't get here with .. in the root node. 777 */ 778 if (cnp->cn_flags & ISDOTDOT) 779 return (EIO); 780 781 iscurproc = CNEQ(cnp, "curproc", 7); 782 isself = CNEQ(cnp, "self", 4); 783 784 if (iscurproc || isself) { 785 error = procfs_allocvp(dvp->v_mount, vpp, 0, 786 iscurproc ? Pcurproc : Pself, -1); 787 if ((error == 0) && (wantpunlock)) { 788 VOP_UNLOCK(dvp, 0); 789 cnp->cn_flags |= PDIRUNLOCK; 790 } 791 return (error); 792 } 793 794 for (i = 0; i < nproc_root_targets; i++) { 795 pt = &proc_root_targets[i]; 796 if (cnp->cn_namelen == pt->pt_namlen && 797 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 798 (pt->pt_valid == NULL || 799 (*pt->pt_valid)(p, dvp->v_mount))) 800 break; 801 } 802 803 if (i != nproc_root_targets) { 804 error = procfs_allocvp(dvp->v_mount, vpp, 0, 805 pt->pt_pfstype, -1); 806 if ((error == 0) && (wantpunlock)) { 807 VOP_UNLOCK(dvp, 0); 808 cnp->cn_flags |= PDIRUNLOCK; 809 } 810 return (error); 811 } 812 813 pid = (pid_t)atoi(pname, cnp->cn_namelen); 814 if (pid > PID_MAX) 815 break; 816 817 p = PFIND(pid); 818 if (p == NULL) 819 break; 820 821 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc, -1); 822 if ((error == 0) && (wantpunlock)) { 823 VOP_UNLOCK(dvp, 0); 824 cnp->cn_flags |= PDIRUNLOCK; 825 } 826 return (error); 827 828 case Pproc: 829 /* 830 * do the .. dance. We unlock the directory, and then 831 * get the root dir. That will automatically return .. 832 * locked. Then if the caller wanted dvp locked, we 833 * re-lock. 834 */ 835 if (cnp->cn_flags & ISDOTDOT) { 836 VOP_UNLOCK(dvp, 0); 837 cnp->cn_flags |= PDIRUNLOCK; 838 error = procfs_root(dvp->v_mount, vpp); 839 if ((error == 0) && (wantpunlock == 0) && 840 ((error = vn_lock(dvp, LK_EXCLUSIVE)) == 0)) 841 cnp->cn_flags &= ~PDIRUNLOCK; 842 return (error); 843 } 844 845 p = PFIND(pfs->pfs_pid); 846 if (p == NULL) 847 break; 848 849 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) { 850 if (cnp->cn_namelen == pt->pt_namlen && 851 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 852 (pt->pt_valid == NULL || 853 (*pt->pt_valid)(p, dvp->v_mount))) 854 goto found; 855 } 856 break; 857 858 found: 859 if (pt->pt_pfstype == Pfile) { 860 fvp = p->p_textvp; 861 /* We already checked that it exists. */ 862 VREF(fvp); 863 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY); 864 if (wantpunlock) { 865 VOP_UNLOCK(dvp, 0); 866 cnp->cn_flags |= PDIRUNLOCK; 867 } 868 *vpp = fvp; 869 return (0); 870 } 871 872 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 873 pt->pt_pfstype, -1); 874 if ((error == 0) && (wantpunlock)) { 875 VOP_UNLOCK(dvp, 0); 876 cnp->cn_flags |= PDIRUNLOCK; 877 } 878 return (error); 879 880 case Pfd: { 881 int fd; 882 struct file *fp; 883 /* 884 * do the .. dance. We unlock the directory, and then 885 * get the proc dir. That will automatically return .. 886 * locked. Then if the caller wanted dvp locked, we 887 * re-lock. 888 */ 889 if (cnp->cn_flags & ISDOTDOT) { 890 VOP_UNLOCK(dvp, 0); 891 cnp->cn_flags |= PDIRUNLOCK; 892 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 893 Pproc, -1); 894 if ((error == 0) && (wantpunlock == 0) && 895 ((error = vn_lock(dvp, LK_EXCLUSIVE)) == 0)) 896 cnp->cn_flags &= ~PDIRUNLOCK; 897 return (error); 898 } 899 fd = atoi(pname, cnp->cn_namelen); 900 if (fd == -1) 901 return EINVAL; 902 p = PFIND(pfs->pfs_pid); 903 if (p == NULL) 904 return ESRCH; 905 if ((fp = p->p_fd->fd_ofiles[fd]) == NULL) 906 return ENOENT; 907 switch (fp->f_type) { 908 case DTYPE_VNODE: 909 fvp = (struct vnode *)fp->f_data; 910 VREF(fvp); 911 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY | 912 (p == curproc ? LK_CANRECURSE : 0)); 913 *vpp = fvp; 914 error = 0; 915 break; 916 default: 917 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 918 Pfd, fd); 919 break; 920 } 921 if ((error == 0) && (wantpunlock)) { 922 VOP_UNLOCK(dvp, 0); 923 cnp->cn_flags |= PDIRUNLOCK; 924 } 925 return error; 926 } 927 default: 928 return (ENOTDIR); 929 } 930 931 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS); 932 } 933 934 int 935 procfs_validfile(p, mp) 936 struct proc *p; 937 struct mount *mp; 938 { 939 return (p->p_textvp != NULL); 940 } 941 942 static int 943 procfs_validfile_linux(p, mp) 944 struct proc *p; 945 struct mount *mp; 946 { 947 int flags; 948 949 flags = VFSTOPROC(mp)->pmnt_flags; 950 return ((flags & PROCFSMNT_LINUXCOMPAT) && 951 (p == NULL || procfs_validfile(p, mp))); 952 } 953 954 /* 955 * readdir returns directory entries from pfsnode (vp). 956 * 957 * the strategy here with procfs is to generate a single 958 * directory entry at a time (struct dirent) and then 959 * copy that out to userland using uiomove. a more efficent 960 * though more complex implementation, would try to minimize 961 * the number of calls to uiomove(). for procfs, this is 962 * hardly worth the added code complexity. 963 * 964 * this should just be done through read() 965 */ 966 int 967 procfs_readdir(v) 968 void *v; 969 { 970 struct vop_readdir_args /* { 971 struct vnode *a_vp; 972 struct uio *a_uio; 973 struct ucred *a_cred; 974 int *a_eofflag; 975 off_t **a_cookies; 976 int *a_ncookies; 977 } */ *ap = v; 978 struct uio *uio = ap->a_uio; 979 struct dirent d; 980 struct pfsnode *pfs; 981 off_t i; 982 int error; 983 off_t *cookies = NULL; 984 int ncookies, left, skip, j; 985 struct vnode *vp; 986 const struct proc_target *pt; 987 988 vp = ap->a_vp; 989 pfs = VTOPFS(vp); 990 991 if (uio->uio_resid < UIO_MX) 992 return (EINVAL); 993 if (uio->uio_offset < 0) 994 return (EINVAL); 995 996 error = 0; 997 i = uio->uio_offset; 998 memset((caddr_t)&d, 0, UIO_MX); 999 d.d_reclen = UIO_MX; 1000 ncookies = uio->uio_resid / UIO_MX; 1001 1002 switch (pfs->pfs_type) { 1003 /* 1004 * this is for the process-specific sub-directories. 1005 * all that is needed to is copy out all the entries 1006 * from the procent[] table (top of this file). 1007 */ 1008 case Pproc: { 1009 struct proc *p; 1010 1011 if (i >= nproc_targets) 1012 return 0; 1013 1014 p = PFIND(pfs->pfs_pid); 1015 if (p == NULL) 1016 break; 1017 1018 if (ap->a_ncookies) { 1019 ncookies = min(ncookies, (nproc_targets - i)); 1020 cookies = malloc(ncookies * sizeof (off_t), 1021 M_TEMP, M_WAITOK); 1022 *ap->a_cookies = cookies; 1023 } 1024 1025 for (pt = &proc_targets[i]; 1026 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) { 1027 if (pt->pt_valid && 1028 (*pt->pt_valid)(p, vp->v_mount) == 0) 1029 continue; 1030 1031 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, 1032 pt->pt_pfstype, -1); 1033 d.d_namlen = pt->pt_namlen; 1034 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1035 d.d_type = pt->pt_type; 1036 1037 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 1038 break; 1039 if (cookies) 1040 *cookies++ = i + 1; 1041 } 1042 1043 break; 1044 } 1045 case Pfd: { 1046 struct proc *p; 1047 struct filedesc *fdp; 1048 int lim, last, nc = 0; 1049 1050 p = PFIND(pfs->pfs_pid); 1051 if (p == NULL) 1052 return ESRCH; 1053 1054 fdp = p->p_fd; 1055 1056 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 1057 last = min(fdp->fd_nfiles, lim); 1058 if (i >= lim) 1059 return 0; 1060 1061 if (ap->a_ncookies) { 1062 ncookies = min(ncookies, (fdp->fd_nfiles + 2 - i)); 1063 cookies = malloc(ncookies * sizeof (off_t), 1064 M_TEMP, M_WAITOK); 1065 *ap->a_cookies = cookies; 1066 } 1067 1068 for (; i < 2 && uio->uio_resid >= UIO_MX; i++) { 1069 pt = &proc_targets[i]; 1070 d.d_fileno = (pfs->pfs_pid << 8) + i; 1071 d.d_namlen = pt->pt_namlen; 1072 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, 1073 pt->pt_pfstype, -1); 1074 (void)memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1075 d.d_type = pt->pt_type; 1076 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 1077 break; 1078 if (cookies) 1079 *cookies++ = i + 1; 1080 nc++; 1081 } 1082 if (error) { 1083 ncookies = nc; 1084 break; 1085 } 1086 for (; uio->uio_resid >= UIO_MX && i < fdp->fd_nfiles; i++) { 1087 if (fdp->fd_ofiles[i - 2] == NULL) 1088 continue; 1089 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, Pfd, i - 2); 1090 d.d_namlen = snprintf(d.d_name, sizeof(d.d_name), 1091 "%lld", (long long)(i - 2)); 1092 d.d_type = VREG; 1093 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 1094 break; 1095 if (cookies) 1096 *cookies++ = i + 1; 1097 nc++; 1098 } 1099 ncookies = nc; 1100 break; 1101 } 1102 1103 /* 1104 * this is for the root of the procfs filesystem 1105 * what is needed are special entries for "curproc" 1106 * and "self" followed by an entry for each process 1107 * on allproc 1108 #ifdef PROCFS_ZOMBIE 1109 * and deadproc and zombproc. 1110 #endif 1111 */ 1112 1113 case Proot: { 1114 int pcnt = i, nc = 0; 1115 const struct proclist_desc *pd; 1116 volatile struct proc *p; 1117 1118 if (pcnt > 3) 1119 pcnt = 3; 1120 if (ap->a_ncookies) { 1121 /* 1122 * XXX Potentially allocating too much space here, 1123 * but I'm lazy. This loop needs some work. 1124 */ 1125 cookies = malloc(ncookies * sizeof (off_t), 1126 M_TEMP, M_WAITOK); 1127 *ap->a_cookies = cookies; 1128 } 1129 /* 1130 * XXX: THIS LOOP ASSUMES THAT allproc IS THE FIRST 1131 * PROCLIST IN THE proclists! 1132 */ 1133 proclist_lock_read(); 1134 pd = proclists; 1135 #ifdef PROCFS_ZOMBIE 1136 again: 1137 #endif 1138 for (p = LIST_FIRST(pd->pd_list); 1139 p != NULL && uio->uio_resid >= UIO_MX; i++, pcnt++) { 1140 switch (i) { 1141 case 0: /* `.' */ 1142 case 1: /* `..' */ 1143 d.d_fileno = PROCFS_FILENO(0, Proot, -1); 1144 d.d_namlen = i + 1; 1145 memcpy(d.d_name, "..", d.d_namlen); 1146 d.d_name[i + 1] = '\0'; 1147 d.d_type = DT_DIR; 1148 break; 1149 1150 case 2: 1151 d.d_fileno = PROCFS_FILENO(0, Pcurproc, -1); 1152 d.d_namlen = sizeof("curproc") - 1; 1153 memcpy(d.d_name, "curproc", sizeof("curproc")); 1154 d.d_type = DT_LNK; 1155 break; 1156 1157 case 3: 1158 d.d_fileno = PROCFS_FILENO(0, Pself, -1); 1159 d.d_namlen = sizeof("self") - 1; 1160 memcpy(d.d_name, "self", sizeof("self")); 1161 d.d_type = DT_LNK; 1162 break; 1163 1164 default: 1165 while (pcnt < i) { 1166 pcnt++; 1167 p = LIST_NEXT(p, p_list); 1168 if (!p) 1169 goto done; 1170 } 1171 d.d_fileno = PROCFS_FILENO(p->p_pid, Pproc, -1); 1172 d.d_namlen = sprintf(d.d_name, "%ld", 1173 (long)p->p_pid); 1174 d.d_type = DT_DIR; 1175 p = p->p_list.le_next; 1176 break; 1177 } 1178 1179 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 1180 break; 1181 nc++; 1182 if (cookies) 1183 *cookies++ = i + 1; 1184 } 1185 done: 1186 1187 #ifdef PROCFS_ZOMBIE 1188 pd++; 1189 if (p == NULL && pd->pd_list != NULL) 1190 goto again; 1191 #endif 1192 proclist_unlock_read(); 1193 1194 skip = i - pcnt; 1195 if (skip >= nproc_root_targets) 1196 break; 1197 left = nproc_root_targets - skip; 1198 for (j = 0, pt = &proc_root_targets[0]; 1199 uio->uio_resid >= UIO_MX && j < left; 1200 pt++, j++, i++) { 1201 if (pt->pt_valid && 1202 (*pt->pt_valid)(NULL, vp->v_mount) == 0) 1203 continue; 1204 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype, -1); 1205 d.d_namlen = pt->pt_namlen; 1206 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1207 d.d_type = pt->pt_type; 1208 1209 if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) 1210 break; 1211 nc++; 1212 if (cookies) 1213 *cookies++ = i + 1; 1214 } 1215 1216 ncookies = nc; 1217 break; 1218 } 1219 1220 default: 1221 error = ENOTDIR; 1222 break; 1223 } 1224 1225 if (ap->a_ncookies) { 1226 if (error) { 1227 if (cookies) 1228 free(*ap->a_cookies, M_TEMP); 1229 *ap->a_ncookies = 0; 1230 *ap->a_cookies = NULL; 1231 } else 1232 *ap->a_ncookies = ncookies; 1233 } 1234 uio->uio_offset = i; 1235 return (error); 1236 } 1237 1238 /* 1239 * readlink reads the link of `curproc' 1240 */ 1241 int 1242 procfs_readlink(v) 1243 void *v; 1244 { 1245 struct vop_readlink_args *ap = v; 1246 char buf[16]; /* should be enough */ 1247 int len; 1248 1249 if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pcurproc, -1)) 1250 len = sprintf(buf, "%ld", (long)curproc->p_pid); 1251 else if (VTOPFS(ap->a_vp)->pfs_fileno == PROCFS_FILENO(0, Pself, -1)) 1252 len = sprintf(buf, "%s", "curproc"); 1253 else 1254 return (EINVAL); 1255 1256 return (uiomove((caddr_t)buf, len, ap->a_uio)); 1257 } 1258 1259 /* 1260 * convert decimal ascii to int 1261 */ 1262 static int 1263 atoi(b, len) 1264 const char *b; 1265 size_t len; 1266 { 1267 int p = 0; 1268 1269 while (len--) { 1270 char c = *b++; 1271 if (c < '0' || c > '9') 1272 return -1; 1273 p = 10 * p + (c - '0'); 1274 } 1275 1276 return p; 1277 } 1278