1 /* $NetBSD: procfs_subr.c,v 1.72 2006/11/16 01:33:38 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Jan-Simon Pendry. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)procfs_subr.c 8.6 (Berkeley) 5/14/95 35 */ 36 37 /* 38 * Copyright (c) 1994 Christopher G. Demetriou. All rights reserved. 
39 * Copyright (c) 1993 Jan-Simon Pendry 40 * 41 * This code is derived from software contributed to Berkeley by 42 * Jan-Simon Pendry. 43 * 44 * Redistribution and use in source and binary forms, with or without 45 * modification, are permitted provided that the following conditions 46 * are met: 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 3. All advertising materials mentioning features or use of this software 53 * must display the following acknowledgement: 54 * This product includes software developed by the University of 55 * California, Berkeley and its contributors. 56 * 4. Neither the name of the University nor the names of its contributors 57 * may be used to endorse or promote products derived from this software 58 * without specific prior written permission. 59 * 60 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 61 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 62 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 63 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 64 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 65 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 66 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 68 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 69 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 70 * SUCH DAMAGE. 
71 * 72 * @(#)procfs_subr.c 8.6 (Berkeley) 5/14/95 73 */ 74 75 #include <sys/cdefs.h> 76 __KERNEL_RCSID(0, "$NetBSD: procfs_subr.c,v 1.72 2006/11/16 01:33:38 christos Exp $"); 77 78 #include <sys/param.h> 79 #include <sys/systm.h> 80 #include <sys/time.h> 81 #include <sys/kernel.h> 82 #include <sys/proc.h> 83 #include <sys/vnode.h> 84 #include <sys/malloc.h> 85 #include <sys/stat.h> 86 #include <sys/file.h> 87 #include <sys/filedesc.h> 88 89 #include <miscfs/procfs/procfs.h> 90 91 void procfs_hashins(struct pfsnode *); 92 void procfs_hashrem(struct pfsnode *); 93 struct vnode *procfs_hashget(pid_t, pfstype, int, struct mount *); 94 95 LIST_HEAD(pfs_hashhead, pfsnode) *pfs_hashtbl; 96 u_long pfs_ihash; /* size of hash table - 1; used as the bucket mask */ 97 #define PFSPIDHASH(pid) ((pid) & pfs_ihash) 98 99 struct lock pfs_hashlock; 100 struct simplelock pfs_hash_slock; 101 102 #define ISSET(t, f) ((t) & (f)) 103 104 /* 105 * allocate a pfsnode/vnode pair. the vnode is 106 * referenced, and locked. 107 * 108 * the pid, pfs_type, and mount point uniquely 109 * identify a pfsnode. the mount point is needed 110 * because someone might mount this filesystem 111 * twice. 112 * 113 * all pfsnodes are kept in a hash table (pfs_hashtbl) 114 * indexed by pid. new nodes are only allocated when they cannot 115 * be found in this table. entries in the table are 116 * removed when the vfs reclaim entry is called. 117 * 118 * a single lock is kept for the entire table. this is 119 * needed because the getnewvnode() function can block 120 * waiting for a vnode to become free, in which case there 121 * may be more than one process trying to get the same 122 * vnode. this lock is only taken if we are going to 123 * call getnewvnode, since the kernel itself is single-threaded. 124 * 125 * if an entry is found in the table, then call vget() to 126 * take a reference. this is done because there may be 127 * zero references to it and so it needs to be removed from 128 * the vnode free list. 
129 */ 130 int 131 procfs_allocvp(mp, vpp, pid, pfs_type, fd) 132 struct mount *mp; 133 struct vnode **vpp; 134 pid_t pid; 135 pfstype pfs_type; 136 int fd; 137 { 138 struct pfsnode *pfs; 139 struct vnode *vp; 140 int error; 141 142 do { 143 if ((*vpp = procfs_hashget(pid, pfs_type, fd, mp)) != NULL) 144 return (0); 145 } while (lockmgr(&pfs_hashlock, LK_EXCLUSIVE|LK_SLEEPFAIL, 0)); 146 147 if ((error = getnewvnode(VT_PROCFS, mp, procfs_vnodeop_p, &vp)) != 0) { 148 *vpp = NULL; 149 lockmgr(&pfs_hashlock, LK_RELEASE, NULL); 150 return (error); 151 } 152 153 MALLOC(pfs, void *, sizeof(struct pfsnode), M_TEMP, M_WAITOK); 154 vp->v_data = pfs; 155 156 pfs->pfs_pid = pid; 157 pfs->pfs_type = pfs_type; 158 pfs->pfs_vnode = vp; 159 pfs->pfs_flags = 0; 160 pfs->pfs_fileno = PROCFS_FILENO(pid, pfs_type, fd); 161 pfs->pfs_fd = fd; 162 163 switch (pfs_type) { 164 case PFSroot: /* /proc = dr-xr-xr-x */ 165 pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH; 166 vp->v_type = VDIR; 167 vp->v_flag = VROOT; 168 break; 169 170 case PFScurproc: /* /proc/curproc = lr-xr-xr-x */ 171 case PFSself: /* /proc/self = lr-xr-xr-x */ 172 case PFScwd: /* /proc/N/cwd = lr-xr-xr-x */ 173 case PFSchroot: /* /proc/N/chroot = lr-xr-xr-x */ 174 case PFSexe: /* /proc/N/exe = lr-xr-xr-x */ 175 pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH; 176 vp->v_type = VLNK; 177 break; 178 179 case PFSproc: /* /proc/N = dr-xr-xr-x */ 180 case PFSfd: 181 if (fd == -1) { /* /proc/N/fd = dr-xr-xr-x */ 182 pfs->pfs_mode = S_IRUSR|S_IXUSR; 183 vp->v_type = VDIR; 184 } else { /* /proc/N/fd/M = [ps-]rw------- */ 185 struct file *fp; 186 struct vnode *vxp; 187 struct proc *pown; 188 189 /* XXX can procfs_getfp() ever fail here? 
*/ 190 if ((error = procfs_getfp(pfs, &pown, &fp)) != 0) 191 goto bad; 192 FILE_USE(fp); 193 194 pfs->pfs_mode = S_IRUSR|S_IWUSR; 195 switch (fp->f_type) { 196 case DTYPE_VNODE: 197 vxp = (struct vnode *)fp->f_data; 198 199 /* 200 * We make symlinks for directories 201 * to avoid cycles. 202 */ 203 if (vxp->v_type == VDIR) 204 goto symlink; 205 vp->v_type = vxp->v_type; 206 break; 207 case DTYPE_PIPE: 208 vp->v_type = VFIFO; 209 break; 210 case DTYPE_SOCKET: 211 vp->v_type = VSOCK; 212 break; 213 case DTYPE_KQUEUE: 214 case DTYPE_MISC: 215 symlink: 216 pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP| 217 S_IXGRP|S_IROTH|S_IXOTH; 218 vp->v_type = VLNK; 219 break; 220 default: 221 error = EOPNOTSUPP; 222 FILE_UNUSE(fp, proc_representative_lwp(pown)); 223 goto bad; 224 } 225 FILE_UNUSE(fp, proc_representative_lwp(pown)); 226 } 227 break; 228 229 case PFSfile: /* /proc/N/file = -rw------- */ 230 case PFSmem: /* /proc/N/mem = -rw------- */ 231 case PFSregs: /* /proc/N/regs = -rw------- */ 232 case PFSfpregs: /* /proc/N/fpregs = -rw------- */ 233 pfs->pfs_mode = S_IRUSR|S_IWUSR; 234 vp->v_type = VREG; 235 break; 236 237 case PFSctl: /* /proc/N/ctl = --w------ */ 238 case PFSnote: /* /proc/N/note = --w------ */ 239 case PFSnotepg: /* /proc/N/notepg = --w------ */ 240 pfs->pfs_mode = S_IWUSR; 241 vp->v_type = VREG; 242 break; 243 244 case PFSmap: /* /proc/N/map = -r--r--r-- */ 245 case PFSmaps: /* /proc/N/maps = -r--r--r-- */ 246 case PFSstatus: /* /proc/N/status = -r--r--r-- */ 247 case PFSstat: /* /proc/N/stat = -r--r--r-- */ 248 case PFScmdline: /* /proc/N/cmdline = -r--r--r-- */ 249 case PFSemul: /* /proc/N/emul = -r--r--r-- */ 250 case PFSmeminfo: /* /proc/meminfo = -r--r--r-- */ 251 case PFSdevices: /* /proc/devices = -r--r--r-- */ 252 case PFScpuinfo: /* /proc/cpuinfo = -r--r--r-- */ 253 case PFSuptime: /* /proc/uptime = -r--r--r-- */ 254 case PFSmounts: /* /proc/mounts = -r--r--r-- */ 255 pfs->pfs_mode = S_IRUSR|S_IRGRP|S_IROTH; 256 vp->v_type = VREG; 257 break; 258 259 
#ifdef __HAVE_PROCFS_MACHDEP 260 PROCFS_MACHDEP_NODETYPE_CASES 261 procfs_machdep_allocvp(vp); 262 break; 263 #endif 264 265 default: 266 panic("procfs_allocvp"); 267 } 268 269 procfs_hashins(pfs); 270 uvm_vnp_setsize(vp, 0); 271 lockmgr(&pfs_hashlock, LK_RELEASE, NULL); 272 273 *vpp = vp; 274 return (0); 275 276 bad: 277 lockmgr(&pfs_hashlock, LK_RELEASE, NULL); 278 FREE(pfs, M_TEMP); 279 ungetnewvnode(vp); 280 return (error); 281 } 282 283 int 284 procfs_freevp(vp) 285 struct vnode *vp; 286 { 287 struct pfsnode *pfs = VTOPFS(vp); 288 289 procfs_hashrem(pfs); 290 291 FREE(vp->v_data, M_TEMP); 292 vp->v_data = 0; 293 return (0); 294 } 295 296 int 297 procfs_rw(v) 298 void *v; 299 { 300 struct vop_read_args *ap = v; 301 struct vnode *vp = ap->a_vp; 302 struct uio *uio = ap->a_uio; 303 struct lwp *curl; 304 struct lwp *l; 305 struct pfsnode *pfs = VTOPFS(vp); 306 struct proc *p; 307 308 if (uio->uio_offset < 0) 309 return EINVAL; 310 p = PFIND(pfs->pfs_pid); 311 if (p == 0) 312 return ESRCH; 313 /* 314 * Do not allow init to be modified while in secure mode; it 315 * could be duped into changing the security level. 316 */ 317 if (uio->uio_rw == UIO_WRITE && p == initproc && securelevel > -1) 318 return EPERM; 319 320 curl = curlwp; 321 322 /* XXX NJWLWP 323 * The entire procfs interface needs work to be useful to 324 * a process with multiple LWPs. For the moment, we'll 325 * just kluge this and fail on others. 
326 */ 327 l = proc_representative_lwp(p); 328 329 switch (pfs->pfs_type) { 330 case PFSnote: 331 case PFSnotepg: 332 return (procfs_donote(curl, p, pfs, uio)); 333 334 case PFSregs: 335 return (procfs_doregs(curl, l, pfs, uio)); 336 337 case PFSfpregs: 338 return (procfs_dofpregs(curl, l, pfs, uio)); 339 340 case PFSctl: 341 return (procfs_doctl(curl, l, pfs, uio)); 342 343 case PFSstatus: 344 return (procfs_dostatus(curl, l, pfs, uio)); 345 346 case PFSstat: 347 return (procfs_do_pid_stat(curl, l, pfs, uio)); 348 349 case PFSmap: 350 return (procfs_domap(curl, p, pfs, uio, 0)); 351 352 case PFSmaps: 353 return (procfs_domap(curl, p, pfs, uio, 1)); 354 355 case PFSmem: 356 return (procfs_domem(curl, l, pfs, uio)); 357 358 case PFScmdline: 359 return (procfs_docmdline(curl, p, pfs, uio)); 360 361 case PFSmeminfo: 362 return (procfs_domeminfo(curl, p, pfs, uio)); 363 364 case PFSdevices: 365 return (procfs_dodevices(curl, p, pfs, uio)); 366 367 case PFScpuinfo: 368 return (procfs_docpuinfo(curl, p, pfs, uio)); 369 370 case PFSfd: 371 return (procfs_dofd(curl, p, pfs, uio)); 372 373 case PFSuptime: 374 return (procfs_douptime(curl, p, pfs, uio)); 375 376 case PFSmounts: 377 return (procfs_domounts(curl, p, pfs, uio)); 378 379 case PFSemul: 380 return procfs_doemul(curl, p, pfs, uio); 381 382 #ifdef __HAVE_PROCFS_MACHDEP 383 PROCFS_MACHDEP_NODETYPE_CASES 384 return (procfs_machdep_rw(curl, l, pfs, uio)); 385 #endif 386 387 default: 388 return (EOPNOTSUPP); 389 } 390 } 391 392 /* 393 * Get a string from userland into (bf). Strip a trailing 394 * nl character (to allow easy access from the shell). 395 * The buffer should be *buflenp + 1 chars long. vfs_getuserstr 396 * will automatically add a nul char at the end. 397 * 398 * Returns 0 on success or the following errors 399 * 400 * EINVAL: file offset is non-zero. 
401 * EMSGSIZE: message is longer than kernel buffer 402 * EFAULT: user i/o buffer is not addressable 403 */ 404 int 405 vfs_getuserstr(uio, bf, buflenp) 406 struct uio *uio; 407 char *bf; 408 int *buflenp; 409 { 410 int xlen; 411 int error; 412 413 if (uio->uio_offset != 0) 414 return (EINVAL); 415 416 xlen = *buflenp; 417 418 /* must be able to read the whole string in one go */ 419 if (xlen < uio->uio_resid) 420 return (EMSGSIZE); 421 xlen = uio->uio_resid; 422 423 if ((error = uiomove(bf, xlen, uio)) != 0) 424 return (error); 425 426 /* allow multiple writes without seeks */ 427 uio->uio_offset = 0; 428 429 /* cleanup string and remove trailing newline */ 430 bf[xlen] = '\0'; 431 xlen = strlen(bf); 432 if (xlen > 0 && bf[xlen-1] == '\n') 433 bf[--xlen] = '\0'; 434 *buflenp = xlen; 435 436 return (0); 437 } 438 439 const vfs_namemap_t * 440 vfs_findname(nm, bf, buflen) 441 const vfs_namemap_t *nm; 442 const char *bf; 443 int buflen; 444 { 445 446 for (; nm->nm_name; nm++) 447 if (memcmp(bf, nm->nm_name, buflen+1) == 0) 448 return (nm); 449 450 return (0); 451 } 452 453 /* 454 * Initialize pfsnode hash table. 
455 */ 456 void 457 procfs_hashinit() 458 { 459 lockinit(&pfs_hashlock, PINOD, "pfs_hashlock", 0, 0); 460 pfs_hashtbl = hashinit(desiredvnodes / 4, HASH_LIST, M_UFSMNT, 461 M_WAITOK, &pfs_ihash); 462 simple_lock_init(&pfs_hash_slock); 463 } 464 465 void 466 procfs_hashreinit() 467 { 468 struct pfsnode *pp; 469 struct pfs_hashhead *oldhash, *hash; 470 u_long i, oldmask, mask, val; 471 472 hash = hashinit(desiredvnodes / 4, HASH_LIST, M_UFSMNT, M_WAITOK, 473 &mask); 474 475 simple_lock(&pfs_hash_slock); 476 oldhash = pfs_hashtbl; 477 oldmask = pfs_ihash; 478 pfs_hashtbl = hash; 479 pfs_ihash = mask; 480 for (i = 0; i <= oldmask; i++) { 481 while ((pp = LIST_FIRST(&oldhash[i])) != NULL) { 482 LIST_REMOVE(pp, pfs_hash); 483 val = PFSPIDHASH(pp->pfs_pid); 484 LIST_INSERT_HEAD(&hash[val], pp, pfs_hash); 485 } 486 } 487 simple_unlock(&pfs_hash_slock); 488 hashdone(oldhash, M_UFSMNT); 489 } 490 491 /* 492 * Free pfsnode hash table. 493 */ 494 void 495 procfs_hashdone() 496 { 497 hashdone(pfs_hashtbl, M_UFSMNT); 498 } 499 500 struct vnode * 501 procfs_hashget(pid, type, fd, mp) 502 pid_t pid; 503 pfstype type; 504 int fd; 505 struct mount *mp; 506 { 507 struct pfs_hashhead *ppp; 508 struct pfsnode *pp; 509 struct vnode *vp; 510 511 loop: 512 simple_lock(&pfs_hash_slock); 513 ppp = &pfs_hashtbl[PFSPIDHASH(pid)]; 514 LIST_FOREACH(pp, ppp, pfs_hash) { 515 vp = PFSTOV(pp); 516 if (pid == pp->pfs_pid && pp->pfs_type == type && 517 pp->pfs_fd == fd && vp->v_mount == mp) { 518 simple_lock(&vp->v_interlock); 519 simple_unlock(&pfs_hash_slock); 520 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) 521 goto loop; 522 return (vp); 523 } 524 } 525 simple_unlock(&pfs_hash_slock); 526 return (NULL); 527 } 528 529 /* 530 * Insert the pfsnode into the hash table and lock it. 
531 */ 532 void 533 procfs_hashins(pp) 534 struct pfsnode *pp; 535 { 536 struct pfs_hashhead *ppp; 537 538 /* lock the pfsnode, then put it on the appropriate hash list */ 539 lockmgr(&pp->pfs_vnode->v_lock, LK_EXCLUSIVE, (struct simplelock *)0); 540 541 simple_lock(&pfs_hash_slock); 542 ppp = &pfs_hashtbl[PFSPIDHASH(pp->pfs_pid)]; 543 LIST_INSERT_HEAD(ppp, pp, pfs_hash); 544 simple_unlock(&pfs_hash_slock); 545 } 546 547 /* 548 * Remove the pfsnode from the hash table. 549 */ 550 void 551 procfs_hashrem(pp) 552 struct pfsnode *pp; 553 { 554 simple_lock(&pfs_hash_slock); 555 LIST_REMOVE(pp, pfs_hash); 556 simple_unlock(&pfs_hash_slock); 557 } 558 559 void 560 procfs_revoke_vnodes(p, arg) 561 struct proc *p; 562 void *arg; 563 { 564 struct pfsnode *pfs, *pnext; 565 struct vnode *vp; 566 struct mount *mp = (struct mount *)arg; 567 struct pfs_hashhead *ppp; 568 569 if (!(p->p_flag & P_SUGID)) 570 return; 571 572 ppp = &pfs_hashtbl[PFSPIDHASH(p->p_pid)]; 573 for (pfs = LIST_FIRST(ppp); pfs; pfs = pnext) { 574 vp = PFSTOV(pfs); 575 pnext = LIST_NEXT(pfs, pfs_hash); 576 if (vp->v_usecount > 0 && pfs->pfs_pid == p->p_pid && 577 vp->v_mount == mp) 578 VOP_REVOKE(vp, REVOKEALL); 579 } 580 } 581 582 int 583 procfs_getfp(pfs, pown, fp) 584 struct pfsnode *pfs; 585 struct proc **pown; 586 struct file **fp; 587 { 588 struct proc *p = PFIND(pfs->pfs_pid); 589 590 if (p == NULL) 591 return ESRCH; 592 593 if (pfs->pfs_fd == -1) 594 return EINVAL; 595 596 if ((*fp = fd_getfile(p->p_fd, pfs->pfs_fd)) == NULL) 597 return EBADF; 598 599 *pown = p; 600 return 0; 601 } 602 603 int 604 procfs_doemul(struct lwp *curl, struct proc *p, 605 struct pfsnode *pfs, struct uio *uio) 606 { 607 const char *ename = p->p_emul->e_name; 608 return uiomove_frombuf(__UNCONST(ename), strlen(ename), uio); 609 } 610