1 /* $NetBSD: procfs_subr.c,v 1.69 2006/09/20 08:09:05 manu Exp $ */ 2 3 /* 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Jan-Simon Pendry. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)procfs_subr.c 8.6 (Berkeley) 5/14/95 35 */ 36 37 /* 38 * Copyright (c) 1994 Christopher G. Demetriou. All rights reserved. 
39 * Copyright (c) 1993 Jan-Simon Pendry 40 * 41 * This code is derived from software contributed to Berkeley by 42 * Jan-Simon Pendry. 43 * 44 * Redistribution and use in source and binary forms, with or without 45 * modification, are permitted provided that the following conditions 46 * are met: 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 3. All advertising materials mentioning features or use of this software 53 * must display the following acknowledgement: 54 * This product includes software developed by the University of 55 * California, Berkeley and its contributors. 56 * 4. Neither the name of the University nor the names of its contributors 57 * may be used to endorse or promote products derived from this software 58 * without specific prior written permission. 59 * 60 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 61 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 62 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 63 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 64 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 65 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 66 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 68 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 69 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 70 * SUCH DAMAGE. 
71 * 72 * @(#)procfs_subr.c 8.6 (Berkeley) 5/14/95 73 */ 74 75 #include <sys/cdefs.h> 76 __KERNEL_RCSID(0, "$NetBSD: procfs_subr.c,v 1.69 2006/09/20 08:09:05 manu Exp $"); 77 78 #include <sys/param.h> 79 #include <sys/systm.h> 80 #include <sys/time.h> 81 #include <sys/kernel.h> 82 #include <sys/proc.h> 83 #include <sys/vnode.h> 84 #include <sys/malloc.h> 85 #include <sys/stat.h> 86 #include <sys/file.h> 87 #include <sys/filedesc.h> 88 89 #include <miscfs/procfs/procfs.h> 90 91 void procfs_hashins(struct pfsnode *); 92 void procfs_hashrem(struct pfsnode *); 93 struct vnode *procfs_hashget(pid_t, pfstype, int, struct mount *); 94 95 LIST_HEAD(pfs_hashhead, pfsnode) *pfs_hashtbl; 96 u_long pfs_ihash; /* size of hash table - 1 */ 97 #define PFSPIDHASH(pid) ((pid) & pfs_ihash) 98 99 struct lock pfs_hashlock; 100 struct simplelock pfs_hash_slock; 101 102 #define ISSET(t, f) ((t) & (f)) 103 104 /* 105 * allocate a pfsnode/vnode pair. the vnode is 106 * referenced, and locked. 107 * 108 * the pid, pfs_type, fd, and mount point uniquely 109 * identify a pfsnode. the mount point is needed 110 * because someone might mount this filesystem 111 * twice. 112 * 113 * all pfsnodes are kept in a hash table indexed 114 * by pid. new nodes are only allocated when they cannot 115 * be found in this table. entries in the table are 116 * removed when the vfs reclaim entry is called. 117 * 118 * a single lock is kept for the entire table. this is 119 * needed because the getnewvnode() function can block 120 * waiting for a vnode to become free, in which case there 121 * may be more than one process trying to get the same 122 * vnode. this lock is only taken if we are going to 123 * call getnewvnode, since the kernel itself is single-threaded. 124 * 125 * if an entry is found in the table, then call vget() to 126 * take a reference. this is done because there may be 127 * zero references to it and so it needs to be removed from 128 * the vnode free list. 
129 */ 130 int 131 procfs_allocvp(mp, vpp, pid, pfs_type, fd) 132 struct mount *mp; 133 struct vnode **vpp; 134 pid_t pid; 135 pfstype pfs_type; 136 int fd; 137 { 138 struct pfsnode *pfs; 139 struct vnode *vp; 140 int error; 141 142 do { 143 if ((*vpp = procfs_hashget(pid, pfs_type, fd, mp)) != NULL) 144 return (0); 145 } while (lockmgr(&pfs_hashlock, LK_EXCLUSIVE|LK_SLEEPFAIL, 0)); 146 147 if ((error = getnewvnode(VT_PROCFS, mp, procfs_vnodeop_p, &vp)) != 0) { 148 *vpp = NULL; 149 lockmgr(&pfs_hashlock, LK_RELEASE, NULL); 150 return (error); 151 } 152 153 MALLOC(pfs, void *, sizeof(struct pfsnode), M_TEMP, M_WAITOK); 154 vp->v_data = pfs; 155 156 pfs->pfs_pid = pid; 157 pfs->pfs_type = pfs_type; 158 pfs->pfs_vnode = vp; 159 pfs->pfs_flags = 0; 160 pfs->pfs_fileno = PROCFS_FILENO(pid, pfs_type, fd); 161 pfs->pfs_fd = fd; 162 163 switch (pfs_type) { 164 case PFSroot: /* /proc = dr-xr-xr-x */ 165 pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH; 166 vp->v_type = VDIR; 167 vp->v_flag = VROOT; 168 break; 169 170 case PFScurproc: /* /proc/curproc = lr-xr-xr-x */ 171 case PFSself: /* /proc/self = lr-xr-xr-x */ 172 case PFScwd: /* /proc/N/cwd = lr-xr-xr-x */ 173 case PFSchroot: /* /proc/N/chroot = lr-xr-xr-x */ 174 pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH; 175 vp->v_type = VLNK; 176 break; 177 178 case PFSproc: /* /proc/N = dr-xr-xr-x */ 179 case PFSfd: 180 if (fd == -1) { /* /proc/N/fd = dr-xr-xr-x */ 181 pfs->pfs_mode = S_IRUSR|S_IXUSR; 182 vp->v_type = VDIR; 183 } else { /* /proc/N/fd/M = [ps-]rw------- */ 184 struct file *fp; 185 struct vnode *vxp; 186 struct proc *pown; 187 188 /* XXX can procfs_getfp() ever fail here? */ 189 if ((error = procfs_getfp(pfs, &pown, &fp)) != 0) 190 goto bad; 191 FILE_USE(fp); 192 193 pfs->pfs_mode = S_IRUSR|S_IWUSR; 194 switch (fp->f_type) { 195 case DTYPE_VNODE: 196 vxp = (struct vnode *)fp->f_data; 197 198 /* 199 * We make symlinks for directories 200 * to avoid cycles. 
201 */ 202 if (vxp->v_type == VDIR) 203 goto symlink; 204 vp->v_type = vxp->v_type; 205 break; 206 case DTYPE_PIPE: 207 vp->v_type = VFIFO; 208 break; 209 case DTYPE_SOCKET: 210 vp->v_type = VSOCK; 211 break; 212 case DTYPE_KQUEUE: 213 case DTYPE_MISC: 214 symlink: 215 pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP| 216 S_IXGRP|S_IROTH|S_IXOTH; 217 vp->v_type = VLNK; 218 break; 219 default: 220 error = EOPNOTSUPP; 221 FILE_UNUSE(fp, proc_representative_lwp(pown)); 222 goto bad; 223 } 224 FILE_UNUSE(fp, proc_representative_lwp(pown)); 225 } 226 break; 227 228 case PFSfile: /* /proc/N/file = -rw------- */ 229 case PFSmem: /* /proc/N/mem = -rw------- */ 230 case PFSregs: /* /proc/N/regs = -rw------- */ 231 case PFSfpregs: /* /proc/N/fpregs = -rw------- */ 232 pfs->pfs_mode = S_IRUSR|S_IWUSR; 233 vp->v_type = VREG; 234 break; 235 236 case PFSctl: /* /proc/N/ctl = --w------ */ 237 case PFSnote: /* /proc/N/note = --w------ */ 238 case PFSnotepg: /* /proc/N/notepg = --w------ */ 239 pfs->pfs_mode = S_IWUSR; 240 vp->v_type = VREG; 241 break; 242 243 case PFSmap: /* /proc/N/map = -r--r--r-- */ 244 case PFSmaps: /* /proc/N/maps = -r--r--r-- */ 245 case PFSstatus: /* /proc/N/status = -r--r--r-- */ 246 case PFSstat: /* /proc/N/stat = -r--r--r-- */ 247 case PFScmdline: /* /proc/N/cmdline = -r--r--r-- */ 248 case PFSmeminfo: /* /proc/meminfo = -r--r--r-- */ 249 case PFSdevices: /* /proc/devices = -r--r--r-- */ 250 case PFScpuinfo: /* /proc/cpuinfo = -r--r--r-- */ 251 case PFSuptime: /* /proc/uptime = -r--r--r-- */ 252 case PFSmounts: /* /proc/mounts = -r--r--r-- */ 253 pfs->pfs_mode = S_IRUSR|S_IRGRP|S_IROTH; 254 vp->v_type = VREG; 255 break; 256 257 #ifdef __HAVE_PROCFS_MACHDEP 258 PROCFS_MACHDEP_NODETYPE_CASES 259 procfs_machdep_allocvp(vp); 260 break; 261 #endif 262 263 default: 264 panic("procfs_allocvp"); 265 } 266 267 procfs_hashins(pfs); 268 uvm_vnp_setsize(vp, 0); 269 lockmgr(&pfs_hashlock, LK_RELEASE, NULL); 270 271 *vpp = vp; 272 return (0); 273 274 bad: 275 
lockmgr(&pfs_hashlock, LK_RELEASE, NULL); 276 FREE(pfs, M_TEMP); 277 ungetnewvnode(vp); 278 return (error); 279 } 280 281 int 282 procfs_freevp(vp) 283 struct vnode *vp; 284 { 285 struct pfsnode *pfs = VTOPFS(vp); 286 287 procfs_hashrem(pfs); 288 289 FREE(vp->v_data, M_TEMP); 290 vp->v_data = 0; 291 return (0); 292 } 293 294 int 295 procfs_rw(v) 296 void *v; 297 { 298 struct vop_read_args *ap = v; 299 struct vnode *vp = ap->a_vp; 300 struct uio *uio = ap->a_uio; 301 struct lwp *curl; 302 struct lwp *l; 303 struct pfsnode *pfs = VTOPFS(vp); 304 struct proc *p; 305 306 if (uio->uio_offset < 0) 307 return EINVAL; 308 p = PFIND(pfs->pfs_pid); 309 if (p == 0) 310 return ESRCH; 311 /* 312 * Do not allow init to be modified while in secure mode; it 313 * could be duped into changing the security level. 314 */ 315 if (uio->uio_rw == UIO_WRITE && p == initproc && securelevel > -1) 316 return EPERM; 317 318 curl = curlwp; 319 320 /* XXX NJWLWP 321 * The entire procfs interface needs work to be useful to 322 * a process with multiple LWPs. For the moment, we'll 323 * just kluge this and fail on others. 
324 */ 325 l = proc_representative_lwp(p); 326 327 switch (pfs->pfs_type) { 328 case PFSnote: 329 case PFSnotepg: 330 return (procfs_donote(curl, p, pfs, uio)); 331 332 case PFSregs: 333 return (procfs_doregs(curl, l, pfs, uio)); 334 335 case PFSfpregs: 336 return (procfs_dofpregs(curl, l, pfs, uio)); 337 338 case PFSctl: 339 return (procfs_doctl(curl, l, pfs, uio)); 340 341 case PFSstatus: 342 return (procfs_dostatus(curl, l, pfs, uio)); 343 344 case PFSstat: 345 return (procfs_do_pid_stat(curl, l, pfs, uio)); 346 347 case PFSmap: 348 return (procfs_domap(curl, p, pfs, uio, 0)); 349 350 case PFSmaps: 351 return (procfs_domap(curl, p, pfs, uio, 1)); 352 353 case PFSmem: 354 return (procfs_domem(curl, l, pfs, uio)); 355 356 case PFScmdline: 357 return (procfs_docmdline(curl, p, pfs, uio)); 358 359 case PFSmeminfo: 360 return (procfs_domeminfo(curl, p, pfs, uio)); 361 362 case PFSdevices: 363 return (procfs_dodevices(curl, p, pfs, uio)); 364 365 case PFScpuinfo: 366 return (procfs_docpuinfo(curl, p, pfs, uio)); 367 368 case PFSfd: 369 return (procfs_dofd(curl, p, pfs, uio)); 370 371 case PFSuptime: 372 return (procfs_douptime(curl, p, pfs, uio)); 373 374 case PFSmounts: 375 return (procfs_domounts(curl, p, pfs, uio)); 376 377 #ifdef __HAVE_PROCFS_MACHDEP 378 PROCFS_MACHDEP_NODETYPE_CASES 379 return (procfs_machdep_rw(curl, l, pfs, uio)); 380 #endif 381 382 default: 383 return (EOPNOTSUPP); 384 } 385 } 386 387 /* 388 * Get a string from userland into (bf). Strip a trailing 389 * nl character (to allow easy access from the shell). 390 * The buffer should be *buflenp + 1 chars long. vfs_getuserstr 391 * will automatically add a nul char at the end. 392 * 393 * Returns 0 on success or the following errors 394 * 395 * EINVAL: file offset is non-zero. 
396 * EMSGSIZE: message is longer than kernel buffer 397 * EFAULT: user i/o buffer is not addressable 398 */ 399 int 400 vfs_getuserstr(uio, bf, buflenp) 401 struct uio *uio; 402 char *bf; 403 int *buflenp; 404 { 405 int xlen; 406 int error; 407 408 if (uio->uio_offset != 0) 409 return (EINVAL); 410 411 xlen = *buflenp; 412 413 /* must be able to read the whole string in one go */ 414 if (xlen < uio->uio_resid) 415 return (EMSGSIZE); 416 xlen = uio->uio_resid; 417 418 if ((error = uiomove(bf, xlen, uio)) != 0) 419 return (error); 420 421 /* allow multiple writes without seeks */ 422 uio->uio_offset = 0; 423 424 /* cleanup string and remove trailing newline */ 425 bf[xlen] = '\0'; 426 xlen = strlen(bf); 427 if (xlen > 0 && bf[xlen-1] == '\n') 428 bf[--xlen] = '\0'; 429 *buflenp = xlen; 430 431 return (0); 432 } 433 434 const vfs_namemap_t * 435 vfs_findname(nm, bf, buflen) 436 const vfs_namemap_t *nm; 437 const char *bf; 438 int buflen; 439 { 440 441 for (; nm->nm_name; nm++) 442 if (memcmp(bf, nm->nm_name, buflen+1) == 0) 443 return (nm); 444 445 return (0); 446 } 447 448 /* 449 * Initialize pfsnode hash table. 
450 */ 451 void 452 procfs_hashinit() 453 { 454 lockinit(&pfs_hashlock, PINOD, "pfs_hashlock", 0, 0); 455 pfs_hashtbl = hashinit(desiredvnodes / 4, HASH_LIST, M_UFSMNT, 456 M_WAITOK, &pfs_ihash); 457 simple_lock_init(&pfs_hash_slock); 458 } 459 460 void 461 procfs_hashreinit() 462 { 463 struct pfsnode *pp; 464 struct pfs_hashhead *oldhash, *hash; 465 u_long i, oldmask, mask, val; 466 467 hash = hashinit(desiredvnodes / 4, HASH_LIST, M_UFSMNT, M_WAITOK, 468 &mask); 469 470 simple_lock(&pfs_hash_slock); 471 oldhash = pfs_hashtbl; 472 oldmask = pfs_ihash; 473 pfs_hashtbl = hash; 474 pfs_ihash = mask; 475 for (i = 0; i <= oldmask; i++) { 476 while ((pp = LIST_FIRST(&oldhash[i])) != NULL) { 477 LIST_REMOVE(pp, pfs_hash); 478 val = PFSPIDHASH(pp->pfs_pid); 479 LIST_INSERT_HEAD(&hash[val], pp, pfs_hash); 480 } 481 } 482 simple_unlock(&pfs_hash_slock); 483 hashdone(oldhash, M_UFSMNT); 484 } 485 486 /* 487 * Free pfsnode hash table. 488 */ 489 void 490 procfs_hashdone() 491 { 492 hashdone(pfs_hashtbl, M_UFSMNT); 493 } 494 495 struct vnode * 496 procfs_hashget(pid, type, fd, mp) 497 pid_t pid; 498 pfstype type; 499 int fd; 500 struct mount *mp; 501 { 502 struct pfs_hashhead *ppp; 503 struct pfsnode *pp; 504 struct vnode *vp; 505 506 loop: 507 simple_lock(&pfs_hash_slock); 508 ppp = &pfs_hashtbl[PFSPIDHASH(pid)]; 509 LIST_FOREACH(pp, ppp, pfs_hash) { 510 vp = PFSTOV(pp); 511 if (pid == pp->pfs_pid && pp->pfs_type == type && 512 pp->pfs_fd == fd && vp->v_mount == mp) { 513 simple_lock(&vp->v_interlock); 514 simple_unlock(&pfs_hash_slock); 515 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) 516 goto loop; 517 return (vp); 518 } 519 } 520 simple_unlock(&pfs_hash_slock); 521 return (NULL); 522 } 523 524 /* 525 * Insert the pfsnode into the hash table and lock it. 
526 */ 527 void 528 procfs_hashins(pp) 529 struct pfsnode *pp; 530 { 531 struct pfs_hashhead *ppp; 532 533 /* lock the pfsnode, then put it on the appropriate hash list */ 534 lockmgr(&pp->pfs_vnode->v_lock, LK_EXCLUSIVE, (struct simplelock *)0); 535 536 simple_lock(&pfs_hash_slock); 537 ppp = &pfs_hashtbl[PFSPIDHASH(pp->pfs_pid)]; 538 LIST_INSERT_HEAD(ppp, pp, pfs_hash); 539 simple_unlock(&pfs_hash_slock); 540 } 541 542 /* 543 * Remove the pfsnode from the hash table. 544 */ 545 void 546 procfs_hashrem(pp) 547 struct pfsnode *pp; 548 { 549 simple_lock(&pfs_hash_slock); 550 LIST_REMOVE(pp, pfs_hash); 551 simple_unlock(&pfs_hash_slock); 552 } 553 554 void 555 procfs_revoke_vnodes(p, arg) 556 struct proc *p; 557 void *arg; 558 { 559 struct pfsnode *pfs, *pnext; 560 struct vnode *vp; 561 struct mount *mp = (struct mount *)arg; 562 struct pfs_hashhead *ppp; 563 564 if (!(p->p_flag & P_SUGID)) 565 return; 566 567 ppp = &pfs_hashtbl[PFSPIDHASH(p->p_pid)]; 568 for (pfs = LIST_FIRST(ppp); pfs; pfs = pnext) { 569 vp = PFSTOV(pfs); 570 pnext = LIST_NEXT(pfs, pfs_hash); 571 if (vp->v_usecount > 0 && pfs->pfs_pid == p->p_pid && 572 vp->v_mount == mp) 573 VOP_REVOKE(vp, REVOKEALL); 574 } 575 } 576 577 int 578 procfs_getfp(pfs, pown, fp) 579 struct pfsnode *pfs; 580 struct proc **pown; 581 struct file **fp; 582 { 583 struct proc *p = PFIND(pfs->pfs_pid); 584 585 if (p == NULL) 586 return ESRCH; 587 588 if (pfs->pfs_fd == -1) 589 return EINVAL; 590 591 if ((*fp = fd_getfile(p->p_fd, pfs->pfs_fd)) == NULL) 592 return EBADF; 593 594 *pown = p; 595 return 0; 596 } 597