1 /* 2 * Copyright (c) 1993 Jan-Simon Pendry 3 * Copyright (c) 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Jan-Simon Pendry. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)procfs_subr.c 8.6 (Berkeley) 5/14/95 34 * 35 * $FreeBSD: src/sys/miscfs/procfs/procfs_subr.c,v 1.26.2.3 2002/02/18 21:28:04 des Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/sysctl.h> 41 #include <sys/uio.h> 42 #include <sys/proc.h> 43 #include <sys/mount.h> 44 #include <sys/vnode.h> 45 #include <sys/malloc.h> 46 #include <sys/spinlock.h> 47 48 #include <sys/spinlock2.h> 49 50 #include <vfs/procfs/procfs.h> 51 52 #define PFS_HSIZE 1031 53 54 struct pfshead { 55 struct spinlock spin; 56 struct pfsnode *first; 57 } __cachealign; 58 59 static struct pfshead pfshead[PFS_HSIZE]; 60 static struct lock procfslk = LOCK_INITIALIZER("pvplk", 0, 0); 61 62 MALLOC_DEFINE(M_PROCFS, "procfs", "procfs v_data"); 63 64 #define PFSHASH(pid) &pfshead[((pid) & ~PFS_DEAD) % PFS_HSIZE] 65 66 /* 67 * Allocate a pfsnode/vnode pair. If no error occurs the returned vnode 68 * will be referenced and exclusively locked. 69 * 70 * The pid, pfs_type, and mount point uniquely identify a pfsnode. 71 * The mount point is needed because someone might mount this filesystem 72 * twice. 73 * 74 * All pfsnodes are maintained on a singly-linked list. new nodes are 75 * only allocated when they cannot be found on this list. entries on 76 * the list are removed when the vfs reclaim entry is called. 77 * 78 * A single lock is kept for the entire list. this is needed because the 79 * getnewvnode() function can block waiting for a vnode to become free, 80 * in which case there may be more than one process trying to get the same 81 * vnode. this lock is only taken if we are going to call getnewvnode, 82 * since the kernel itself is single-threaded. 83 * 84 * If an entry is found on the list, then call vget() to take a reference 85 * and obtain the lock. This will properly re-reference the vnode if it 86 * had gotten onto the free list. 87 */ 88 int 89 procfs_allocvp(struct mount *mp, struct vnode **vpp, long pid, pfstype pfs_type) 90 { 91 struct pfsnode *pfs; 92 struct vnode *vp; 93 struct pfshead *ph; 94 int error; 95 96 ph = PFSHASH(pid); 97 loop: 98 spin_lock(&ph->spin); 99 for (pfs = ph->first; pfs; pfs = pfs->pfs_next) { 100 if (pfs->pfs_pid == pid && pfs->pfs_type == pfs_type && 101 PFSTOV(pfs)->v_mount == mp) { 102 vp = PFSTOV(pfs); 103 vhold(vp); 104 spin_unlock(&ph->spin); 105 if (vget(vp, LK_EXCLUSIVE)) { 106 vdrop(vp); 107 goto loop; 108 } 109 vdrop(vp); 110 111 /* 112 * Make sure the vnode is still in the cache after 113 * getting the interlock to avoid racing a free. 114 */ 115 spin_lock(&ph->spin); 116 for (pfs = ph->first; pfs; pfs = pfs->pfs_next) { 117 if (PFSTOV(pfs) == vp && 118 pfs->pfs_pid == pid && 119 pfs->pfs_type == pfs_type && 120 PFSTOV(pfs)->v_mount == mp) { 121 break; 122 } 123 } 124 if (pfs == NULL || PFSTOV(pfs) != vp) { 125 spin_unlock(&ph->spin); 126 vput(vp); 127 goto loop; 128 129 } 130 spin_unlock(&ph->spin); 131 *vpp = vp; 132 return (0); 133 } 134 } 135 spin_unlock(&ph->spin); 136 137 /* 138 * otherwise lock the vp list while we call getnewvnode 139 * since that can block. 140 */ 141 if (lockmgr(&procfslk, LK_EXCLUSIVE|LK_SLEEPFAIL)) 142 goto loop; 143 144 /* 145 * Do the MALLOC before the getnewvnode since doing so afterward 146 * might cause a bogus v_data pointer to get dereferenced 147 * elsewhere if MALLOC should block. 148 * 149 * XXX this may not matter anymore since getnewvnode now returns 150 * a VX locked vnode. 151 */ 152 pfs = kmalloc(sizeof(struct pfsnode), M_PROCFS, M_WAITOK); 153 154 error = getnewvnode(VT_PROCFS, mp, vpp, 0, 0); 155 if (error) { 156 kfree(pfs, M_PROCFS); 157 goto out; 158 } 159 vp = *vpp; 160 161 vp->v_data = pfs; 162 163 pfs->pfs_next = 0; 164 pfs->pfs_pid = (pid_t) pid; 165 pfs->pfs_type = pfs_type; 166 pfs->pfs_vnode = vp; 167 pfs->pfs_flags = 0; 168 pfs->pfs_fileno = PROCFS_FILENO(pid, pfs_type); 169 lockinit(&pfs->pfs_lock, "pfslk", 0, 0); 170 171 switch (pfs_type) { 172 case Proot: /* /proc = dr-xr-xr-x */ 173 pfs->pfs_mode = (VREAD|VEXEC) | 174 (VREAD|VEXEC) >> 3 | 175 (VREAD|VEXEC) >> 6; 176 vp->v_type = VDIR; 177 vp->v_flag = VROOT; 178 break; 179 180 case Pcurproc: /* /proc/curproc = lr--r--r-- */ 181 pfs->pfs_mode = (VREAD) | 182 (VREAD >> 3) | 183 (VREAD >> 6); 184 vp->v_type = VLNK; 185 break; 186 187 case Pproc: 188 pfs->pfs_mode = (VREAD|VEXEC) | 189 (VREAD|VEXEC) >> 3 | 190 (VREAD|VEXEC) >> 6; 191 vp->v_type = VDIR; 192 break; 193 194 case Pfile: 195 pfs->pfs_mode = (VREAD|VEXEC) | 196 (VREAD|VEXEC) >> 3 | 197 (VREAD|VEXEC) >> 6; 198 vp->v_type = VLNK; 199 break; 200 201 case Pmem: 202 pfs->pfs_mode = (VREAD|VWRITE); 203 vp->v_type = VREG; 204 break; 205 206 case Pregs: 207 case Pfpregs: 208 case Pdbregs: 209 pfs->pfs_mode = (VREAD|VWRITE); 210 vp->v_type = VREG; 211 break; 212 213 case Pctl: 214 case Pnote: 215 case Pnotepg: 216 pfs->pfs_mode = (VWRITE); 217 vp->v_type = VREG; 218 break; 219 220 case Ptype: 221 case Pmap: 222 case Pstatus: 223 case Pcmdline: 224 case Prlimit: 225 pfs->pfs_mode = (VREAD) | 226 (VREAD >> 3) | 227 (VREAD >> 6); 228 vp->v_type = VREG; 229 break; 230 231 default: 232 panic("procfs_allocvp"); 233 } 234 235 /* add to procfs vnode list */ 236 spin_lock(&ph->spin); 237 pfs->pfs_next = ph->first; 238 ph->first = pfs; 239 spin_unlock(&ph->spin); 240 vx_downgrade(vp); 241 242 out: 243 lockmgr(&procfslk, LK_RELEASE); 244 245 return (error); 246 } 247 248 int 249 procfs_freevp(struct vnode *vp) 250 { 251 struct pfshead *ph; 252 struct pfsnode **pp; 253 struct pfsnode *pfs; 254 255 pfs = VTOPFS(vp); 256 vp->v_data = NULL; 257 ph = PFSHASH(pfs->pfs_pid); 258 259 spin_lock(&ph->spin); 260 pp = &ph->first; 261 while (*pp != pfs) { 262 KKASSERT(*pp != NULL); 263 pp = &(*pp)->pfs_next; 264 } 265 *pp = pfs->pfs_next; 266 spin_unlock(&ph->spin); 267 268 pfs->pfs_next = NULL; 269 pfs->pfs_vnode = NULL; 270 kfree(pfs, M_PROCFS); 271 272 return (0); 273 } 274 275 /* 276 * Try to find the calling pid. Note that pfind() 277 * now references the proc structure to be returned 278 * and needs to be released later with PRELE(). 279 */ 280 struct proc * 281 pfs_pfind(pid_t pfs_pid) 282 { 283 struct proc *p = NULL; 284 285 if (pfs_pid == 0) { 286 p = &proc0; 287 PHOLD(p); 288 } else { 289 p = pfind(pfs_pid); 290 } 291 292 /* 293 * Make sure the process is not in the middle of exiting (where 294 * a lot of its structural members may wind up being NULL). If it 295 * is we give up on it. 296 */ 297 if (p) { 298 lwkt_gettoken(&p->p_token); 299 if (p->p_flags & P_POSTEXIT) { 300 lwkt_reltoken(&p->p_token); 301 PRELE(p); 302 p = NULL; 303 } 304 } 305 return p; 306 } 307 308 struct proc * 309 pfs_zpfind(pid_t pfs_pid) 310 { 311 struct proc *p = NULL; 312 313 if (pfs_pid == 0) { 314 p = &proc0; 315 PHOLD(p); 316 } else { 317 p = zpfind(pfs_pid); 318 } 319 320 /* 321 * Make sure the process is not in the middle of exiting (where 322 * a lot of its structural members may wind up being NULL). If it 323 * is we give up on it. 324 */ 325 if (p) { 326 lwkt_gettoken(&p->p_token); 327 if (p->p_flags & P_POSTEXIT) { 328 lwkt_reltoken(&p->p_token); 329 PRELE(p); 330 p = NULL; 331 } 332 } 333 return p; 334 } 335 336 void 337 pfs_pdone(struct proc *p) 338 { 339 if (p) { 340 lwkt_reltoken(&p->p_token); 341 PRELE(p); 342 } 343 } 344 345 int 346 procfs_rw(struct vop_read_args *ap) 347 { 348 struct vnode *vp = ap->a_vp; 349 struct uio *uio = ap->a_uio; 350 struct thread *curtd = uio->uio_td; 351 struct proc *curp; 352 struct pfsnode *pfs = VTOPFS(vp); 353 struct proc *p; 354 struct lwp *lp; 355 int rtval; 356 357 if (curtd == NULL) 358 return (EINVAL); 359 if ((curp = curtd->td_proc) == NULL) /* XXX */ 360 return (EINVAL); 361 362 p = pfs_pfind(pfs->pfs_pid); 363 if (p == NULL) { 364 rtval = EINVAL; 365 goto out; 366 } 367 if (p->p_pid == 1 && securelevel > 0 && uio->uio_rw == UIO_WRITE) { 368 rtval = EACCES; 369 goto out; 370 } 371 372 /* 373 * XXX lwp 374 */ 375 lp = FIRST_LWP_IN_PROC(p); 376 if (lp == NULL) { 377 rtval = EINVAL; 378 goto out; 379 } 380 LWPHOLD(lp); 381 382 lockmgr(&pfs->pfs_lock, LK_EXCLUSIVE); 383 384 switch (pfs->pfs_type) { 385 case Pnote: 386 case Pnotepg: 387 rtval = procfs_donote(curp, lp, pfs, uio); 388 break; 389 390 case Pregs: 391 rtval = procfs_doregs(curp, lp, pfs, uio); 392 break; 393 394 case Pfpregs: 395 rtval = procfs_dofpregs(curp, lp, pfs, uio); 396 break; 397 398 case Pdbregs: 399 rtval = procfs_dodbregs(curp, lp, pfs, uio); 400 break; 401 402 case Pctl: 403 rtval = procfs_doctl(curp, lp, pfs, uio); 404 break; 405 406 case Pstatus: 407 rtval = procfs_dostatus(curp, lp, pfs, uio); 408 break; 409 410 case Pmap: 411 rtval = procfs_domap(curp, lp, pfs, uio); 412 break; 413 414 case Pmem: 415 rtval = procfs_domem(curp, lp, pfs, uio); 416 break; 417 418 case Ptype: 419 rtval = procfs_dotype(curp, lp, pfs, uio); 420 break; 421 422 case Pcmdline: 423 rtval = procfs_docmdline(curp, lp, pfs, uio); 424 break; 425 426 case Prlimit: 427 rtval = procfs_dorlimit(curp, lp, pfs, uio); 428 break; 429 430 default: 431 rtval = EOPNOTSUPP; 432 break; 433 } 434 LWPRELE(lp); 435 436 lockmgr(&pfs->pfs_lock, LK_RELEASE); 437 438 if (uio->uio_rw == UIO_WRITE && rtval == 0) 439 KNOTE(&PFSTOV(pfs)->v_pollinfo.vpi_kqinfo.ki_note, NOTE_WRITE); 440 441 out: 442 pfs_pdone(p); 443 444 return rtval; 445 } 446 447 /* 448 * Get a string from userland into (buf). Strip a trailing 449 * nl character (to allow easy access from the shell). 450 * The buffer should be *buflenp + 1 chars long. vfs_getuserstr 451 * will automatically add a nul char at the end. 452 * 453 * Returns 0 on success or the following errors 454 * 455 * EINVAL: file offset is non-zero. 456 * EMSGSIZE: message is longer than kernel buffer 457 * EFAULT: user i/o buffer is not addressable 458 */ 459 int 460 vfs_getuserstr(struct uio *uio, char *buf, int *buflenp) 461 { 462 int xlen; 463 int error; 464 465 if (uio->uio_offset != 0) 466 return (EINVAL); 467 468 xlen = *buflenp; 469 470 /* must be able to read the whole string in one go */ 471 if (xlen < uio->uio_resid) 472 return (EMSGSIZE); 473 xlen = uio->uio_resid; 474 475 if ((error = uiomove(buf, xlen, uio)) != 0) 476 return (error); 477 478 /* allow multiple writes without seeks */ 479 uio->uio_offset = 0; 480 481 /* cleanup string and remove trailing newline */ 482 buf[xlen] = '\0'; 483 xlen = strlen(buf); 484 if (xlen > 0 && buf[xlen-1] == '\n') 485 buf[--xlen] = '\0'; 486 *buflenp = xlen; 487 488 return (0); 489 } 490 491 vfs_namemap_t * 492 vfs_findname(vfs_namemap_t *nm, char *buf, int buflen) 493 { 494 495 for (; nm->nm_name; nm++) 496 if (bcmp(buf, nm->nm_name, buflen+1) == 0) 497 return (nm); 498 499 return (0); 500 } 501 502 void 503 procfs_exit(struct thread *td) 504 { 505 struct pfshead *ph; 506 struct pfsnode *pfs; 507 struct vnode *vp; 508 pid_t pid; 509 510 KKASSERT(td->td_proc); 511 pid = td->td_proc->p_pid; 512 513 /* 514 * NOTE: We can't just vgone() the vnode any more, not while 515 * it may potentially still be active. This will clean 516 * the vp and clear the mount and cause the new VOP subsystem 517 * to assert or panic when someone tries to do an operation 518 * on an open (exited) procfs descriptor. 519 * 520 * Prevent further operations on this pid by setting pfs_pid to -1. 521 * Note that a pfs_pid of 0 is used for nodes which do not track 522 * any particular pid. 523 * 524 * Use vx_get() to properly ref/lock a vp which may not have any 525 * refs and which may or may not already be reclaimed. vx_put() 526 * will then properly deactivate it and cause it to be recycled. 527 * 528 * The hash table can also get ripped out from under us when 529 * we block so take the easy way out and restart the scan. 530 */ 531 for (;;) { 532 ph = PFSHASH(pid); 533 spin_lock(&ph->spin); 534 for (pfs = ph->first; pfs; pfs = pfs->pfs_next) { 535 if (pfs->pfs_pid == pid) 536 break; 537 } 538 if (pfs == NULL) { 539 spin_unlock(&ph->spin); 540 break; 541 } 542 vp = PFSTOV(pfs); 543 vhold(vp); 544 spin_unlock(&ph->spin); 545 vx_get(vp); 546 pfs->pfs_pid |= PFS_DEAD; /* does not effect hash */ 547 vx_put(vp); 548 vdrop(vp); 549 } 550 } 551