1 /* $NetBSD: procfs_vnops.c,v 1.220 2021/12/08 20:11:54 andvar Exp $ */ 2 3 /*- 4 * Copyright (c) 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1993, 1995 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Jan-Simon Pendry. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. 
Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 * 63 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 64 */ 65 66 /* 67 * Copyright (c) 1993 Jan-Simon Pendry 68 * 69 * This code is derived from software contributed to Berkeley by 70 * Jan-Simon Pendry. 71 * 72 * Redistribution and use in source and binary forms, with or without 73 * modification, are permitted provided that the following conditions 74 * are met: 75 * 1. Redistributions of source code must retain the above copyright 76 * notice, this list of conditions and the following disclaimer. 77 * 2. 
Redistributions in binary form must reproduce the above copyright 78 * notice, this list of conditions and the following disclaimer in the 79 * documentation and/or other materials provided with the distribution. 80 * 3. All advertising materials mentioning features or use of this software 81 * must display the following acknowledgement: 82 * This product includes software developed by the University of 83 * California, Berkeley and its contributors. 84 * 4. Neither the name of the University nor the names of its contributors 85 * may be used to endorse or promote products derived from this software 86 * without specific prior written permission. 87 * 88 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 89 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 90 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 91 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 92 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 93 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 94 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 95 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 96 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 97 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 98 * SUCH DAMAGE. 
99 * 100 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 101 */ 102 103 /* 104 * procfs vnode interface 105 */ 106 107 #include <sys/cdefs.h> 108 __KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.220 2021/12/08 20:11:54 andvar Exp $"); 109 110 #include <sys/param.h> 111 #include <sys/atomic.h> 112 #include <sys/systm.h> 113 #include <sys/time.h> 114 #include <sys/kernel.h> 115 #include <sys/file.h> 116 #include <sys/filedesc.h> 117 #include <sys/proc.h> 118 #include <sys/vnode.h> 119 #include <sys/namei.h> 120 #include <sys/malloc.h> 121 #include <sys/mount.h> 122 #include <sys/dirent.h> 123 #include <sys/resourcevar.h> 124 #include <sys/stat.h> 125 #include <sys/ptrace.h> 126 #include <sys/kauth.h> 127 #include <sys/exec.h> 128 129 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */ 130 131 #include <machine/reg.h> 132 133 #include <miscfs/genfs/genfs.h> 134 #include <miscfs/procfs/procfs.h> 135 136 /* 137 * Vnode Operations. 138 * 139 */ 140 141 static int procfs_validfile_linux(struct lwp *, struct mount *); 142 static int procfs_root_readdir_callback(struct proc *, void *); 143 static void procfs_dir(pfstype, struct lwp *, struct proc *, char **, char *, 144 size_t); 145 146 /* 147 * This is a list of the valid names in the 148 * process-specific sub-directories. 
It is used in procfs_lookup and procfs_readdir
 */
static const struct proc_target {
	u_char	pt_type;	/* dirent type of the entry (DT_DIR/DT_LNK/DT_REG) */
	u_char	pt_namlen;	/* length of pt_name, without the terminating NUL */
	const char	*pt_name;	/* entry name as seen in the directory */
	pfstype	pt_pfstype;	/* procfs node type handed to procfs_allocvp() */
	/*
	 * Optional filter.  procfs_lookup treats a NULL pointer as
	 * "always present"; otherwise the entry only matches when the
	 * function returns non-zero.
	 */
	int	(*pt_valid)(struct lwp *, struct mount *);
} proc_targets[] = {
/* N(s) expands to two initializers: the name length, then the name itself. */
#define N(s) sizeof(s)-1, s
	/* d_type   name (len, string)	pfs type	validp */
	{ DT_DIR, N("."),	PFSproc,	NULL },
	{ DT_DIR, N(".."),	PFSroot,	NULL },
	{ DT_DIR, N("fd"),	PFSfd,		NULL },
	{ DT_DIR, N("task"),	PFStask,	procfs_validfile_linux },
	{ DT_LNK, N("cwd"),	PFScwd,		NULL },
	{ DT_LNK, N("emul"),	PFSemul,	NULL },
	{ DT_LNK, N("root"),	PFSchroot,	NULL },
	{ DT_REG, N("auxv"),	PFSauxv,	procfs_validauxv },
	{ DT_REG, N("cmdline"), PFScmdline,	NULL },
	{ DT_REG, N("environ"), PFSenviron,	NULL },
	{ DT_REG, N("exe"),	PFSexe,		procfs_validfile },
	{ DT_REG, N("file"),	PFSfile,	procfs_validfile },
	{ DT_REG, N("fpregs"),	PFSfpregs,	procfs_validfpregs },
	{ DT_REG, N("limit"),	PFSlimit,	NULL },
	{ DT_REG, N("map"),	PFSmap,		procfs_validmap },
	{ DT_REG, N("maps"),	PFSmaps,	procfs_validmap },
	{ DT_REG, N("mem"),	PFSmem,		NULL },
	{ DT_REG, N("note"),	PFSnote,	NULL },
	{ DT_REG, N("notepg"),	PFSnotepg,	NULL },
	{ DT_REG, N("regs"),	PFSregs,	procfs_validregs },
	{ DT_REG, N("stat"),	PFSstat,	procfs_validfile_linux },
	{ DT_REG, N("statm"),	PFSstatm,	procfs_validfile_linux },
	{ DT_REG, N("status"),	PFSstatus,	NULL },
#ifdef __HAVE_PROCFS_MACHDEP
	/* NOTE(review): presumably machine-dependent entries written with N() -- confirm in the MD header */
	PROCFS_MACHDEP_NODETYPE_DEFNS
#endif
#undef N
};
/* Number of entries in proc_targets[]. */
static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);

/*
 * List of files in the root directory. Note: the validate function will
 * be called with p == NULL for these ones.
*/
static const struct proc_target proc_root_targets[] = {
/* N(s) expands to two initializers: the name length, then the name itself. */
#define N(s) sizeof(s)-1, s
	/* d_type   name (len, string)	    pfs type	       validp */
	{ DT_REG, N("meminfo"),     PFSmeminfo,        procfs_validfile_linux },
	{ DT_REG, N("cpuinfo"),     PFScpuinfo,        procfs_validfile_linux },
	{ DT_REG, N("uptime"),      PFSuptime,         procfs_validfile_linux },
	{ DT_REG, N("mounts"),	    PFSmounts,	       procfs_validfile_linux },
	{ DT_REG, N("devices"),     PFSdevices,        procfs_validfile_linux },
	{ DT_REG, N("stat"),	    PFScpustat,        procfs_validfile_linux },
	{ DT_REG, N("loadavg"),	    PFSloadavg,        procfs_validfile_linux },
	{ DT_REG, N("version"),     PFSversion,        procfs_validfile_linux },
#undef N
};
/* Number of entries in proc_root_targets[]. */
static const int nproc_root_targets =
    sizeof(proc_root_targets) / sizeof(proc_root_targets[0]);

/*
 * Vnode operation entry points.  Each one receives its operation-specific
 * argument structure through an opaque pointer.
 */
int	procfs_lookup(void *);
int	procfs_open(void *);
int	procfs_close(void *);
int	procfs_access(void *);
int	procfs_getattr(void *);
int	procfs_setattr(void *);
int	procfs_link(void *);
int	procfs_symlink(void *);
int	procfs_readdir(void *);
int	procfs_readlink(void *);
int	procfs_inactive(void *);
int	procfs_reclaim(void *);
int	procfs_print(void *);
int	procfs_pathconf(void *);
int	procfs_getpages(void *);

/*
 * File-local, length-bounded decimal parser; procfs_lookup feeds it
 * (cn_nameptr, cn_namelen) to turn a path component into a pid or fd.
 */
static int atoi(const char *, size_t);

/*
 * procfs vnode operations.
230 */ 231 int (**procfs_vnodeop_p)(void *); 232 const struct vnodeopv_entry_desc procfs_vnodeop_entries[] = { 233 { &vop_default_desc, vn_default_error }, 234 { &vop_parsepath_desc, genfs_parsepath }, /* parsepath */ 235 { &vop_lookup_desc, procfs_lookup }, /* lookup */ 236 { &vop_create_desc, genfs_eopnotsupp }, /* create */ 237 { &vop_mknod_desc, genfs_eopnotsupp }, /* mknod */ 238 { &vop_open_desc, procfs_open }, /* open */ 239 { &vop_close_desc, procfs_close }, /* close */ 240 { &vop_access_desc, procfs_access }, /* access */ 241 { &vop_accessx_desc, genfs_accessx }, /* accessx */ 242 { &vop_getattr_desc, procfs_getattr }, /* getattr */ 243 { &vop_setattr_desc, procfs_setattr }, /* setattr */ 244 { &vop_read_desc, procfs_rw }, /* read */ 245 { &vop_write_desc, procfs_rw }, /* write */ 246 { &vop_fallocate_desc, genfs_eopnotsupp }, /* fallocate */ 247 { &vop_fdiscard_desc, genfs_eopnotsupp }, /* fdiscard */ 248 { &vop_fcntl_desc, genfs_fcntl }, /* fcntl */ 249 { &vop_ioctl_desc, genfs_enoioctl }, /* ioctl */ 250 { &vop_poll_desc, genfs_poll }, /* poll */ 251 { &vop_kqfilter_desc, genfs_kqfilter }, /* kqfilter */ 252 { &vop_revoke_desc, genfs_revoke }, /* revoke */ 253 { &vop_fsync_desc, genfs_nullop }, /* fsync */ 254 { &vop_seek_desc, genfs_nullop }, /* seek */ 255 { &vop_remove_desc, genfs_eopnotsupp }, /* remove */ 256 { &vop_link_desc, procfs_link }, /* link */ 257 { &vop_rename_desc, genfs_eopnotsupp }, /* rename */ 258 { &vop_mkdir_desc, genfs_eopnotsupp }, /* mkdir */ 259 { &vop_rmdir_desc, genfs_eopnotsupp }, /* rmdir */ 260 { &vop_symlink_desc, procfs_symlink }, /* symlink */ 261 { &vop_readdir_desc, procfs_readdir }, /* readdir */ 262 { &vop_readlink_desc, procfs_readlink }, /* readlink */ 263 { &vop_abortop_desc, genfs_abortop }, /* abortop */ 264 { &vop_inactive_desc, procfs_inactive }, /* inactive */ 265 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */ 266 { &vop_lock_desc, genfs_lock }, /* lock */ 267 { &vop_unlock_desc, genfs_unlock }, /* 
unlock */ 268 { &vop_bmap_desc, genfs_eopnotsupp }, /* bmap */ 269 { &vop_strategy_desc, genfs_badop }, /* strategy */ 270 { &vop_print_desc, procfs_print }, /* print */ 271 { &vop_islocked_desc, genfs_islocked }, /* islocked */ 272 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */ 273 { &vop_advlock_desc, genfs_einval }, /* advlock */ 274 { &vop_getpages_desc, procfs_getpages }, /* getpages */ 275 { &vop_putpages_desc, genfs_null_putpages }, /* putpages */ 276 { NULL, NULL } 277 }; 278 const struct vnodeopv_desc procfs_vnodeop_opv_desc = 279 { &procfs_vnodeop_p, procfs_vnodeop_entries }; 280 /* 281 * set things up for doing i/o on 282 * the pfsnode (vp). (vp) is locked 283 * on entry, and should be left locked 284 * on exit. 285 * 286 * for procfs we don't need to do anything 287 * in particular for i/o. all that is done 288 * is to support exclusive open on process 289 * memory images. 290 */ 291 int 292 procfs_open(void *v) 293 { 294 struct vop_open_args /* { 295 struct vnode *a_vp; 296 int a_mode; 297 kauth_cred_t a_cred; 298 } */ *ap = v; 299 struct vnode *vp = ap->a_vp; 300 struct pfsnode *pfs = VTOPFS(vp); 301 struct lwp *l1; 302 struct proc *p2; 303 int error; 304 305 if ((error = 306 procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p2, ENOENT)) != 0) 307 return error; 308 309 l1 = curlwp; /* tracer */ 310 311 #define M2K(m) (((m) & FREAD) && ((m) & FWRITE) ? \ 312 KAUTH_REQ_PROCESS_PROCFS_RW : \ 313 (m) & FWRITE ? 
KAUTH_REQ_PROCESS_PROCFS_WRITE : \ 314 KAUTH_REQ_PROCESS_PROCFS_READ) 315 316 mutex_enter(p2->p_lock); 317 error = kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_PROCFS, 318 p2, pfs, KAUTH_ARG(M2K(ap->a_mode)), NULL); 319 mutex_exit(p2->p_lock); 320 if (error) { 321 procfs_proc_unlock(p2); 322 return (error); 323 } 324 325 #undef M2K 326 327 switch (pfs->pfs_type) { 328 case PFSmem: 329 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) || 330 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) { 331 error = EBUSY; 332 break; 333 } 334 335 if (!proc_isunder(p2, l1)) { 336 error = EPERM; 337 break; 338 } 339 340 if (ap->a_mode & FWRITE) 341 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); 342 343 break; 344 345 case PFSregs: 346 case PFSfpregs: 347 if (!proc_isunder(p2, l1)) { 348 error = EPERM; 349 break; 350 } 351 break; 352 353 default: 354 break; 355 } 356 357 procfs_proc_unlock(p2); 358 return (error); 359 } 360 361 /* 362 * close the pfsnode (vp) after doing i/o. 363 * (vp) is not locked on entry or exit. 364 * 365 * nothing to do for procfs other than undo 366 * any exclusive open flag (see _open above). 367 */ 368 int 369 procfs_close(void *v) 370 { 371 struct vop_close_args /* { 372 struct vnode *a_vp; 373 int a_fflag; 374 kauth_cred_t a_cred; 375 } */ *ap = v; 376 struct pfsnode *pfs = VTOPFS(ap->a_vp); 377 378 switch (pfs->pfs_type) { 379 case PFSmem: 380 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) 381 pfs->pfs_flags &= ~(FWRITE|O_EXCL); 382 break; 383 384 default: 385 break; 386 } 387 388 return (0); 389 } 390 391 /* 392 * _inactive is called when the pfsnode 393 * is vrele'd and the reference count goes 394 * to zero. (vp) will be on the vnode free 395 * list, so to get it back vget() must be 396 * used. 397 * 398 * (vp) is locked on entry, but must be unlocked on exit. 
399 */ 400 int 401 procfs_inactive(void *v) 402 { 403 struct vop_inactive_v2_args /* { 404 struct vnode *a_vp; 405 bool *a_recycle; 406 } */ *ap = v; 407 struct vnode *vp = ap->a_vp; 408 struct pfsnode *pfs = VTOPFS(vp); 409 410 mutex_enter(&proc_lock); 411 *ap->a_recycle = (procfs_proc_find(vp->v_mount, pfs->pfs_pid) == NULL); 412 mutex_exit(&proc_lock); 413 414 return (0); 415 } 416 417 /* 418 * _reclaim is called when getnewvnode() 419 * wants to make use of an entry on the vnode 420 * free list. at this time the filesystem needs 421 * to free any private data and remove the node 422 * from any private lists. 423 */ 424 int 425 procfs_reclaim(void *v) 426 { 427 struct vop_reclaim_v2_args /* { 428 struct vnode *a_vp; 429 } */ *ap = v; 430 struct vnode *vp = ap->a_vp; 431 struct pfsnode *pfs = VTOPFS(vp); 432 433 VOP_UNLOCK(vp); 434 435 /* 436 * To interlock with procfs_revoke_vnodes(). 437 */ 438 mutex_enter(vp->v_interlock); 439 vp->v_data = NULL; 440 mutex_exit(vp->v_interlock); 441 kmem_free(pfs, sizeof(*pfs)); 442 return 0; 443 } 444 445 /* 446 * Return POSIX pathconf information applicable to special devices. 447 */ 448 int 449 procfs_pathconf(void *v) 450 { 451 struct vop_pathconf_args /* { 452 struct vnode *a_vp; 453 int a_name; 454 register_t *a_retval; 455 } */ *ap = v; 456 457 switch (ap->a_name) { 458 case _PC_LINK_MAX: 459 *ap->a_retval = LINK_MAX; 460 return (0); 461 case _PC_MAX_CANON: 462 *ap->a_retval = MAX_CANON; 463 return (0); 464 case _PC_MAX_INPUT: 465 *ap->a_retval = MAX_INPUT; 466 return (0); 467 case _PC_PIPE_BUF: 468 *ap->a_retval = PIPE_BUF; 469 return (0); 470 case _PC_CHOWN_RESTRICTED: 471 *ap->a_retval = 1; 472 return (0); 473 case _PC_VDISABLE: 474 *ap->a_retval = _POSIX_VDISABLE; 475 return (0); 476 case _PC_SYNC_IO: 477 *ap->a_retval = 1; 478 return (0); 479 default: 480 return genfs_pathconf(ap); 481 } 482 /* NOTREACHED */ 483 } 484 485 /* 486 * _print is used for debugging. 487 * just print a readable description 488 * of (vp). 
489 */ 490 int 491 procfs_print(void *v) 492 { 493 struct vop_print_args /* { 494 struct vnode *a_vp; 495 } */ *ap = v; 496 struct pfsnode *pfs = VTOPFS(ap->a_vp); 497 498 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n", 499 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags); 500 return 0; 501 } 502 503 int 504 procfs_link(void *v) 505 { 506 struct vop_link_v2_args /* { 507 struct vnode *a_dvp; 508 struct vnode *a_vp; 509 struct componentname *a_cnp; 510 } */ *ap = v; 511 512 VOP_ABORTOP(ap->a_dvp, ap->a_cnp); 513 return (EROFS); 514 } 515 516 int 517 procfs_symlink(void *v) 518 { 519 struct vop_symlink_v3_args /* { 520 struct vnode *a_dvp; 521 struct vnode **a_vpp; 522 struct componentname *a_cnp; 523 struct vattr *a_vap; 524 char *a_target; 525 } */ *ap = v; 526 527 VOP_ABORTOP(ap->a_dvp, ap->a_cnp); 528 return (EROFS); 529 } 530 531 /* 532 * Works out the path to the target process's current 533 * working directory or chroot. If the caller is in a chroot and 534 * can't "reach" the target's cwd or root (or some other error 535 * occurs), a "/" is returned for the path. 536 */ 537 static void 538 procfs_dir(pfstype t, struct lwp *caller, struct proc *target, char **bpp, 539 char *path, size_t len) 540 { 541 struct cwdinfo *cwdi; 542 struct vnode *vp, *rvp; 543 char *bp; 544 545 /* 546 * Lock target cwdi and take a reference to the vnode 547 * we are interested in to prevent it from disappearing 548 * before getcwd_common() below. 549 */ 550 rw_enter(&target->p_cwdi->cwdi_lock, RW_READER); 551 switch (t) { 552 case PFScwd: 553 vp = target->p_cwdi->cwdi_cdir; 554 break; 555 case PFSchroot: 556 vp = target->p_cwdi->cwdi_rdir; 557 break; 558 default: 559 rw_exit(&target->p_cwdi->cwdi_lock); 560 return; 561 } 562 if (vp != NULL) 563 vref(vp); 564 rw_exit(&target->p_cwdi->cwdi_lock); 565 566 cwdi = caller->l_proc->p_cwdi; 567 rw_enter(&cwdi->cwdi_lock, RW_READER); 568 569 rvp = cwdi->cwdi_rdir; 570 bp = bpp ? 
*bpp : NULL; 571 572 /* 573 * XXX: this horrible kludge avoids locking panics when 574 * attempting to lookup links that point to within procfs 575 */ 576 if (vp != NULL && vp->v_tag == VT_PROCFS) { 577 if (bpp) { 578 *--bp = '/'; 579 *bpp = bp; 580 } 581 vrele(vp); 582 rw_exit(&cwdi->cwdi_lock); 583 return; 584 } 585 586 if (rvp == NULL) 587 rvp = rootvnode; 588 if (vp == NULL || getcwd_common(vp, rvp, bp ? &bp : NULL, path, 589 len / 2, 0, caller) != 0) { 590 if (bpp) { 591 bp = *bpp; 592 *--bp = '/'; 593 } 594 } 595 596 if (bpp) 597 *bpp = bp; 598 599 if (vp != NULL) 600 vrele(vp); 601 rw_exit(&cwdi->cwdi_lock); 602 } 603 604 /* 605 * Invent attributes for pfsnode (vp) and store 606 * them in (vap). 607 * Directories lengths are returned as zero since 608 * any real length would require the genuine size 609 * to be computed, and nothing cares anyway. 610 * 611 * this is relatively minimal for procfs. 612 */ 613 int 614 procfs_getattr(void *v) 615 { 616 struct vop_getattr_args /* { 617 struct vnode *a_vp; 618 struct vattr *a_vap; 619 kauth_cred_t a_cred; 620 } */ *ap = v; 621 struct vnode *vp = ap->a_vp; 622 struct pfsnode *pfs = VTOPFS(vp); 623 struct vattr *vap = ap->a_vap; 624 struct proc *procp; 625 char *path, *bp, bf[16]; 626 int error; 627 628 /* first check the process still exists */ 629 switch (pfs->pfs_type) { 630 case PFSroot: 631 case PFScurproc: 632 case PFSself: 633 procp = NULL; 634 break; 635 636 default: 637 error = 638 procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &procp, ENOENT); 639 if (error != 0) 640 return (error); 641 break; 642 } 643 644 switch (pfs->pfs_type) { 645 case PFStask: 646 if (pfs->pfs_fd == -1) { 647 path = NULL; 648 break; 649 } 650 /*FALLTHROUGH*/ 651 case PFScwd: 652 case PFSchroot: 653 path = malloc(MAXPATHLEN + 4, M_TEMP, M_WAITOK); 654 if (path == NULL && procp != NULL) { 655 procfs_proc_unlock(procp); 656 return (ENOMEM); 657 } 658 break; 659 660 default: 661 path = NULL; 662 break; 663 } 664 665 if (procp != NULL) { 666 
mutex_enter(procp->p_lock); 667 error = kauth_authorize_process(kauth_cred_get(), 668 KAUTH_PROCESS_CANSEE, procp, 669 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL); 670 mutex_exit(procp->p_lock); 671 if (error != 0) { 672 procfs_proc_unlock(procp); 673 if (path != NULL) 674 free(path, M_TEMP); 675 return (ENOENT); 676 } 677 } 678 679 error = 0; 680 681 /* start by zeroing out the attributes */ 682 vattr_null(vap); 683 684 /* next do all the common fields */ 685 vap->va_type = ap->a_vp->v_type; 686 vap->va_mode = pfs->pfs_mode; 687 vap->va_fileid = pfs->pfs_fileno; 688 vap->va_flags = 0; 689 vap->va_blocksize = PAGE_SIZE; 690 691 /* 692 * Make all times be current TOD. 693 * 694 * It would be possible to get the process start 695 * time from the p_stats structure, but there's 696 * no "file creation" time stamp anyway, and the 697 * p_stats structure is not addressable if u. gets 698 * swapped out for that process. 699 */ 700 getnanotime(&vap->va_ctime); 701 vap->va_atime = vap->va_mtime = vap->va_ctime; 702 if (procp) 703 TIMEVAL_TO_TIMESPEC(&procp->p_stats->p_start, 704 &vap->va_birthtime); 705 else 706 getnanotime(&vap->va_birthtime); 707 708 switch (pfs->pfs_type) { 709 case PFSmem: 710 case PFSregs: 711 case PFSfpregs: 712 #if defined(__HAVE_PROCFS_MACHDEP) && defined(PROCFS_MACHDEP_PROTECT_CASES) 713 PROCFS_MACHDEP_PROTECT_CASES 714 #endif 715 /* 716 * If the process has exercised some setuid or setgid 717 * privilege, then rip away read/write permission so 718 * that only root can gain access. 
719 */ 720 if (procp->p_flag & PK_SUGID) 721 vap->va_mode &= ~(S_IRUSR|S_IWUSR); 722 /* FALLTHROUGH */ 723 case PFSstatus: 724 case PFSstat: 725 case PFSnote: 726 case PFSnotepg: 727 case PFScmdline: 728 case PFSenviron: 729 case PFSemul: 730 case PFSstatm: 731 732 case PFSmap: 733 case PFSmaps: 734 case PFSlimit: 735 case PFSauxv: 736 vap->va_nlink = 1; 737 vap->va_uid = kauth_cred_geteuid(procp->p_cred); 738 vap->va_gid = kauth_cred_getegid(procp->p_cred); 739 break; 740 case PFScwd: 741 case PFSchroot: 742 case PFSmeminfo: 743 case PFSdevices: 744 case PFScpuinfo: 745 case PFSuptime: 746 case PFSmounts: 747 case PFScpustat: 748 case PFSloadavg: 749 case PFSversion: 750 case PFSexe: 751 case PFSself: 752 case PFScurproc: 753 case PFSroot: 754 vap->va_nlink = 1; 755 vap->va_uid = vap->va_gid = 0; 756 break; 757 758 case PFSproc: 759 case PFStask: 760 case PFSfile: 761 case PFSfd: 762 break; 763 764 default: 765 panic("%s: %d/1", __func__, pfs->pfs_type); 766 } 767 768 /* 769 * now do the object specific fields 770 * 771 * The size could be set from struct reg, but it's hardly 772 * worth the trouble, and it puts some (potentially) machine 773 * dependent data into this machine-independent code. If it 774 * becomes important then this function should break out into 775 * a per-file stat function in the corresponding .c file. 
776 */ 777 778 switch (pfs->pfs_type) { 779 case PFSroot: 780 vap->va_bytes = vap->va_size = DEV_BSIZE; 781 break; 782 783 case PFSself: 784 case PFScurproc: 785 vap->va_bytes = vap->va_size = 786 snprintf(bf, sizeof(bf), "%ld", (long)curproc->p_pid); 787 break; 788 case PFStask: 789 if (pfs->pfs_fd != -1) { 790 vap->va_nlink = 1; 791 vap->va_uid = 0; 792 vap->va_gid = 0; 793 vap->va_bytes = vap->va_size = 794 snprintf(bf, sizeof(bf), ".."); 795 break; 796 } 797 /*FALLTHROUGH*/ 798 case PFSfd: 799 if (pfs->pfs_fd != -1) { 800 file_t *fp; 801 802 fp = fd_getfile2(procp, pfs->pfs_fd); 803 if (fp == NULL) { 804 error = EBADF; 805 break; 806 } 807 vap->va_nlink = 1; 808 vap->va_uid = kauth_cred_geteuid(fp->f_cred); 809 vap->va_gid = kauth_cred_getegid(fp->f_cred); 810 switch (fp->f_type) { 811 case DTYPE_VNODE: 812 vap->va_bytes = vap->va_size = 813 fp->f_vnode->v_size; 814 break; 815 default: 816 vap->va_bytes = vap->va_size = 0; 817 break; 818 } 819 closef(fp); 820 break; 821 } 822 /*FALLTHROUGH*/ 823 case PFSproc: 824 vap->va_nlink = 2; 825 vap->va_uid = kauth_cred_geteuid(procp->p_cred); 826 vap->va_gid = kauth_cred_getegid(procp->p_cred); 827 vap->va_bytes = vap->va_size = DEV_BSIZE; 828 break; 829 830 case PFSfile: 831 error = EOPNOTSUPP; 832 break; 833 834 case PFSmem: 835 vap->va_bytes = vap->va_size = 836 ctob(procp->p_vmspace->vm_tsize + 837 procp->p_vmspace->vm_dsize + 838 procp->p_vmspace->vm_ssize); 839 break; 840 841 case PFSauxv: 842 vap->va_bytes = vap->va_size = procp->p_execsw->es_arglen; 843 break; 844 845 #if defined(PT_GETREGS) || defined(PT_SETREGS) 846 case PFSregs: 847 vap->va_bytes = vap->va_size = sizeof(struct reg); 848 break; 849 #endif 850 851 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS) 852 case PFSfpregs: 853 vap->va_bytes = vap->va_size = sizeof(struct fpreg); 854 break; 855 #endif 856 857 case PFSstatus: 858 case PFSstat: 859 case PFSnote: 860 case PFSnotepg: 861 case PFScmdline: 862 case PFSenviron: 863 case PFSmeminfo: 864 case 
PFSdevices: 865 case PFScpuinfo: 866 case PFSuptime: 867 case PFSmounts: 868 case PFScpustat: 869 case PFSloadavg: 870 case PFSstatm: 871 case PFSversion: 872 vap->va_bytes = vap->va_size = 0; 873 break; 874 case PFSlimit: 875 case PFSmap: 876 case PFSmaps: 877 /* 878 * Advise a larger blocksize for the map files, so that 879 * they may be read in one pass. 880 */ 881 vap->va_blocksize = 4 * PAGE_SIZE; 882 vap->va_bytes = vap->va_size = 0; 883 break; 884 885 case PFScwd: 886 case PFSchroot: 887 bp = path + MAXPATHLEN; 888 *--bp = '\0'; 889 procfs_dir(pfs->pfs_type, curlwp, procp, &bp, path, 890 MAXPATHLEN); 891 vap->va_bytes = vap->va_size = strlen(bp); 892 break; 893 894 case PFSexe: 895 vap->va_bytes = vap->va_size = strlen(procp->p_path); 896 break; 897 898 case PFSemul: 899 vap->va_bytes = vap->va_size = strlen(procp->p_emul->e_name); 900 break; 901 902 #ifdef __HAVE_PROCFS_MACHDEP 903 PROCFS_MACHDEP_NODETYPE_CASES 904 error = procfs_machdep_getattr(ap->a_vp, vap, procp); 905 break; 906 #endif 907 908 default: 909 panic("%s: %d/2", __func__, pfs->pfs_type); 910 } 911 912 if (procp != NULL) 913 procfs_proc_unlock(procp); 914 if (path != NULL) 915 free(path, M_TEMP); 916 917 return (error); 918 } 919 920 /*ARGSUSED*/ 921 int 922 procfs_setattr(void *v) 923 { 924 /* 925 * just fake out attribute setting 926 * it's not good to generate an error 927 * return, otherwise things like creat() 928 * will fail when they try to set the 929 * file length to 0. worse, this means 930 * that echo $note > /proc/$pid/note will fail. 931 */ 932 933 return (0); 934 } 935 936 /* 937 * implement access checking. 938 * 939 * actually, the check for super-user is slightly 940 * broken since it will allow read access to write-only 941 * objects. this doesn't cause any particular trouble 942 * but does mean that the i/o entry points need to check 943 * that the operation really does make sense. 
944 */ 945 int 946 procfs_access(void *v) 947 { 948 struct vop_access_args /* { 949 struct vnode *a_vp; 950 accmode_t a_accmode; 951 kauth_cred_t a_cred; 952 } */ *ap = v; 953 struct vattr va; 954 int error; 955 956 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred)) != 0) 957 return (error); 958 959 return kauth_authorize_vnode(ap->a_cred, 960 KAUTH_ACCESS_ACTION(ap->a_accmode, ap->a_vp->v_type, va.va_mode), 961 ap->a_vp, NULL, genfs_can_access(ap->a_vp, ap->a_cred, 962 va.va_uid, va.va_gid, va.va_mode, NULL, ap->a_accmode)); 963 } 964 965 /* 966 * lookup. this is incredibly complicated in the 967 * general case, however for most pseudo-filesystems 968 * very little needs to be done. 969 * 970 * Locking isn't hard here, just poorly documented. 971 * 972 * If we're looking up ".", just vref the parent & return it. 973 * 974 * If we're looking up "..", unlock the parent, and lock "..". If everything 975 * went ok, and we're on the last component and the caller requested the 976 * parent locked, try to re-lock the parent. We do this to prevent lock 977 * races. 978 * 979 * For anything else, get the needed node. Then unlock the parent if not 980 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the 981 * parent in the .. case). 982 * 983 * We try to exit with the parent locked in error cases. 
*/
int
procfs_lookup(void *v)
{
	struct vop_lookup_v2_args /* {
		struct vnode * a_dvp;
		struct vnode ** a_vpp;
		struct componentname * a_cnp;
	} */ *ap = v;
	struct componentname *cnp = ap->a_cnp;
	struct vnode **vpp = ap->a_vpp;
	struct vnode *dvp = ap->a_dvp;
	const char *pname = cnp->cn_nameptr;
	const struct proc_target *pt = NULL;
	struct vnode *fvp;
	pid_t pid, vnpid;
	struct pfsnode *pfs;
	struct proc *p = NULL;
	struct lwp *plwp;
	int i, error;
	pfstype type;

	*vpp = NULL;

	/* Nothing can be removed or renamed here. */
	if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
		return (EROFS);

	/* "." is the directory itself: hand back a new reference to it. */
	if (cnp->cn_namelen == 1 && *pname == '.') {
		*vpp = dvp;
		vref(dvp);
		return (0);
	}

	/* What can be found depends on the kind of directory searched. */
	pfs = VTOPFS(dvp);
	switch (pfs->pfs_type) {
	case PFSroot:
		/*
		 * Shouldn't get here with .. in the root node.
		 */
		if (cnp->cn_flags & ISDOTDOT)
			return (EIO);

		/* First try the fixed files of the root directory. */
		for (i = 0; i < nproc_root_targets; i++) {
			pt = &proc_root_targets[i];
			/*
			 * check for node match.  proc is always NULL here,
			 * so call pt_valid with constant NULL lwp.
			 */
			if (cnp->cn_namelen == pt->pt_namlen &&
			    memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
			    (pt->pt_valid == NULL ||
			     (*pt->pt_valid)(NULL, dvp->v_mount)))
				break;
		}

		if (i != nproc_root_targets) {
			error = procfs_allocvp(dvp->v_mount, vpp, 0,
			    pt->pt_pfstype, -1);
			return (error);
		}

		/*
		 * Otherwise the name is "curproc", "self" or is parsed as
		 * a pid.  The two aliases resolve to the caller's pid but
		 * their vnode is allocated with pid 0.
		 */
		if (CNEQ(cnp, "curproc", 7)) {
			pid = curproc->p_pid;
			vnpid = 0;
			type = PFScurproc;
		} else if (CNEQ(cnp, "self", 4)) {
			pid = curproc->p_pid;
			vnpid = 0;
			type = PFSself;
		} else {
			pid = (pid_t)atoi(pname, cnp->cn_namelen);
			vnpid = pid;
			type = PFSproc;
		}

		/* No such process: fall out to the common not-found return. */
		if (procfs_proc_lock(dvp->v_mount, pid, &p, ESRCH) != 0)
			break;
		error = procfs_allocvp(dvp->v_mount, vpp, vnpid, type, -1);
		procfs_proc_unlock(p);
		return (error);

	case PFSproc:
		/* ".." of a process directory is the procfs root. */
		if (cnp->cn_flags & ISDOTDOT) {
			error = procfs_allocvp(dvp->v_mount, vpp, 0, PFSroot,
			    -1);
			return (error);
		}

		if (procfs_proc_lock(dvp->v_mount, pfs->pfs_pid, &p,
				     ESRCH) != 0)
			break;

		/* Pick the first LWP that is not a zombie. */
		mutex_enter(p->p_lock);
		LIST_FOREACH(plwp, &p->p_lwps, l_sibling) {
			if (plwp->l_stat != LSZOMB)
				break;
		}
		/* Process is exiting if no-LWPS or all LWPs are LSZOMB */
		if (plwp == NULL) {
			mutex_exit(p->p_lock);
			procfs_proc_unlock(p);
			return ESRCH;
		}

		/* Hold the LWP while the validation functions look at it. */
		lwp_addref(plwp);
		mutex_exit(p->p_lock);

		for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
			int found;

			found = cnp->cn_namelen == pt->pt_namlen &&
			    memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
			    (pt->pt_valid == NULL
			      || (*pt->pt_valid)(plwp, dvp->v_mount));
			if (found)
				break;
		}
		lwp_delref(plwp);

		/* Name not in the table (or entry not valid): not found. */
		if (i == nproc_targets) {
			procfs_proc_unlock(p);
			break;
		}
		/* "file" returns the process's text vnode directly. */
		if (pt->pt_pfstype == PFSfile) {
			fvp = p->p_textvp;
			/* We already checked that it exists. */
			vref(fvp);
			procfs_proc_unlock(p);
			*vpp = fvp;
			return (0);
		}

		error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
		    pt->pt_pfstype, -1);
		procfs_proc_unlock(p);
		return (error);

	case PFSfd: {
		int fd;
		file_t *fp;

		if ((error = procfs_proc_lock(dvp->v_mount, pfs->pfs_pid, &p,
					      ENOENT)) != 0)
			return error;

		/* ".." of fd/ is the process directory. */
		if (cnp->cn_flags & ISDOTDOT) {
			error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
			    PFSproc, -1);
			procfs_proc_unlock(p);
			return (error);
		}
		/* The component names a descriptor number of the process. */
		fd = atoi(pname, cnp->cn_namelen);

		fp = fd_getfile2(p, fd);
		if (fp == NULL) {
			procfs_proc_unlock(p);
			return ENOENT;
		}
		fvp = fp->f_vnode;

		/* Don't show directories */
		if (fp->f_type == DTYPE_VNODE && fvp->v_type != VDIR) {
			/* Vnode-backed non-directory: return that vnode itself. */
			vref(fvp);
			closef(fp);
			procfs_proc_unlock(p);
			*vpp = fvp;
			return 0;
		}

		/* Anything else is represented by a procfs fd node. */
		closef(fp);
		error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
		    PFSfd, fd);
		procfs_proc_unlock(p);
		return error;
	}
	case PFStask: {
		int xpid;

		if ((error = procfs_proc_lock(dvp->v_mount, pfs->pfs_pid, &p,
					      ENOENT)) != 0)
			return error;

		/* ".." of task/ is the process directory. */
		if (cnp->cn_flags & ISDOTDOT) {
			error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
			    PFSproc, -1);
			procfs_proc_unlock(p);
			return (error);
		}
		xpid = atoi(pname, cnp->cn_namelen);

		/* The only entry accepted is the process's own pid. */
		if (xpid != pfs->pfs_pid) {
			procfs_proc_unlock(p);
			return ENOENT;
		}
		error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
		    PFStask, 0);
		procfs_proc_unlock(p);
		return error;
	}
	default:
		return (ENOTDIR);
	}

	/* Reached via "break": the name does not exist. */
	return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
}

/*
 * Validation function for the "exe" and "file" entries: non-zero only
 * when an LWP is given and its process has a text vnode.  mp is unused.
 */
int
procfs_validfile(struct lwp *l, struct mount *mp)
{
	return l != NULL && l->l_proc != NULL && l->l_proc->p_textvp != NULL;
}

/*
 * Validation function for the Linux-compat entries: the mount must
 * have Linux compatibility enabled and, when an LWP with a process is
 * given, that process must also pass procfs_validfile().
 */
static int
procfs_validfile_linux(struct lwp *l, struct mount *mp)
{
	return procfs_use_linux_compat(mp) &&
	    (l == NULL || l->l_proc == NULL || procfs_validfile(l, mp));
}

/*
 * State shared with procfs_root_readdir_callback() while the process
 * entries of the root directory are emitted.
 */
struct procfs_root_readdir_ctx {
	struct uio *uiop;	/* destination of the directory entries */
	off_t *cookies;		/* next cookie slot to fill, or NULL */
	int ncookies;		/* number of entries emitted so far */
	off_t off;		/* offset of the entry being considered */
	off_t startoff;		/* entries before this offset are skipped */
	int error;		/* uiomove() failure, if any */
};

/*
 * Emit one root directory entry for process p.
 *
 * Returns -1 when the output buffer has no room for another entry or
 * when copying the entry out failed (the error is then left in
 * ctxp->error), and 0 otherwise -- including when the process is
 * skipped because the caller may not see it or because it lies before
 * the requested start offset.
 * NOTE(review): -1 presumably stops the caller's iteration; the iterator
 * is not part of this section -- confirm.
 */
static int
procfs_root_readdir_callback(struct proc *p, void *arg)
{
	struct procfs_root_readdir_ctx *ctxp = arg;
	struct dirent d;
	struct uio *uiop;
	int error;

	uiop = ctxp->uiop;
	if (uiop->uio_resid < UIO_MX)
		return -1; /* no space */

	/* Silently leave out processes the caller may not see. */
	if (kauth_authorize_process(kauth_cred_get(),
	    KAUTH_PROCESS_CANSEE, p,
	    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL) != 0)
		return 0;

	/* Not yet at the offset the reader asked for: count and skip. */
	if (ctxp->off < ctxp->startoff) {
		ctxp->off++;
		return 0;
	}

	/* Build a directory entry named after the pid. */
	memset(&d, 0, UIO_MX);
	d.d_reclen = UIO_MX;
	d.d_fileno = PROCFS_FILENO(p->p_pid, PFSproc, -1);
	d.d_namlen = snprintf(d.d_name,
	    UIO_MX - offsetof(struct dirent, d_name), "%ld", (long)p->p_pid);
	d.d_type = DT_DIR;

	/*
	 * proc_lock is dropped around the copy and re-taken afterwards.
	 * NOTE(review): presumably because uiomove() may sleep -- confirm.
	 */
	mutex_exit(&proc_lock);
	error = uiomove(&d, UIO_MX, uiop);
	mutex_enter(&proc_lock);
	if (error) {
		ctxp->error = error;
		return -1;
	}

	/* Account for the entry and record the offset that follows it. */
	ctxp->ncookies++;
	if (ctxp->cookies)
		*(ctxp->cookies)++ = ctxp->off + 1;
	ctxp->off++;

	return 0;
}

/*
 * readdir returns directory entries from pfsnode (vp).
 *
 * the strategy here with procfs is to generate a single
 * directory entry at a time (struct dirent) and then
 * copy that out to userland using uiomove.
a more efficient 1263 * though more complex implementation, would try to minimize 1264 * the number of calls to uiomove(). for procfs, this is 1265 * hardly worth the added code complexity. 1266 * 1267 * this should just be done through read() 1268 */ 1269 int 1270 procfs_readdir(void *v) 1271 { 1272 struct vop_readdir_args /* { 1273 struct vnode *a_vp; 1274 struct uio *a_uio; 1275 kauth_cred_t a_cred; 1276 int *a_eofflag; 1277 off_t **a_cookies; 1278 int *a_ncookies; 1279 } */ *ap = v; 1280 struct uio *uio = ap->a_uio; 1281 struct dirent d; 1282 struct pfsnode *pfs; 1283 off_t i; 1284 int error; 1285 off_t *cookies = NULL; 1286 int ncookies; 1287 struct vnode *vp; 1288 const struct proc_target *pt; 1289 struct procfs_root_readdir_ctx ctx; 1290 struct lwp *l; 1291 int nfd; 1292 1293 vp = ap->a_vp; 1294 pfs = VTOPFS(vp); 1295 1296 if (uio->uio_resid < UIO_MX) 1297 return (EINVAL); 1298 if (uio->uio_offset < 0) 1299 return (EINVAL); 1300 1301 error = 0; 1302 i = uio->uio_offset; 1303 memset(&d, 0, UIO_MX); 1304 d.d_reclen = UIO_MX; 1305 ncookies = uio->uio_resid / UIO_MX; 1306 1307 switch (pfs->pfs_type) { 1308 /* 1309 * this is for the process-specific sub-directories. 1310 * all that is needed to is copy out all the entries 1311 * from the procent[] table (top of this file). 
1312 */ 1313 case PFSproc: { 1314 struct proc *p; 1315 1316 if (i >= nproc_targets) 1317 return 0; 1318 1319 if (procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p, ESRCH) != 0) 1320 break; 1321 1322 if (ap->a_ncookies) { 1323 ncookies = uimin(ncookies, (nproc_targets - i)); 1324 cookies = malloc(ncookies * sizeof (off_t), 1325 M_TEMP, M_WAITOK); 1326 *ap->a_cookies = cookies; 1327 } 1328 1329 for (pt = &proc_targets[i]; 1330 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) { 1331 if (pt->pt_valid) { 1332 /* XXXSMP LWP can disappear */ 1333 mutex_enter(p->p_lock); 1334 l = LIST_FIRST(&p->p_lwps); 1335 KASSERT(l != NULL); 1336 mutex_exit(p->p_lock); 1337 if ((*pt->pt_valid)(l, vp->v_mount) == 0) 1338 continue; 1339 } 1340 1341 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, 1342 pt->pt_pfstype, -1); 1343 d.d_namlen = pt->pt_namlen; 1344 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1345 d.d_type = pt->pt_type; 1346 1347 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1348 break; 1349 if (cookies) 1350 *cookies++ = i + 1; 1351 } 1352 1353 procfs_proc_unlock(p); 1354 break; 1355 } 1356 case PFSfd: { 1357 struct proc *p; 1358 file_t *fp; 1359 int lim, nc = 0; 1360 1361 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p, 1362 ESRCH)) != 0) 1363 return error; 1364 1365 /* XXX Should this be by file as well? 
*/ 1366 if (kauth_authorize_process(kauth_cred_get(), 1367 KAUTH_PROCESS_CANSEE, p, 1368 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_OPENFILES), NULL, 1369 NULL) != 0) { 1370 procfs_proc_unlock(p); 1371 return ESRCH; 1372 } 1373 1374 nfd = atomic_load_consume(&p->p_fd->fd_dt)->dt_nfiles; 1375 1376 lim = uimin((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 1377 if (i >= lim) { 1378 procfs_proc_unlock(p); 1379 return 0; 1380 } 1381 1382 if (ap->a_ncookies) { 1383 ncookies = uimin(ncookies, (nfd + 2 - i)); 1384 cookies = malloc(ncookies * sizeof (off_t), 1385 M_TEMP, M_WAITOK); 1386 *ap->a_cookies = cookies; 1387 } 1388 1389 for (; i < 2 && uio->uio_resid >= UIO_MX; i++) { 1390 pt = &proc_targets[i]; 1391 d.d_namlen = pt->pt_namlen; 1392 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, 1393 pt->pt_pfstype, -1); 1394 (void)memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1395 d.d_type = pt->pt_type; 1396 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1397 break; 1398 if (cookies) 1399 *cookies++ = i + 1; 1400 nc++; 1401 } 1402 if (error) { 1403 ncookies = nc; 1404 break; 1405 } 1406 for (; uio->uio_resid >= UIO_MX && i < nfd; i++) { 1407 /* check the descriptor exists */ 1408 if ((fp = fd_getfile2(p, i - 2)) == NULL) 1409 continue; 1410 closef(fp); 1411 1412 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, PFSfd, i - 2); 1413 d.d_namlen = snprintf(d.d_name, sizeof(d.d_name), 1414 "%lld", (long long)(i - 2)); 1415 d.d_type = VREG; 1416 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1417 break; 1418 if (cookies) 1419 *cookies++ = i + 1; 1420 nc++; 1421 } 1422 ncookies = nc; 1423 procfs_proc_unlock(p); 1424 break; 1425 } 1426 case PFStask: { 1427 struct proc *p; 1428 int nc = 0; 1429 1430 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p, 1431 ESRCH)) != 0) 1432 return error; 1433 1434 nfd = 3; /* ., .., pid */ 1435 1436 if (ap->a_ncookies) { 1437 ncookies = uimin(ncookies, (nfd + 2 - i)); 1438 cookies = malloc(ncookies * sizeof (off_t), 1439 M_TEMP, M_WAITOK); 1440 *ap->a_cookies = 
cookies; 1441 } 1442 1443 for (; i < 2 && uio->uio_resid >= UIO_MX; i++) { 1444 pt = &proc_targets[i]; 1445 d.d_namlen = pt->pt_namlen; 1446 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, 1447 pt->pt_pfstype, -1); 1448 (void)memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1449 d.d_type = pt->pt_type; 1450 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1451 break; 1452 if (cookies) 1453 *cookies++ = i + 1; 1454 nc++; 1455 } 1456 if (error) { 1457 ncookies = nc; 1458 break; 1459 } 1460 for (; uio->uio_resid >= UIO_MX && i < nfd; i++) { 1461 /* check the descriptor exists */ 1462 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, PFStask, 1463 i - 2); 1464 d.d_namlen = snprintf(d.d_name, sizeof(d.d_name), 1465 "%ld", (long)pfs->pfs_pid); 1466 d.d_type = DT_LNK; 1467 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1468 break; 1469 if (cookies) 1470 *cookies++ = i + 1; 1471 nc++; 1472 } 1473 ncookies = nc; 1474 procfs_proc_unlock(p); 1475 break; 1476 } 1477 1478 /* 1479 * this is for the root of the procfs filesystem 1480 * what is needed are special entries for "curproc" 1481 * and "self" followed by an entry for each process 1482 * on allproc. 1483 */ 1484 1485 case PFSroot: { 1486 struct proc *p; 1487 int nc = 0; 1488 1489 if (ap->a_ncookies) { 1490 /* 1491 * XXX Potentially allocating too much space here, 1492 * but I'm lazy. This loop needs some work. 1493 */ 1494 cookies = malloc(ncookies * sizeof (off_t), 1495 M_TEMP, M_WAITOK); 1496 *ap->a_cookies = cookies; 1497 } 1498 error = 0; 1499 /* 0 ... 3 are static entries. */ 1500 for (; i <= 3 && uio->uio_resid >= UIO_MX; i++) { 1501 switch (i) { 1502 case 0: /* `.' */ 1503 case 1: /* `..' 
*/ 1504 d.d_fileno = PROCFS_FILENO(0, PFSroot, -1); 1505 d.d_namlen = i + 1; 1506 memcpy(d.d_name, "..", d.d_namlen); 1507 d.d_name[i + 1] = '\0'; 1508 d.d_type = DT_DIR; 1509 break; 1510 1511 case 2: 1512 d.d_fileno = PROCFS_FILENO(0, PFScurproc, -1); 1513 d.d_namlen = sizeof("curproc") - 1; 1514 memcpy(d.d_name, "curproc", sizeof("curproc")); 1515 d.d_type = DT_LNK; 1516 break; 1517 1518 case 3: 1519 d.d_fileno = PROCFS_FILENO(0, PFSself, -1); 1520 d.d_namlen = sizeof("self") - 1; 1521 memcpy(d.d_name, "self", sizeof("self")); 1522 d.d_type = DT_LNK; 1523 break; 1524 } 1525 1526 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1527 break; 1528 nc++; 1529 if (cookies) 1530 *cookies++ = i + 1; 1531 } 1532 /* 4 ... are process entries. */ 1533 ctx.uiop = uio; 1534 ctx.error = 0; 1535 ctx.off = 4; 1536 ctx.startoff = i; 1537 ctx.cookies = cookies; 1538 ctx.ncookies = nc; 1539 proclist_foreach_call(&allproc, 1540 procfs_root_readdir_callback, &ctx); 1541 cookies = ctx.cookies; 1542 nc = ctx.ncookies; 1543 error = ctx.error; 1544 if (error) 1545 break; 1546 1547 /* misc entries. 
*/ 1548 if (i < ctx.off) 1549 i = ctx.off; 1550 if (i >= ctx.off + nproc_root_targets) 1551 break; 1552 error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p, ESRCH); 1553 if (error) 1554 break; 1555 for (pt = &proc_root_targets[i - ctx.off]; 1556 uio->uio_resid >= UIO_MX && 1557 pt < &proc_root_targets[nproc_root_targets]; 1558 pt++, i++) { 1559 if (pt->pt_valid && 1560 (*pt->pt_valid)(NULL, vp->v_mount) == 0) 1561 continue; 1562 if (kauth_authorize_process(kauth_cred_get(), 1563 KAUTH_PROCESS_CANSEE, p, 1564 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), 1565 NULL, NULL) != 0) 1566 continue; 1567 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype, -1); 1568 d.d_namlen = pt->pt_namlen; 1569 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1570 d.d_type = pt->pt_type; 1571 1572 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1573 break; 1574 nc++; 1575 if (cookies) 1576 *cookies++ = i + 1; 1577 } 1578 1579 ncookies = nc; 1580 procfs_proc_unlock(p); 1581 break; 1582 } 1583 1584 default: 1585 error = ENOTDIR; 1586 break; 1587 } 1588 1589 if (ap->a_ncookies) { 1590 if (error) { 1591 if (cookies) 1592 free(*ap->a_cookies, M_TEMP); 1593 *ap->a_ncookies = 0; 1594 *ap->a_cookies = NULL; 1595 } else 1596 *ap->a_ncookies = ncookies; 1597 } 1598 uio->uio_offset = i; 1599 return (error); 1600 } 1601 1602 /* 1603 * readlink reads the link of `curproc' and others 1604 */ 1605 int 1606 procfs_readlink(void *v) 1607 { 1608 struct vop_readlink_args *ap = v; 1609 char bf[16]; /* should be enough */ 1610 char *bp = bf; 1611 char *path = NULL; 1612 int len = 0; 1613 int error = 0; 1614 struct vnode *vp = ap->a_vp; 1615 struct pfsnode *pfs = VTOPFS(vp); 1616 struct proc *pown = NULL; 1617 1618 if (pfs->pfs_fileno == PROCFS_FILENO(0, PFScurproc, -1)) 1619 len = snprintf(bf, sizeof(bf), "%ld", (long)curproc->p_pid); 1620 else if (pfs->pfs_fileno == PROCFS_FILENO(0, PFSself, -1)) 1621 len = snprintf(bf, sizeof(bf), "%s", "curproc"); 1622 else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, 
PFStask, 0)) 1623 len = snprintf(bf, sizeof(bf), ".."); 1624 else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFSexe, -1)) { 1625 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &pown, 1626 ESRCH)) != 0) 1627 return error; 1628 bp = pown->p_path; 1629 len = strlen(bp); 1630 } else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFScwd, -1) || 1631 pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFSchroot, -1)) { 1632 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &pown, 1633 ESRCH)) != 0) 1634 return error; 1635 path = malloc(MAXPATHLEN + 4, M_TEMP, M_WAITOK); 1636 if (path == NULL) { 1637 procfs_proc_unlock(pown); 1638 return (ENOMEM); 1639 } 1640 bp = path + MAXPATHLEN; 1641 *--bp = '\0'; 1642 procfs_dir(PROCFS_TYPE(pfs->pfs_fileno), curlwp, pown, 1643 &bp, path, MAXPATHLEN); 1644 len = strlen(bp); 1645 } else { 1646 file_t *fp; 1647 struct vnode *vxp; 1648 1649 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &pown, 1650 ESRCH)) != 0) 1651 return error; 1652 1653 fp = fd_getfile2(pown, pfs->pfs_fd); 1654 if (fp == NULL) { 1655 procfs_proc_unlock(pown); 1656 return EBADF; 1657 } 1658 1659 switch (fp->f_type) { 1660 case DTYPE_VNODE: 1661 vxp = fp->f_vnode; 1662 if (vxp->v_type != VDIR) { 1663 error = EINVAL; 1664 break; 1665 } 1666 if ((path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK)) 1667 == NULL) { 1668 error = ENOMEM; 1669 break; 1670 } 1671 bp = path + MAXPATHLEN; 1672 *--bp = '\0'; 1673 1674 /* 1675 * XXX: kludge to avoid locking against ourselves 1676 * in getcwd() 1677 */ 1678 if (vxp->v_tag == VT_PROCFS) { 1679 *--bp = '/'; 1680 } else { 1681 rw_enter(&curproc->p_cwdi->cwdi_lock, 1682 RW_READER); 1683 vp = curproc->p_cwdi->cwdi_rdir; 1684 if (vp == NULL) 1685 vp = rootvnode; 1686 error = getcwd_common(vxp, vp, &bp, path, 1687 MAXPATHLEN / 2, 0, curlwp); 1688 rw_exit(&curproc->p_cwdi->cwdi_lock); 1689 } 1690 if (error) 1691 break; 1692 len = strlen(bp); 1693 break; 1694 1695 case DTYPE_MISC: 1696 len = snprintf(bf, 
sizeof(bf), "%s", "[misc]"); 1697 break; 1698 1699 case DTYPE_KQUEUE: 1700 len = snprintf(bf, sizeof(bf), "%s", "[kqueue]"); 1701 break; 1702 1703 case DTYPE_SEM: 1704 len = snprintf(bf, sizeof(bf), "%s", "[ksem]"); 1705 break; 1706 1707 default: 1708 error = EINVAL; 1709 break; 1710 } 1711 closef(fp); 1712 } 1713 1714 if (error == 0) 1715 error = uiomove(bp, len, ap->a_uio); 1716 if (pown) 1717 procfs_proc_unlock(pown); 1718 if (path) 1719 free(path, M_TEMP); 1720 return error; 1721 } 1722 1723 int 1724 procfs_getpages(void *v) 1725 { 1726 struct vop_getpages_args /* { 1727 struct vnode *a_vp; 1728 voff_t a_offset; 1729 struct vm_page **a_m; 1730 int *a_count; 1731 int a_centeridx; 1732 vm_prot_t a_access_type; 1733 int a_advice; 1734 int a_flags; 1735 } */ *ap = v; 1736 1737 if ((ap->a_flags & PGO_LOCKED) == 0) 1738 rw_exit(ap->a_vp->v_uobj.vmobjlock); 1739 1740 return (EFAULT); 1741 } 1742 1743 /* 1744 * convert decimal ascii to int 1745 */ 1746 static int 1747 atoi(const char *b, size_t len) 1748 { 1749 int p = 0; 1750 1751 while (len--) { 1752 char c = *b++; 1753 if (c < '0' || c > '9') 1754 return -1; 1755 p = 10 * p + (c - '0'); 1756 } 1757 1758 return p; 1759 } 1760