1 /* $NetBSD: procfs_vnops.c,v 1.230 2024/01/17 10:19:21 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1993, 1995 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Jan-Simon Pendry. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 * 63 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 64 */ 65 66 /* 67 * Copyright (c) 1993 Jan-Simon Pendry 68 * 69 * This code is derived from software contributed to Berkeley by 70 * Jan-Simon Pendry. 71 * 72 * Redistribution and use in source and binary forms, with or without 73 * modification, are permitted provided that the following conditions 74 * are met: 75 * 1. Redistributions of source code must retain the above copyright 76 * notice, this list of conditions and the following disclaimer. 77 * 2. Redistributions in binary form must reproduce the above copyright 78 * notice, this list of conditions and the following disclaimer in the 79 * documentation and/or other materials provided with the distribution. 80 * 3. All advertising materials mentioning features or use of this software 81 * must display the following acknowledgement: 82 * This product includes software developed by the University of 83 * California, Berkeley and its contributors. 84 * 4. Neither the name of the University nor the names of its contributors 85 * may be used to endorse or promote products derived from this software 86 * without specific prior written permission. 87 * 88 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 89 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 90 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 91 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 92 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 93 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 94 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 95 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 96 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 97 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 98 * SUCH DAMAGE. 99 * 100 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 101 */ 102 103 /* 104 * procfs vnode interface 105 */ 106 107 #include <sys/cdefs.h> 108 __KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.230 2024/01/17 10:19:21 hannken Exp $"); 109 110 #include <sys/param.h> 111 #include <sys/atomic.h> 112 #include <sys/systm.h> 113 #include <sys/time.h> 114 #include <sys/kernel.h> 115 #include <sys/file.h> 116 #include <sys/filedesc.h> 117 #include <sys/proc.h> 118 #include <sys/vnode.h> 119 #include <sys/namei.h> 120 #include <sys/malloc.h> 121 #include <sys/mount.h> 122 #include <sys/dirent.h> 123 #include <sys/resourcevar.h> 124 #include <sys/stat.h> 125 #include <sys/ptrace.h> 126 #include <sys/kauth.h> 127 #include <sys/exec.h> 128 129 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */ 130 131 #include <machine/reg.h> 132 133 #include <miscfs/genfs/genfs.h> 134 #include <miscfs/procfs/procfs.h> 135 136 /* 137 * Vnode Operations. 138 * 139 */ 140 141 static int procfs_validfile_linux(struct lwp *, struct mount *); 142 static int procfs_root_readdir_callback(struct proc *, void *); 143 static void procfs_dir(pfstype, struct lwp *, struct proc *, char **, char *, 144 size_t); 145 146 /* 147 * This is a list of the valid names in the 148 * process-specific sub-directories. It is 149 * used in procfs_lookup and procfs_readdir 150 */ 151 static const struct proc_target { 152 u_char pt_type; 153 u_char pt_namlen; 154 const char *pt_name; 155 pfstype pt_pfstype; 156 int (*pt_valid)(struct lwp *, struct mount *); 157 } proc_targets[] = { 158 #define N(s) sizeof(s)-1, s 159 /* name type validp */ 160 { DT_DIR, N("."), PFSproc, NULL }, 161 { DT_DIR, N(".."), PFSroot, NULL }, 162 { DT_DIR, N("fd"), PFSfd, NULL }, 163 { DT_DIR, N("task"), PFStask, procfs_validfile_linux }, 164 { DT_LNK, N("cwd"), PFScwd, NULL }, 165 { DT_REG, N("emul"), PFSemul, NULL }, 166 { DT_LNK, N("root"), PFSchroot, NULL }, 167 { DT_REG, N("auxv"), PFSauxv, procfs_validauxv }, 168 { DT_REG, N("cmdline"), PFScmdline, NULL }, 169 { DT_REG, N("environ"), PFSenviron, NULL }, 170 { DT_LNK, N("exe"), PFSexe, procfs_validfile }, 171 { DT_REG, N("file"), PFSfile, procfs_validfile }, 172 { DT_REG, N("fpregs"), PFSfpregs, procfs_validfpregs }, 173 { DT_REG, N("limit"), PFSlimit, NULL }, 174 { DT_REG, N("map"), PFSmap, procfs_validmap }, 175 { DT_REG, N("maps"), PFSmaps, procfs_validmap }, 176 { DT_REG, N("mem"), PFSmem, NULL }, 177 { DT_REG, N("note"), PFSnote, NULL }, 178 { DT_REG, N("notepg"), PFSnotepg, NULL }, 179 { DT_REG, N("regs"), PFSregs, procfs_validregs }, 180 { DT_REG, N("stat"), PFSstat, procfs_validfile_linux }, 181 { DT_REG, N("statm"), PFSstatm, procfs_validfile_linux }, 182 { DT_REG, N("status"), PFSstatus, NULL }, 183 #ifdef __HAVE_PROCFS_MACHDEP 184 PROCFS_MACHDEP_NODETYPE_DEFNS 185 #endif 186 #undef N 187 }; 188 static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]); 189 190 /* 191 * List of files in the root directory. Note: the validate function will 192 * be called with p == NULL for these ones. 193 */ 194 static const struct proc_target proc_root_targets[] = { 195 #define N(s) sizeof(s)-1, s 196 /* name type validp */ 197 { DT_REG, N("meminfo"), PFSmeminfo, procfs_validfile_linux }, 198 { DT_REG, N("cpuinfo"), PFScpuinfo, procfs_validfile_linux }, 199 { DT_REG, N("uptime"), PFSuptime, procfs_validfile_linux }, 200 { DT_REG, N("mounts"), PFSmounts, procfs_validfile_linux }, 201 { DT_REG, N("devices"), PFSdevices, procfs_validfile_linux }, 202 { DT_REG, N("stat"), PFScpustat, procfs_validfile_linux }, 203 { DT_REG, N("loadavg"), PFSloadavg, procfs_validfile_linux }, 204 { DT_REG, N("version"), PFSversion, procfs_validfile_linux }, 205 #undef N 206 }; 207 static const int nproc_root_targets = 208 sizeof(proc_root_targets) / sizeof(proc_root_targets[0]); 209 210 int procfs_lookup(void *); 211 int procfs_open(void *); 212 int procfs_close(void *); 213 int procfs_access(void *); 214 int procfs_getattr(void *); 215 int procfs_setattr(void *); 216 int procfs_readdir(void *); 217 int procfs_readlink(void *); 218 int procfs_inactive(void *); 219 int procfs_reclaim(void *); 220 int procfs_print(void *); 221 int procfs_pathconf(void *); 222 int procfs_getpages(void *); 223 224 static uint8_t fttodt(file_t *); 225 static int atoi(const char *, size_t); 226 227 /* 228 * procfs vnode operations. 229 */ 230 int (**procfs_vnodeop_p)(void *); 231 const struct vnodeopv_entry_desc procfs_vnodeop_entries[] = { 232 { &vop_default_desc, vn_default_error }, 233 { &vop_parsepath_desc, genfs_parsepath }, /* parsepath */ 234 { &vop_lookup_desc, procfs_lookup }, /* lookup */ 235 { &vop_create_desc, genfs_eopnotsupp }, /* create */ 236 { &vop_mknod_desc, genfs_eopnotsupp }, /* mknod */ 237 { &vop_open_desc, procfs_open }, /* open */ 238 { &vop_close_desc, procfs_close }, /* close */ 239 { &vop_access_desc, procfs_access }, /* access */ 240 { &vop_accessx_desc, genfs_accessx }, /* accessx */ 241 { &vop_getattr_desc, procfs_getattr }, /* getattr */ 242 { &vop_setattr_desc, procfs_setattr }, /* setattr */ 243 { &vop_read_desc, procfs_rw }, /* read */ 244 { &vop_write_desc, procfs_rw }, /* write */ 245 { &vop_fallocate_desc, genfs_eopnotsupp }, /* fallocate */ 246 { &vop_fdiscard_desc, genfs_eopnotsupp }, /* fdiscard */ 247 { &vop_fcntl_desc, genfs_fcntl }, /* fcntl */ 248 { &vop_ioctl_desc, genfs_enoioctl }, /* ioctl */ 249 { &vop_poll_desc, genfs_poll }, /* poll */ 250 { &vop_kqfilter_desc, genfs_kqfilter }, /* kqfilter */ 251 { &vop_revoke_desc, genfs_revoke }, /* revoke */ 252 { &vop_fsync_desc, genfs_nullop }, /* fsync */ 253 { &vop_seek_desc, genfs_nullop }, /* seek */ 254 { &vop_remove_desc, genfs_eopnotsupp }, /* remove */ 255 { &vop_link_desc, genfs_erofs_link }, /* link */ 256 { &vop_rename_desc, genfs_eopnotsupp }, /* rename */ 257 { &vop_mkdir_desc, genfs_eopnotsupp }, /* mkdir */ 258 { &vop_rmdir_desc, genfs_eopnotsupp }, /* rmdir */ 259 { &vop_symlink_desc, genfs_erofs_symlink }, /* symlink */ 260 { &vop_readdir_desc, procfs_readdir }, /* readdir */ 261 { &vop_readlink_desc, procfs_readlink }, /* readlink */ 262 { &vop_abortop_desc, genfs_abortop }, /* abortop */ 263 { &vop_inactive_desc, procfs_inactive }, /* inactive */ 264 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */ 265 { &vop_lock_desc, genfs_lock }, /* lock */ 266 { &vop_unlock_desc, genfs_unlock }, /* unlock */ 267 { &vop_bmap_desc, genfs_eopnotsupp }, /* bmap */ 268 { &vop_strategy_desc, genfs_badop }, /* strategy */ 269 { &vop_print_desc, procfs_print }, /* print */ 270 { &vop_islocked_desc, genfs_islocked }, /* islocked */ 271 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */ 272 { &vop_advlock_desc, genfs_einval }, /* advlock */ 273 { &vop_getpages_desc, procfs_getpages }, /* getpages */ 274 { &vop_putpages_desc, genfs_null_putpages }, /* putpages */ 275 { NULL, NULL } 276 }; 277 const struct vnodeopv_desc procfs_vnodeop_opv_desc = 278 { &procfs_vnodeop_p, procfs_vnodeop_entries }; 279 /* 280 * set things up for doing i/o on 281 * the pfsnode (vp). (vp) is locked 282 * on entry, and should be left locked 283 * on exit. 284 * 285 * for procfs we don't need to do anything 286 * in particular for i/o. all that is done 287 * is to support exclusive open on process 288 * memory images. 289 */ 290 int 291 procfs_open(void *v) 292 { 293 struct vop_open_args /* { 294 struct vnode *a_vp; 295 int a_mode; 296 kauth_cred_t a_cred; 297 } */ *ap = v; 298 struct vnode *vp = ap->a_vp; 299 struct pfsnode *pfs = VTOPFS(vp); 300 struct lwp *l1; 301 struct proc *p2; 302 int error; 303 304 if ((error = 305 procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p2, ENOENT)) != 0) 306 return error; 307 308 l1 = curlwp; /* tracer */ 309 310 #define M2K(m) (((m) & FREAD) && ((m) & FWRITE) ? \ 311 KAUTH_REQ_PROCESS_PROCFS_RW : \ 312 (m) & FWRITE ? KAUTH_REQ_PROCESS_PROCFS_WRITE : \ 313 KAUTH_REQ_PROCESS_PROCFS_READ) 314 315 mutex_enter(p2->p_lock); 316 error = kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_PROCFS, 317 p2, pfs, KAUTH_ARG(M2K(ap->a_mode)), NULL); 318 mutex_exit(p2->p_lock); 319 if (error) { 320 procfs_proc_unlock(p2); 321 return (error); 322 } 323 324 #undef M2K 325 326 switch (pfs->pfs_type) { 327 case PFSmem: 328 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) || 329 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) { 330 error = EBUSY; 331 break; 332 } 333 334 if (!proc_isunder(p2, l1)) { 335 error = EPERM; 336 break; 337 } 338 339 if (ap->a_mode & FWRITE) 340 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); 341 342 break; 343 344 case PFSregs: 345 case PFSfpregs: 346 if (!proc_isunder(p2, l1)) { 347 error = EPERM; 348 break; 349 } 350 break; 351 352 default: 353 break; 354 } 355 356 procfs_proc_unlock(p2); 357 return (error); 358 } 359 360 /* 361 * close the pfsnode (vp) after doing i/o. 362 * (vp) is not locked on entry or exit. 363 * 364 * nothing to do for procfs other than undo 365 * any exclusive open flag (see _open above). 366 */ 367 int 368 procfs_close(void *v) 369 { 370 struct vop_close_args /* { 371 struct vnode *a_vp; 372 int a_fflag; 373 kauth_cred_t a_cred; 374 } */ *ap = v; 375 struct pfsnode *pfs = VTOPFS(ap->a_vp); 376 377 switch (pfs->pfs_type) { 378 case PFSmem: 379 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) 380 pfs->pfs_flags &= ~(FWRITE|O_EXCL); 381 break; 382 383 default: 384 break; 385 } 386 387 return (0); 388 } 389 390 /* 391 * _inactive is called when the pfsnode 392 * is vrele'd and the reference count goes 393 * to zero. (vp) will be on the vnode free 394 * list, so to get it back vget() must be 395 * used. 396 * 397 * (vp) is locked on entry, but must be unlocked on exit. 398 */ 399 int 400 procfs_inactive(void *v) 401 { 402 struct vop_inactive_v2_args /* { 403 struct vnode *a_vp; 404 bool *a_recycle; 405 } */ *ap = v; 406 struct vnode *vp = ap->a_vp; 407 struct pfsnode *pfs = VTOPFS(vp); 408 409 mutex_enter(&proc_lock); 410 *ap->a_recycle = (procfs_proc_find(vp->v_mount, pfs->pfs_pid) == NULL); 411 mutex_exit(&proc_lock); 412 413 return (0); 414 } 415 416 /* 417 * _reclaim is called when getnewvnode() 418 * wants to make use of an entry on the vnode 419 * free list. at this time the filesystem needs 420 * to free any private data and remove the node 421 * from any private lists. 422 */ 423 int 424 procfs_reclaim(void *v) 425 { 426 struct vop_reclaim_v2_args /* { 427 struct vnode *a_vp; 428 } */ *ap = v; 429 struct vnode *vp = ap->a_vp; 430 struct pfsnode *pfs = VTOPFS(vp); 431 432 VOP_UNLOCK(vp); 433 434 /* 435 * To interlock with procfs_revoke_vnodes(). 436 */ 437 mutex_enter(vp->v_interlock); 438 vp->v_data = NULL; 439 mutex_exit(vp->v_interlock); 440 procfs_hashrem(pfs); 441 kmem_free(pfs, sizeof(*pfs)); 442 return 0; 443 } 444 445 /* 446 * Return POSIX pathconf information applicable to special devices. 447 */ 448 int 449 procfs_pathconf(void *v) 450 { 451 struct vop_pathconf_args /* { 452 struct vnode *a_vp; 453 int a_name; 454 register_t *a_retval; 455 } */ *ap = v; 456 457 switch (ap->a_name) { 458 case _PC_LINK_MAX: 459 *ap->a_retval = LINK_MAX; 460 return (0); 461 case _PC_MAX_CANON: 462 *ap->a_retval = MAX_CANON; 463 return (0); 464 case _PC_MAX_INPUT: 465 *ap->a_retval = MAX_INPUT; 466 return (0); 467 case _PC_PIPE_BUF: 468 *ap->a_retval = PIPE_BUF; 469 return (0); 470 case _PC_CHOWN_RESTRICTED: 471 *ap->a_retval = 1; 472 return (0); 473 case _PC_VDISABLE: 474 *ap->a_retval = _POSIX_VDISABLE; 475 return (0); 476 case _PC_SYNC_IO: 477 *ap->a_retval = 1; 478 return (0); 479 default: 480 return genfs_pathconf(ap); 481 } 482 /* NOTREACHED */ 483 } 484 485 /* 486 * _print is used for debugging. 487 * just print a readable description 488 * of (vp). 489 */ 490 int 491 procfs_print(void *v) 492 { 493 struct vop_print_args /* { 494 struct vnode *a_vp; 495 } */ *ap = v; 496 struct pfsnode *pfs = VTOPFS(ap->a_vp); 497 498 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n", 499 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags); 500 return 0; 501 } 502 503 /* 504 * Works out the path to the target process's current 505 * working directory or chroot. If the caller is in a chroot and 506 * can't "reach" the target's cwd or root (or some other error 507 * occurs), a "/" is returned for the path. 508 */ 509 static void 510 procfs_dir(pfstype t, struct lwp *caller, struct proc *target, char **bpp, 511 char *path, size_t len) 512 { 513 struct cwdinfo *cwdi; 514 struct vnode *vp, *rvp; 515 char *bp; 516 517 /* 518 * Lock target cwdi and take a reference to the vnode 519 * we are interested in to prevent it from disappearing 520 * before getcwd_common() below. 521 */ 522 rw_enter(&target->p_cwdi->cwdi_lock, RW_READER); 523 switch (t) { 524 case PFScwd: 525 vp = target->p_cwdi->cwdi_cdir; 526 break; 527 case PFSchroot: 528 vp = target->p_cwdi->cwdi_rdir; 529 break; 530 default: 531 rw_exit(&target->p_cwdi->cwdi_lock); 532 return; 533 } 534 if (vp != NULL) 535 vref(vp); 536 rw_exit(&target->p_cwdi->cwdi_lock); 537 538 cwdi = caller->l_proc->p_cwdi; 539 rw_enter(&cwdi->cwdi_lock, RW_READER); 540 541 rvp = cwdi->cwdi_rdir; 542 bp = bpp ? *bpp : NULL; 543 544 /* 545 * XXX: this horrible kludge avoids locking panics when 546 * attempting to lookup links that point to within procfs 547 */ 548 if (vp != NULL && vp->v_tag == VT_PROCFS) { 549 if (bpp) { 550 *--bp = '/'; 551 *bpp = bp; 552 } 553 vrele(vp); 554 rw_exit(&cwdi->cwdi_lock); 555 return; 556 } 557 558 if (rvp == NULL) 559 rvp = rootvnode; 560 if (vp == NULL || getcwd_common(vp, rvp, bp ? &bp : NULL, path, 561 len / 2, 0, caller) != 0) { 562 if (bpp) { 563 bp = *bpp; 564 *--bp = '/'; 565 } 566 } 567 568 if (bpp) 569 *bpp = bp; 570 571 if (vp != NULL) 572 vrele(vp); 573 rw_exit(&cwdi->cwdi_lock); 574 } 575 576 /* 577 * Invent attributes for pfsnode (vp) and store 578 * them in (vap). 579 * Directories lengths are returned as zero since 580 * any real length would require the genuine size 581 * to be computed, and nothing cares anyway. 582 * 583 * this is relatively minimal for procfs. 584 */ 585 int 586 procfs_getattr(void *v) 587 { 588 struct vop_getattr_args /* { 589 struct vnode *a_vp; 590 struct vattr *a_vap; 591 kauth_cred_t a_cred; 592 } */ *ap = v; 593 struct vnode *vp = ap->a_vp; 594 struct pfsnode *pfs = VTOPFS(vp); 595 struct vattr *vap = ap->a_vap; 596 struct proc *procp; 597 char *path, *bp, bf[16]; 598 int error; 599 600 /* first check the process still exists */ 601 switch (pfs->pfs_type) { 602 case PFSroot: 603 case PFScurproc: 604 case PFSself: 605 procp = NULL; 606 break; 607 608 default: 609 error = 610 procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &procp, ENOENT); 611 if (error != 0) 612 return (error); 613 break; 614 } 615 616 switch (pfs->pfs_type) { 617 case PFStask: 618 if (pfs->pfs_fd == -1) { 619 path = NULL; 620 break; 621 } 622 /*FALLTHROUGH*/ 623 case PFScwd: 624 case PFSchroot: 625 path = malloc(MAXPATHLEN + 4, M_TEMP, M_WAITOK); 626 if (path == NULL && procp != NULL) { 627 procfs_proc_unlock(procp); 628 return (ENOMEM); 629 } 630 break; 631 632 default: 633 path = NULL; 634 break; 635 } 636 637 if (procp != NULL) { 638 mutex_enter(procp->p_lock); 639 error = kauth_authorize_process(kauth_cred_get(), 640 KAUTH_PROCESS_CANSEE, procp, 641 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL); 642 mutex_exit(procp->p_lock); 643 if (error != 0) { 644 procfs_proc_unlock(procp); 645 if (path != NULL) 646 free(path, M_TEMP); 647 return (ENOENT); 648 } 649 } 650 651 error = 0; 652 653 /* start by zeroing out the attributes */ 654 vattr_null(vap); 655 656 /* next do all the common fields */ 657 vap->va_type = ap->a_vp->v_type; 658 vap->va_mode = pfs->pfs_mode; 659 vap->va_fileid = pfs->pfs_fileno; 660 vap->va_flags = 0; 661 vap->va_blocksize = PAGE_SIZE; 662 663 /* 664 * Make all times be current TOD. 665 * 666 * It would be possible to get the process start 667 * time from the p_stats structure, but there's 668 * no "file creation" time stamp anyway, and the 669 * p_stats structure is not addressable if u. gets 670 * swapped out for that process. 671 */ 672 getnanotime(&vap->va_ctime); 673 vap->va_atime = vap->va_mtime = vap->va_ctime; 674 if (procp) 675 TIMEVAL_TO_TIMESPEC(&procp->p_stats->p_start, 676 &vap->va_birthtime); 677 else 678 getnanotime(&vap->va_birthtime); 679 680 switch (pfs->pfs_type) { 681 case PFSmem: 682 case PFSregs: 683 case PFSfpregs: 684 #if defined(__HAVE_PROCFS_MACHDEP) && defined(PROCFS_MACHDEP_PROTECT_CASES) 685 PROCFS_MACHDEP_PROTECT_CASES 686 #endif 687 /* 688 * If the process has exercised some setuid or setgid 689 * privilege, then rip away read/write permission so 690 * that only root can gain access. 691 */ 692 if (procp->p_flag & PK_SUGID) 693 vap->va_mode &= ~(S_IRUSR|S_IWUSR); 694 /* FALLTHROUGH */ 695 case PFSstatus: 696 case PFSstat: 697 case PFSnote: 698 case PFSnotepg: 699 case PFScmdline: 700 case PFSenviron: 701 case PFSemul: 702 case PFSstatm: 703 704 case PFSmap: 705 case PFSmaps: 706 case PFSlimit: 707 case PFSauxv: 708 vap->va_nlink = 1; 709 vap->va_uid = kauth_cred_geteuid(procp->p_cred); 710 vap->va_gid = kauth_cred_getegid(procp->p_cred); 711 break; 712 case PFScwd: 713 case PFSchroot: 714 case PFSmeminfo: 715 case PFSdevices: 716 case PFScpuinfo: 717 case PFSuptime: 718 case PFSmounts: 719 case PFScpustat: 720 case PFSloadavg: 721 case PFSversion: 722 case PFSexe: 723 case PFSself: 724 case PFScurproc: 725 case PFSroot: 726 vap->va_nlink = 1; 727 vap->va_uid = vap->va_gid = 0; 728 break; 729 730 case PFSproc: 731 case PFStask: 732 case PFSfile: 733 case PFSfd: 734 break; 735 736 default: 737 panic("%s: %d/1", __func__, pfs->pfs_type); 738 } 739 740 /* 741 * now do the object specific fields 742 * 743 * The size could be set from struct reg, but it's hardly 744 * worth the trouble, and it puts some (potentially) machine 745 * dependent data into this machine-independent code. If it 746 * becomes important then this function should break out into 747 * a per-file stat function in the corresponding .c file. 748 */ 749 750 switch (pfs->pfs_type) { 751 case PFSroot: 752 vap->va_bytes = vap->va_size = DEV_BSIZE; 753 break; 754 755 case PFSself: 756 case PFScurproc: 757 vap->va_bytes = vap->va_size = 758 snprintf(bf, sizeof(bf), "%ld", (long)curproc->p_pid); 759 break; 760 case PFStask: 761 if (pfs->pfs_fd != -1) { 762 vap->va_nlink = 1; 763 vap->va_uid = 0; 764 vap->va_gid = 0; 765 vap->va_bytes = vap->va_size = 766 snprintf(bf, sizeof(bf), ".."); 767 break; 768 } 769 /*FALLTHROUGH*/ 770 case PFSfd: 771 if (pfs->pfs_fd != -1) { 772 file_t *fp; 773 774 fp = fd_getfile2(procp, pfs->pfs_fd); 775 if (fp == NULL) { 776 error = EBADF; 777 break; 778 } 779 vap->va_nlink = 1; 780 vap->va_uid = kauth_cred_geteuid(fp->f_cred); 781 vap->va_gid = kauth_cred_getegid(fp->f_cred); 782 switch (fp->f_type) { 783 case DTYPE_VNODE: 784 vap->va_bytes = vap->va_size = 785 fp->f_vnode->v_size; 786 break; 787 default: 788 vap->va_bytes = vap->va_size = 0; 789 break; 790 } 791 closef(fp); 792 break; 793 } 794 /*FALLTHROUGH*/ 795 case PFSproc: 796 vap->va_nlink = 2; 797 vap->va_uid = kauth_cred_geteuid(procp->p_cred); 798 vap->va_gid = kauth_cred_getegid(procp->p_cred); 799 vap->va_bytes = vap->va_size = DEV_BSIZE; 800 break; 801 802 case PFSfile: 803 error = EOPNOTSUPP; 804 break; 805 806 case PFSmem: 807 vap->va_bytes = vap->va_size = 808 ctob(procp->p_vmspace->vm_tsize + 809 procp->p_vmspace->vm_dsize + 810 procp->p_vmspace->vm_ssize); 811 break; 812 813 case PFSauxv: 814 vap->va_bytes = vap->va_size = procp->p_execsw->es_arglen; 815 break; 816 817 #if defined(PT_GETREGS) || defined(PT_SETREGS) 818 case PFSregs: 819 vap->va_bytes = vap->va_size = sizeof(struct reg); 820 break; 821 #endif 822 823 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS) 824 case PFSfpregs: 825 vap->va_bytes = vap->va_size = sizeof(struct fpreg); 826 break; 827 #endif 828 829 case PFSstatus: 830 case PFSstat: 831 case PFSnote: 832 case PFSnotepg: 833 case PFScmdline: 834 case PFSenviron: 835 case PFSmeminfo: 836 case PFSdevices: 837 case PFScpuinfo: 838 case PFSuptime: 839 case PFSmounts: 840 case PFScpustat: 841 case PFSloadavg: 842 case PFSstatm: 843 case PFSversion: 844 vap->va_bytes = vap->va_size = 0; 845 break; 846 case PFSlimit: 847 case PFSmap: 848 case PFSmaps: 849 /* 850 * Advise a larger blocksize for the map files, so that 851 * they may be read in one pass. 852 */ 853 vap->va_blocksize = 4 * PAGE_SIZE; 854 vap->va_bytes = vap->va_size = 0; 855 break; 856 857 case PFScwd: 858 case PFSchroot: 859 bp = path + MAXPATHLEN; 860 *--bp = '\0'; 861 procfs_dir(pfs->pfs_type, curlwp, procp, &bp, path, 862 MAXPATHLEN); 863 vap->va_bytes = vap->va_size = strlen(bp); 864 break; 865 866 case PFSexe: 867 vap->va_bytes = vap->va_size = strlen(procp->p_path); 868 break; 869 870 case PFSemul: 871 vap->va_bytes = vap->va_size = strlen(procp->p_emul->e_name); 872 break; 873 874 #ifdef __HAVE_PROCFS_MACHDEP 875 PROCFS_MACHDEP_NODETYPE_CASES 876 error = procfs_machdep_getattr(ap->a_vp, vap, procp); 877 break; 878 #endif 879 880 default: 881 panic("%s: %d/2", __func__, pfs->pfs_type); 882 } 883 884 if (procp != NULL) 885 procfs_proc_unlock(procp); 886 if (path != NULL) 887 free(path, M_TEMP); 888 889 return (error); 890 } 891 892 /*ARGSUSED*/ 893 int 894 procfs_setattr(void *v) 895 { 896 /* 897 * just fake out attribute setting 898 * it's not good to generate an error 899 * return, otherwise things like creat() 900 * will fail when they try to set the 901 * file length to 0. worse, this means 902 * that echo $note > /proc/$pid/note will fail. 903 */ 904 905 return (0); 906 } 907 908 /* 909 * implement access checking. 910 * 911 * actually, the check for super-user is slightly 912 * broken since it will allow read access to write-only 913 * objects. this doesn't cause any particular trouble 914 * but does mean that the i/o entry points need to check 915 * that the operation really does make sense. 916 */ 917 int 918 procfs_access(void *v) 919 { 920 struct vop_access_args /* { 921 struct vnode *a_vp; 922 accmode_t a_accmode; 923 kauth_cred_t a_cred; 924 } */ *ap = v; 925 struct vattr va; 926 int error; 927 928 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred)) != 0) 929 return (error); 930 931 return kauth_authorize_vnode(ap->a_cred, 932 KAUTH_ACCESS_ACTION(ap->a_accmode, ap->a_vp->v_type, va.va_mode), 933 ap->a_vp, NULL, genfs_can_access(ap->a_vp, ap->a_cred, 934 va.va_uid, va.va_gid, va.va_mode, NULL, ap->a_accmode)); 935 } 936 937 /* 938 * lookup. this is incredibly complicated in the 939 * general case, however for most pseudo-filesystems 940 * very little needs to be done. 941 * 942 * Locking isn't hard here, just poorly documented. 943 * 944 * If we're looking up ".", just vref the parent & return it. 945 * 946 * If we're looking up "..", unlock the parent, and lock "..". If everything 947 * went ok, and we're on the last component and the caller requested the 948 * parent locked, try to re-lock the parent. We do this to prevent lock 949 * races. 950 * 951 * For anything else, get the needed node. Then unlock the parent if not 952 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the 953 * parent in the .. case). 954 * 955 * We try to exit with the parent locked in error cases. 956 */ 957 int 958 procfs_lookup(void *v) 959 { 960 struct vop_lookup_v2_args /* { 961 struct vnode * a_dvp; 962 struct vnode ** a_vpp; 963 struct componentname * a_cnp; 964 } */ *ap = v; 965 struct componentname *cnp = ap->a_cnp; 966 struct vnode **vpp = ap->a_vpp; 967 struct vnode *dvp = ap->a_dvp; 968 const char *pname = cnp->cn_nameptr; 969 const struct proc_target *pt = NULL; 970 struct vnode *fvp; 971 pid_t pid, vnpid; 972 struct pfsnode *pfs; 973 struct proc *p = NULL; 974 struct lwp *plwp; 975 int i, error; 976 pfstype type; 977 978 *vpp = NULL; 979 980 if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred)) != 0) 981 return (error); 982 983 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) 984 return (EROFS); 985 986 if (cnp->cn_namelen == 1 && *pname == '.') { 987 *vpp = dvp; 988 vref(dvp); 989 return (0); 990 } 991 992 pfs = VTOPFS(dvp); 993 switch (pfs->pfs_type) { 994 case PFSroot: 995 /* 996 * Shouldn't get here with .. in the root node. 997 */ 998 if (cnp->cn_flags & ISDOTDOT) 999 return (EIO); 1000 1001 for (i = 0; i < nproc_root_targets; i++) { 1002 pt = &proc_root_targets[i]; 1003 /* 1004 * check for node match. proc is always NULL here, 1005 * so call pt_valid with constant NULL lwp. 1006 */ 1007 if (cnp->cn_namelen == pt->pt_namlen && 1008 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 1009 (pt->pt_valid == NULL || 1010 (*pt->pt_valid)(NULL, dvp->v_mount))) 1011 break; 1012 } 1013 1014 if (i != nproc_root_targets) { 1015 error = procfs_allocvp(dvp->v_mount, vpp, 0, 1016 pt->pt_pfstype, -1); 1017 return (error); 1018 } 1019 1020 if (CNEQ(cnp, "curproc", 7)) { 1021 pid = curproc->p_pid; 1022 vnpid = 0; 1023 type = PFScurproc; 1024 } else if (CNEQ(cnp, "self", 4)) { 1025 pid = curproc->p_pid; 1026 vnpid = 0; 1027 type = PFSself; 1028 } else { 1029 pid = (pid_t)atoi(pname, cnp->cn_namelen); 1030 vnpid = pid; 1031 type = PFSproc; 1032 } 1033 1034 if (procfs_proc_lock(dvp->v_mount, pid, &p, ESRCH) != 0) 1035 break; 1036 error = procfs_allocvp(dvp->v_mount, vpp, vnpid, type, -1); 1037 procfs_proc_unlock(p); 1038 return (error); 1039 1040 case PFSproc: 1041 if (cnp->cn_flags & ISDOTDOT) { 1042 error = procfs_allocvp(dvp->v_mount, vpp, 0, PFSroot, 1043 -1); 1044 return (error); 1045 } 1046 1047 if (procfs_proc_lock(dvp->v_mount, pfs->pfs_pid, &p, 1048 ESRCH) != 0) 1049 break; 1050 1051 mutex_enter(p->p_lock); 1052 LIST_FOREACH(plwp, &p->p_lwps, l_sibling) { 1053 if (plwp->l_stat != LSZOMB) 1054 break; 1055 } 1056 /* Process is exiting if no-LWPS or all LWPs are LSZOMB */ 1057 if (plwp == NULL) { 1058 mutex_exit(p->p_lock); 1059 procfs_proc_unlock(p); 1060 return ESRCH; 1061 } 1062 1063 lwp_addref(plwp); 1064 mutex_exit(p->p_lock); 1065 1066 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) { 1067 int found; 1068 1069 found = cnp->cn_namelen == pt->pt_namlen && 1070 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 1071 (pt->pt_valid == NULL 1072 || (*pt->pt_valid)(plwp, dvp->v_mount)); 1073 if (found) 1074 break; 1075 } 1076 lwp_delref(plwp); 1077 1078 if (i == nproc_targets) { 1079 procfs_proc_unlock(p); 1080 break; 1081 } 1082 if (pt->pt_pfstype == PFSfile) { 1083 fvp = p->p_textvp; 1084 /* We already checked that it exists. */ 1085 vref(fvp); 1086 procfs_proc_unlock(p); 1087 *vpp = fvp; 1088 return (0); 1089 } 1090 1091 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 1092 pt->pt_pfstype, -1); 1093 procfs_proc_unlock(p); 1094 return (error); 1095 1096 case PFSfd: { 1097 int fd; 1098 file_t *fp; 1099 1100 if ((error = procfs_proc_lock(dvp->v_mount, pfs->pfs_pid, &p, 1101 ENOENT)) != 0) 1102 return error; 1103 1104 if (cnp->cn_flags & ISDOTDOT) { 1105 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 1106 PFSproc, -1); 1107 procfs_proc_unlock(p); 1108 return (error); 1109 } 1110 fd = atoi(pname, cnp->cn_namelen); 1111 1112 fp = fd_getfile2(p, fd); 1113 if (fp == NULL) { 1114 procfs_proc_unlock(p); 1115 return ENOENT; 1116 } 1117 fvp = fp->f_vnode; 1118 1119 /* Don't show directories */ 1120 if (fp->f_type == DTYPE_VNODE && fvp->v_type != VDIR && 1121 !procfs_proc_is_linux_compat()) { 1122 vref(fvp); 1123 closef(fp); 1124 procfs_proc_unlock(p); 1125 *vpp = fvp; 1126 return 0; 1127 } 1128 1129 closef(fp); 1130 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 1131 PFSfd, fd); 1132 procfs_proc_unlock(p); 1133 return error; 1134 } 1135 case PFStask: { 1136 int xpid; 1137 1138 if ((error = procfs_proc_lock(dvp->v_mount, pfs->pfs_pid, &p, 1139 ENOENT)) != 0) 1140 return error; 1141 1142 if (cnp->cn_flags & ISDOTDOT) { 1143 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 1144 PFSproc, -1); 1145 procfs_proc_unlock(p); 1146 return (error); 1147 } 1148 xpid = atoi(pname, cnp->cn_namelen); 1149 1150 if (xpid != pfs->pfs_pid) { 1151 procfs_proc_unlock(p); 1152 return ENOENT; 1153 } 1154 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 1155 PFStask, 0); 1156 procfs_proc_unlock(p); 1157 return error; 1158 } 1159 default: 1160 return (ENOTDIR); 1161 } 1162 1163 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS); 1164 } 1165 1166 int 1167 procfs_validfile(struct lwp *l, struct mount *mp) 1168 { 1169 return l != NULL && l->l_proc != NULL && l->l_proc->p_textvp != NULL; 1170 } 1171 1172 static int 1173 procfs_validfile_linux(struct lwp *l, struct mount *mp) 1174 { 1175 return procfs_use_linux_compat(mp) && 1176 (l == NULL || l->l_proc == NULL || procfs_validfile(l, mp)); 1177 } 1178 1179 struct procfs_root_readdir_ctx { 1180 struct uio *uiop; 1181 off_t *cookies; 1182 int ncookies; 1183 off_t off; 1184 off_t startoff; 1185 int error; 1186 }; 1187 1188 static int 1189 procfs_root_readdir_callback(struct proc *p, void *arg) 1190 { 1191 struct procfs_root_readdir_ctx *ctxp = arg; 1192 struct dirent d; 1193 struct uio *uiop; 1194 int error; 1195 1196 uiop = ctxp->uiop; 1197 if (uiop->uio_resid < UIO_MX) 1198 return -1; /* no space */ 1199 1200 if (kauth_authorize_process(kauth_cred_get(), 1201 KAUTH_PROCESS_CANSEE, p, 1202 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL) != 0) 1203 return 0; 1204 1205 if (ctxp->off < ctxp->startoff) { 1206 ctxp->off++; 1207 return 0; 1208 } 1209 1210 memset(&d, 0, UIO_MX); 1211 d.d_reclen = UIO_MX; 1212 d.d_fileno = PROCFS_FILENO(p->p_pid, PFSproc, -1); 1213 d.d_namlen = snprintf(d.d_name, 1214 UIO_MX - offsetof(struct dirent, d_name), "%ld", (long)p->p_pid); 1215 d.d_type = DT_DIR; 1216 1217 mutex_exit(&proc_lock); 1218 error = uiomove(&d, UIO_MX, uiop); 1219 mutex_enter(&proc_lock); 1220 if (error) { 1221 ctxp->error = error; 1222 return -1; 1223 } 1224 1225 ctxp->ncookies++; 1226 if (ctxp->cookies) 1227 *(ctxp->cookies)++ = ctxp->off + 1; 1228 ctxp->off++; 1229 1230 return 0; 1231 } 1232 1233 /* 1234 * readdir returns directory entries from pfsnode (vp). 1235 * 1236 * the strategy here with procfs is to generate a single 1237 * directory entry at a time (struct dirent) and then 1238 * copy that out to userland using uiomove. a more efficient 1239 * though more complex implementation, would try to minimize 1240 * the number of calls to uiomove(). for procfs, this is 1241 * hardly worth the added code complexity. 1242 * 1243 * this should just be done through read() 1244 */ 1245 int 1246 procfs_readdir(void *v) 1247 { 1248 struct vop_readdir_args /* { 1249 struct vnode *a_vp; 1250 struct uio *a_uio; 1251 kauth_cred_t a_cred; 1252 int *a_eofflag; 1253 off_t **a_cookies; 1254 int *a_ncookies; 1255 } */ *ap = v; 1256 struct uio *uio = ap->a_uio; 1257 struct dirent d; 1258 struct pfsnode *pfs; 1259 off_t i; 1260 int error; 1261 off_t *cookies = NULL; 1262 int ncookies; 1263 struct vnode *vp; 1264 const struct proc_target *pt; 1265 struct procfs_root_readdir_ctx ctx; 1266 struct proc *p = NULL; 1267 struct lwp *l; 1268 int nfd; 1269 int nc = 0; 1270 1271 vp = ap->a_vp; 1272 pfs = VTOPFS(vp); 1273 1274 if (uio->uio_resid < UIO_MX) 1275 return (EINVAL); 1276 if (uio->uio_offset < 0) 1277 return (EINVAL); 1278 1279 error = 0; 1280 i = uio->uio_offset; 1281 memset(&d, 0, UIO_MX); 1282 d.d_reclen = UIO_MX; 1283 ncookies = uio->uio_resid / UIO_MX; 1284 1285 switch (pfs->pfs_type) { 1286 /* 1287 * this is for the process-specific sub-directories. 1288 * all that is needed to is copy out all the entries 1289 * from the procent[] table (top of this file). 1290 */ 1291 case PFSproc: { 1292 1293 if (i >= nproc_targets) 1294 return 0; 1295 1296 if (procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p, ESRCH) != 0) 1297 break; 1298 1299 if (ap->a_ncookies) { 1300 ncookies = uimin(ncookies, (nproc_targets - i)); 1301 cookies = malloc(ncookies * sizeof (off_t), 1302 M_TEMP, M_WAITOK); 1303 *ap->a_cookies = cookies; 1304 } 1305 1306 for (pt = &proc_targets[i]; 1307 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) { 1308 if (pt->pt_valid) { 1309 /* XXXSMP LWP can disappear */ 1310 mutex_enter(p->p_lock); 1311 l = LIST_FIRST(&p->p_lwps); 1312 KASSERT(l != NULL); 1313 mutex_exit(p->p_lock); 1314 if ((*pt->pt_valid)(l, vp->v_mount) == 0) 1315 continue; 1316 } 1317 1318 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, 1319 pt->pt_pfstype, -1); 1320 d.d_namlen = pt->pt_namlen; 1321 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1322 d.d_type = pt->pt_type; 1323 1324 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1325 break; 1326 if (cookies) 1327 *cookies++ = i + 1; 1328 } 1329 1330 procfs_proc_unlock(p); 1331 break; 1332 } 1333 case PFSfd: { 1334 file_t *fp; 1335 int lim; 1336 1337 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p, 1338 ESRCH)) != 0) 1339 return error; 1340 1341 /* XXX Should this be by file as well? */ 1342 if (kauth_authorize_process(kauth_cred_get(), 1343 KAUTH_PROCESS_CANSEE, p, 1344 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_OPENFILES), NULL, 1345 NULL) != 0) { 1346 procfs_proc_unlock(p); 1347 return ESRCH; 1348 } 1349 1350 nfd = atomic_load_consume(&p->p_fd->fd_dt)->dt_nfiles; 1351 1352 lim = uimin((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 1353 if (i >= lim) { 1354 procfs_proc_unlock(p); 1355 return 0; 1356 } 1357 1358 if (ap->a_ncookies) { 1359 ncookies = uimin(ncookies, (nfd + 2 - i)); 1360 cookies = malloc(ncookies * sizeof (off_t), 1361 M_TEMP, M_WAITOK); 1362 *ap->a_cookies = cookies; 1363 } 1364 1365 for (; i < 2 && uio->uio_resid >= UIO_MX; i++) { 1366 pt = &proc_targets[i]; 1367 d.d_namlen = pt->pt_namlen; 1368 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, 1369 pt->pt_pfstype, -1); 1370 (void)memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1371 d.d_type = pt->pt_type; 1372 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1373 break; 1374 if (cookies) 1375 *cookies++ = i + 1; 1376 nc++; 1377 } 1378 if (error) 1379 goto out; 1380 for (; uio->uio_resid >= UIO_MX && i < nfd; i++) { 1381 /* check the descriptor exists */ 1382 if ((fp = fd_getfile2(p, i - 2)) == NULL) 1383 continue; 1384 closef(fp); 1385 1386 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, PFSfd, i - 2); 1387 d.d_namlen = snprintf(d.d_name, sizeof(d.d_name), 1388 "%lld", (long long)(i - 2)); 1389 d.d_type = fttodt(fp); 1390 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1391 break; 1392 if (cookies) 1393 *cookies++ = i + 1; 1394 nc++; 1395 } 1396 goto out; 1397 } 1398 case PFStask: { 1399 1400 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p, 1401 ESRCH)) != 0) 1402 return error; 1403 1404 nfd = 3; /* ., .., pid */ 1405 1406 if (ap->a_ncookies) { 1407 ncookies = uimin(ncookies, (nfd + 2 - i)); 1408 cookies = malloc(ncookies * sizeof (off_t), 1409 M_TEMP, M_WAITOK); 1410 *ap->a_cookies = cookies; 1411 } 1412 1413 for (; i < 2 && uio->uio_resid >= UIO_MX; i++) { 1414 pt = &proc_targets[i]; 1415 d.d_namlen = pt->pt_namlen; 1416 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, 1417 pt->pt_pfstype, -1); 1418 (void)memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1419 d.d_type = pt->pt_type; 1420 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1421 break; 1422 if (cookies) 1423 *cookies++ = i + 1; 1424 nc++; 1425 } 1426 if (error) 1427 goto out; 1428 for (; uio->uio_resid >= UIO_MX && i < nfd; i++) { 1429 /* check the descriptor exists */ 1430 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, PFStask, 1431 i - 2); 1432 d.d_namlen = snprintf(d.d_name, sizeof(d.d_name), 1433 "%ld", (long)pfs->pfs_pid); 1434 d.d_type = DT_LNK; 1435 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1436 break; 1437 if (cookies) 1438 *cookies++ = i + 1; 1439 nc++; 1440 } 1441 goto out; 1442 } 1443 1444 /* 1445 * this is for the root of the procfs filesystem 1446 * what is needed are special entries for "curproc" 1447 * and "self" followed by an entry for each process 1448 * on allproc. 1449 */ 1450 1451 case PFSroot: { 1452 1453 if (ap->a_ncookies) { 1454 /* 1455 * XXX Potentially allocating too much space here, 1456 * but I'm lazy. This loop needs some work. 1457 */ 1458 cookies = malloc(ncookies * sizeof (off_t), 1459 M_TEMP, M_WAITOK); 1460 *ap->a_cookies = cookies; 1461 } 1462 1463 /* 0 ... 3 are static entries. */ 1464 for (; i <= 3 && uio->uio_resid >= UIO_MX; i++) { 1465 switch (i) { 1466 case 0: /* `.' */ 1467 case 1: /* `..' */ 1468 d.d_fileno = PROCFS_FILENO(0, PFSroot, -1); 1469 d.d_namlen = i + 1; 1470 memcpy(d.d_name, "..", d.d_namlen); 1471 d.d_name[i + 1] = '\0'; 1472 d.d_type = DT_DIR; 1473 break; 1474 1475 case 2: 1476 d.d_fileno = PROCFS_FILENO(0, PFScurproc, -1); 1477 d.d_namlen = sizeof("curproc") - 1; 1478 memcpy(d.d_name, "curproc", sizeof("curproc")); 1479 d.d_type = DT_LNK; 1480 break; 1481 1482 case 3: 1483 d.d_fileno = PROCFS_FILENO(0, PFSself, -1); 1484 d.d_namlen = sizeof("self") - 1; 1485 memcpy(d.d_name, "self", sizeof("self")); 1486 d.d_type = DT_LNK; 1487 break; 1488 } 1489 1490 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1491 break; 1492 nc++; 1493 if (cookies) 1494 *cookies++ = i + 1; 1495 } 1496 if (error) 1497 break; 1498 /* 4 ... are process entries. */ 1499 ctx.uiop = uio; 1500 ctx.error = 0; 1501 ctx.off = 4; 1502 ctx.startoff = i; 1503 ctx.cookies = cookies; 1504 ctx.ncookies = nc; 1505 proclist_foreach_call(&allproc, 1506 procfs_root_readdir_callback, &ctx); 1507 cookies = ctx.cookies; 1508 nc = ctx.ncookies; 1509 error = ctx.error; 1510 if (error) 1511 break; 1512 1513 /* misc entries. */ 1514 if (i < ctx.off) 1515 i = ctx.off; 1516 if (i >= ctx.off + nproc_root_targets) 1517 break; 1518 error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p, ESRCH); 1519 if (error) 1520 break; 1521 for (pt = &proc_root_targets[i - ctx.off]; 1522 uio->uio_resid >= UIO_MX && 1523 pt < &proc_root_targets[nproc_root_targets]; 1524 pt++, i++) { 1525 if (pt->pt_valid && 1526 (*pt->pt_valid)(NULL, vp->v_mount) == 0) 1527 continue; 1528 if (kauth_authorize_process(kauth_cred_get(), 1529 KAUTH_PROCESS_CANSEE, p, 1530 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), 1531 NULL, NULL) != 0) 1532 continue; 1533 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype, -1); 1534 d.d_namlen = pt->pt_namlen; 1535 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1536 d.d_type = pt->pt_type; 1537 1538 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1539 break; 1540 nc++; 1541 if (cookies) 1542 *cookies++ = i + 1; 1543 } 1544 out: 1545 KASSERT(p != NULL); 1546 ncookies = nc; 1547 procfs_proc_unlock(p); 1548 break; 1549 } 1550 1551 default: 1552 error = ENOTDIR; 1553 break; 1554 } 1555 1556 if (ap->a_ncookies) { 1557 if (error) { 1558 if (cookies) 1559 free(*ap->a_cookies, M_TEMP); 1560 *ap->a_ncookies = 0; 1561 *ap->a_cookies = NULL; 1562 } else 1563 *ap->a_ncookies = ncookies; 1564 } 1565 uio->uio_offset = i; 1566 return (error); 1567 } 1568 1569 /* 1570 * readlink reads the link of `curproc' and others 1571 */ 1572 int 1573 procfs_readlink(void *v) 1574 { 1575 struct vop_readlink_args *ap = v; 1576 char bf[16]; /* should be enough */ 1577 char *bp = bf; 1578 char *path = NULL; 1579 int len = 0; 1580 int error = 0; 1581 struct vnode *vp = ap->a_vp; 1582 struct pfsnode *pfs = VTOPFS(vp); 1583 struct proc *pown = NULL; 1584 1585 if (pfs->pfs_fileno == PROCFS_FILENO(0, PFScurproc, -1)) 1586 len = snprintf(bf, sizeof(bf), "%ld", (long)curproc->p_pid); 1587 else if (pfs->pfs_fileno == PROCFS_FILENO(0, PFSself, -1)) 1588 len = snprintf(bf, sizeof(bf), "%s", "curproc"); 1589 else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFStask, 0)) 1590 len = snprintf(bf, sizeof(bf), ".."); 1591 else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFSexe, -1)) { 1592 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &pown, 1593 ESRCH)) != 0) 1594 return error; 1595 bp = pown->p_path; 1596 len = strlen(bp); 1597 } else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFScwd, -1) || 1598 pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFSchroot, -1)) { 1599 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &pown, 1600 ESRCH)) != 0) 1601 return error; 1602 path = malloc(MAXPATHLEN + 4, M_TEMP, M_WAITOK); 1603 if (path == NULL) { 1604 procfs_proc_unlock(pown); 1605 return (ENOMEM); 1606 } 1607 bp = path + MAXPATHLEN; 1608 *--bp = '\0'; 1609 procfs_dir(PROCFS_TYPE(pfs->pfs_fileno), curlwp, pown, 1610 &bp, path, MAXPATHLEN); 1611 len = strlen(bp); 1612 } else { 1613 file_t *fp; 1614 struct vnode *vxp; 1615 1616 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &pown, 1617 ESRCH)) != 0) 1618 return error; 1619 1620 fp = fd_getfile2(pown, pfs->pfs_fd); 1621 if (fp == NULL) { 1622 procfs_proc_unlock(pown); 1623 return EBADF; 1624 } 1625 1626 switch (fp->f_type) { 1627 case DTYPE_VNODE: 1628 vxp = fp->f_vnode; 1629 if (vxp->v_type != VDIR && 1630 !procfs_proc_is_linux_compat()) { 1631 error = EINVAL; 1632 break; 1633 } 1634 if ((path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK)) 1635 == NULL) { 1636 error = ENOMEM; 1637 break; 1638 } 1639 bp = path + MAXPATHLEN; 1640 *--bp = '\0'; 1641 1642 /* 1643 * XXX: kludge to avoid locking against ourselves 1644 * in getcwd() 1645 */ 1646 if (vxp->v_tag == VT_PROCFS) { 1647 *--bp = '/'; 1648 } else { 1649 rw_enter(&curproc->p_cwdi->cwdi_lock, 1650 RW_READER); 1651 vp = curproc->p_cwdi->cwdi_rdir; 1652 if (vp == NULL) 1653 vp = rootvnode; 1654 error = getcwd_common(vxp, vp, &bp, path, 1655 MAXPATHLEN / 2, 0, curlwp); 1656 rw_exit(&curproc->p_cwdi->cwdi_lock); 1657 } 1658 if (error) 1659 break; 1660 len = strlen(bp); 1661 break; 1662 1663 case DTYPE_MISC: 1664 len = snprintf(bf, sizeof(bf), "%s", "[misc]"); 1665 break; 1666 1667 case DTYPE_KQUEUE: 1668 len = snprintf(bf, sizeof(bf), "%s", "[kqueue]"); 1669 break; 1670 1671 case DTYPE_SEM: 1672 len = snprintf(bf, sizeof(bf), "%s", "[ksem]"); 1673 break; 1674 1675 default: 1676 error = EINVAL; 1677 break; 1678 } 1679 closef(fp); 1680 } 1681 1682 if (error == 0) 1683 error = uiomove(bp, len, ap->a_uio); 1684 if (pown) 1685 procfs_proc_unlock(pown); 1686 if (path) 1687 free(path, M_TEMP); 1688 return error; 1689 } 1690 1691 int 1692 procfs_getpages(void *v) 1693 { 1694 struct vop_getpages_args /* { 1695 struct vnode *a_vp; 1696 voff_t a_offset; 1697 struct vm_page **a_m; 1698 int *a_count; 1699 int a_centeridx; 1700 vm_prot_t a_access_type; 1701 int a_advice; 1702 int a_flags; 1703 } */ *ap = v; 1704 1705 if ((ap->a_flags & PGO_LOCKED) == 0) 1706 rw_exit(ap->a_vp->v_uobj.vmobjlock); 1707 1708 return (EFAULT); 1709 } 1710 1711 /* 1712 * convert decimal ascii to int 1713 */ 1714 static int 1715 atoi(const char *b, size_t len) 1716 { 1717 int p = 0; 1718 1719 while (len--) { 1720 char c = *b++; 1721 if (c < '0' || c > '9') 1722 return -1; 1723 p = 10 * p + (c - '0'); 1724 } 1725 1726 return p; 1727 } 1728 1729 /** 1730 * convert DTYPE_XXX to corresponding DT_XXX 1731 * matching what procfs_loadvnode() does. 1732 */ 1733 static uint8_t 1734 fttodt(file_t *fp) 1735 { 1736 switch (fp->f_type) { 1737 case DTYPE_VNODE: 1738 switch (fp->f_vnode->v_type) { 1739 case VREG: return DT_REG; 1740 case VDIR: return DT_LNK; /* symlink */ 1741 case VBLK: return DT_BLK; 1742 case VCHR: return DT_CHR; 1743 case VLNK: return DT_LNK; 1744 case VSOCK: return DT_SOCK; 1745 case VFIFO: return DT_FIFO; 1746 default: return DT_UNKNOWN; 1747 } 1748 case DTYPE_PIPE: return DT_FIFO; 1749 case DTYPE_SOCKET: return DT_SOCK; 1750 case DTYPE_KQUEUE: /*FALLTHROUGH*/ 1751 case DTYPE_MISC: /*FALLTHROUGH*/ 1752 case DTYPE_SEM: return DT_LNK; /* symlinks */ 1753 default: return DT_UNKNOWN; 1754 } 1755 } 1756