1 /* $NetBSD: procfs_vnops.c,v 1.232 2024/05/12 17:26:51 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1993, 1995 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Jan-Simon Pendry. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 * 63 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 64 */ 65 66 /* 67 * Copyright (c) 1993 Jan-Simon Pendry 68 * 69 * This code is derived from software contributed to Berkeley by 70 * Jan-Simon Pendry. 71 * 72 * Redistribution and use in source and binary forms, with or without 73 * modification, are permitted provided that the following conditions 74 * are met: 75 * 1. Redistributions of source code must retain the above copyright 76 * notice, this list of conditions and the following disclaimer. 77 * 2. Redistributions in binary form must reproduce the above copyright 78 * notice, this list of conditions and the following disclaimer in the 79 * documentation and/or other materials provided with the distribution. 80 * 3. All advertising materials mentioning features or use of this software 81 * must display the following acknowledgement: 82 * This product includes software developed by the University of 83 * California, Berkeley and its contributors. 84 * 4. Neither the name of the University nor the names of its contributors 85 * may be used to endorse or promote products derived from this software 86 * without specific prior written permission. 87 * 88 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 89 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 90 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 91 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 92 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 93 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 94 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 95 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 96 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 97 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 98 * SUCH DAMAGE. 99 * 100 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 101 */ 102 103 /* 104 * procfs vnode interface 105 */ 106 107 #include <sys/cdefs.h> 108 __KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.232 2024/05/12 17:26:51 christos Exp $"); 109 110 #include <sys/param.h> 111 #include <sys/atomic.h> 112 #include <sys/systm.h> 113 #include <sys/time.h> 114 #include <sys/kernel.h> 115 #include <sys/file.h> 116 #include <sys/filedesc.h> 117 #include <sys/proc.h> 118 #include <sys/vnode.h> 119 #include <sys/namei.h> 120 #include <sys/malloc.h> 121 #include <sys/mount.h> 122 #include <sys/dirent.h> 123 #include <sys/resourcevar.h> 124 #include <sys/stat.h> 125 #include <sys/ptrace.h> 126 #include <sys/kauth.h> 127 #include <sys/exec.h> 128 129 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */ 130 131 #include <machine/reg.h> 132 133 #include <miscfs/genfs/genfs.h> 134 #include <miscfs/procfs/procfs.h> 135 136 /* 137 * Vnode Operations. 138 * 139 */ 140 141 static int procfs_validfile_linux(struct lwp *, struct mount *); 142 static int procfs_root_readdir_callback(struct proc *, void *); 143 static void procfs_dir(pfstype, struct lwp *, struct proc *, char **, char *, 144 size_t); 145 146 /* 147 * This is a list of the valid names in the 148 * process-specific sub-directories. It is 149 * used in procfs_lookup and procfs_readdir 150 */ 151 static const struct proc_target { 152 u_char pt_type; 153 u_char pt_namlen; 154 const char *pt_name; 155 pfstype pt_pfstype; 156 int (*pt_valid)(struct lwp *, struct mount *); 157 } proc_targets[] = { 158 #define N(s) sizeof(s)-1, s 159 /* name type validp */ 160 { DT_DIR, N("."), PFSproc, NULL }, 161 { DT_DIR, N(".."), PFSroot, NULL }, 162 { DT_DIR, N("fd"), PFSfd, NULL }, 163 { DT_DIR, N("task"), PFStask, procfs_validfile_linux }, 164 { DT_LNK, N("cwd"), PFScwd, NULL }, 165 { DT_REG, N("emul"), PFSemul, NULL }, 166 { DT_LNK, N("root"), PFSchroot, NULL }, 167 { DT_REG, N("auxv"), PFSauxv, procfs_validauxv }, 168 { DT_REG, N("cmdline"), PFScmdline, NULL }, 169 { DT_REG, N("environ"), PFSenviron, NULL }, 170 { DT_LNK, N("exe"), PFSexe, procfs_validfile }, 171 { DT_REG, N("file"), PFSfile, procfs_validfile }, 172 { DT_REG, N("fpregs"), PFSfpregs, procfs_validfpregs }, 173 { DT_REG, N("limit"), PFSlimit, NULL }, 174 { DT_REG, N("limits"), PFSlimits, procfs_validfile_linux }, 175 { DT_REG, N("map"), PFSmap, procfs_validmap }, 176 { DT_REG, N("maps"), PFSmaps, procfs_validmap }, 177 { DT_REG, N("mem"), PFSmem, NULL }, 178 { DT_REG, N("note"), PFSnote, NULL }, 179 { DT_REG, N("notepg"), PFSnotepg, NULL }, 180 { DT_REG, N("regs"), PFSregs, procfs_validregs }, 181 { DT_REG, N("stat"), PFSstat, procfs_validfile_linux }, 182 { DT_REG, N("statm"), PFSstatm, procfs_validfile_linux }, 183 { DT_REG, N("status"), PFSstatus, NULL }, 184 #ifdef __HAVE_PROCFS_MACHDEP 185 PROCFS_MACHDEP_NODETYPE_DEFNS 186 #endif 187 #undef N 188 }; 189 static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]); 190 191 /* 192 * List of files in the root directory. Note: the validate function will 193 * be called with p == NULL for these ones. 194 */ 195 static const struct proc_target proc_root_targets[] = { 196 #define N(s) sizeof(s)-1, s 197 /* name type validp */ 198 { DT_REG, N("meminfo"), PFSmeminfo, procfs_validfile_linux }, 199 { DT_REG, N("cpuinfo"), PFScpuinfo, procfs_validfile_linux }, 200 { DT_REG, N("uptime"), PFSuptime, procfs_validfile_linux }, 201 { DT_REG, N("mounts"), PFSmounts, procfs_validfile_linux }, 202 { DT_REG, N("devices"), PFSdevices, procfs_validfile_linux }, 203 { DT_REG, N("stat"), PFScpustat, procfs_validfile_linux }, 204 { DT_REG, N("loadavg"), PFSloadavg, procfs_validfile_linux }, 205 { DT_REG, N("version"), PFSversion, procfs_validfile_linux }, 206 { DT_DIR, N("sysvipc"), PFSsysvipc, procfs_validfile_linux }, 207 #undef N 208 }; 209 static const int nproc_root_targets = 210 sizeof(proc_root_targets) / sizeof(proc_root_targets[0]); 211 212 /* 213 * List of files in the sysvipc directory 214 */ 215 static const struct proc_target proc_sysvipc_targets[] = { 216 #define N(s) sizeof(s)-1, s 217 /* name type validp */ 218 { DT_DIR, N("."), PFSsysvipc, NULL }, 219 { DT_DIR, N(".."), PFSroot, NULL }, 220 { DT_REG, N("msg"), PFSsysvipc_msg, procfs_validfile_linux }, 221 { DT_REG, N("sem"), PFSsysvipc_sem, procfs_validfile_linux }, 222 { DT_REG, N("shm"), PFSsysvipc_shm, procfs_validfile_linux }, 223 #undef N 224 }; 225 static const int nproc_sysvipc_targets = 226 sizeof(proc_sysvipc_targets) / sizeof(proc_sysvipc_targets[0]); 227 228 int procfs_lookup(void *); 229 int procfs_open(void *); 230 int procfs_close(void *); 231 int procfs_access(void *); 232 int procfs_getattr(void *); 233 int procfs_setattr(void *); 234 int procfs_readdir(void *); 235 int procfs_readlink(void *); 236 int procfs_inactive(void *); 237 int procfs_reclaim(void *); 238 int procfs_print(void *); 239 int procfs_pathconf(void *); 240 int procfs_getpages(void *); 241 242 static uint8_t fttodt(file_t *); 243 static int atoi(const char *, size_t); 244 245 /* 246 * procfs vnode operations. 247 */ 248 int (**procfs_vnodeop_p)(void *); 249 const struct vnodeopv_entry_desc procfs_vnodeop_entries[] = { 250 { &vop_default_desc, vn_default_error }, 251 { &vop_parsepath_desc, genfs_parsepath }, /* parsepath */ 252 { &vop_lookup_desc, procfs_lookup }, /* lookup */ 253 { &vop_create_desc, genfs_eopnotsupp }, /* create */ 254 { &vop_mknod_desc, genfs_eopnotsupp }, /* mknod */ 255 { &vop_open_desc, procfs_open }, /* open */ 256 { &vop_close_desc, procfs_close }, /* close */ 257 { &vop_access_desc, procfs_access }, /* access */ 258 { &vop_accessx_desc, genfs_accessx }, /* accessx */ 259 { &vop_getattr_desc, procfs_getattr }, /* getattr */ 260 { &vop_setattr_desc, procfs_setattr }, /* setattr */ 261 { &vop_read_desc, procfs_rw }, /* read */ 262 { &vop_write_desc, procfs_rw }, /* write */ 263 { &vop_fallocate_desc, genfs_eopnotsupp }, /* fallocate */ 264 { &vop_fdiscard_desc, genfs_eopnotsupp }, /* fdiscard */ 265 { &vop_fcntl_desc, genfs_fcntl }, /* fcntl */ 266 { &vop_ioctl_desc, genfs_enoioctl }, /* ioctl */ 267 { &vop_poll_desc, genfs_poll }, /* poll */ 268 { &vop_kqfilter_desc, genfs_kqfilter }, /* kqfilter */ 269 { &vop_revoke_desc, genfs_revoke }, /* revoke */ 270 { &vop_fsync_desc, genfs_nullop }, /* fsync */ 271 { &vop_seek_desc, genfs_nullop }, /* seek */ 272 { &vop_remove_desc, genfs_eopnotsupp }, /* remove */ 273 { &vop_link_desc, genfs_erofs_link }, /* link */ 274 { &vop_rename_desc, genfs_eopnotsupp }, /* rename */ 275 { &vop_mkdir_desc, genfs_eopnotsupp }, /* mkdir */ 276 { &vop_rmdir_desc, genfs_eopnotsupp }, /* rmdir */ 277 { &vop_symlink_desc, genfs_erofs_symlink }, /* symlink */ 278 { &vop_readdir_desc, procfs_readdir }, /* readdir */ 279 { &vop_readlink_desc, procfs_readlink }, /* readlink */ 280 { &vop_abortop_desc, genfs_abortop }, /* abortop */ 281 { &vop_inactive_desc, procfs_inactive }, /* inactive */ 282 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */ 283 { &vop_lock_desc, genfs_lock }, /* lock */ 284 { &vop_unlock_desc, genfs_unlock }, /* unlock */ 285 { &vop_bmap_desc, genfs_eopnotsupp }, /* bmap */ 286 { &vop_strategy_desc, genfs_badop }, /* strategy */ 287 { &vop_print_desc, procfs_print }, /* print */ 288 { &vop_islocked_desc, genfs_islocked }, /* islocked */ 289 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */ 290 { &vop_advlock_desc, genfs_einval }, /* advlock */ 291 { &vop_getpages_desc, procfs_getpages }, /* getpages */ 292 { &vop_putpages_desc, genfs_null_putpages }, /* putpages */ 293 { NULL, NULL } 294 }; 295 const struct vnodeopv_desc procfs_vnodeop_opv_desc = 296 { &procfs_vnodeop_p, procfs_vnodeop_entries }; 297 /* 298 * set things up for doing i/o on 299 * the pfsnode (vp). (vp) is locked 300 * on entry, and should be left locked 301 * on exit. 302 * 303 * for procfs we don't need to do anything 304 * in particular for i/o. all that is done 305 * is to support exclusive open on process 306 * memory images. 307 */ 308 int 309 procfs_open(void *v) 310 { 311 struct vop_open_args /* { 312 struct vnode *a_vp; 313 int a_mode; 314 kauth_cred_t a_cred; 315 } */ *ap = v; 316 struct vnode *vp = ap->a_vp; 317 struct pfsnode *pfs = VTOPFS(vp); 318 struct lwp *l1; 319 struct proc *p2; 320 int error; 321 322 if ((error = 323 procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p2, ENOENT)) != 0) 324 return error; 325 326 l1 = curlwp; /* tracer */ 327 328 #define M2K(m) (((m) & FREAD) && ((m) & FWRITE) ? \ 329 KAUTH_REQ_PROCESS_PROCFS_RW : \ 330 (m) & FWRITE ? KAUTH_REQ_PROCESS_PROCFS_WRITE : \ 331 KAUTH_REQ_PROCESS_PROCFS_READ) 332 333 mutex_enter(p2->p_lock); 334 error = kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_PROCFS, 335 p2, pfs, KAUTH_ARG(M2K(ap->a_mode)), NULL); 336 mutex_exit(p2->p_lock); 337 if (error) { 338 procfs_proc_unlock(p2); 339 return (error); 340 } 341 342 #undef M2K 343 344 switch (pfs->pfs_type) { 345 case PFSmem: 346 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) || 347 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) { 348 error = EBUSY; 349 break; 350 } 351 352 if (!proc_isunder(p2, l1)) { 353 error = EPERM; 354 break; 355 } 356 357 if (ap->a_mode & FWRITE) 358 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); 359 360 break; 361 362 case PFSregs: 363 case PFSfpregs: 364 if (!proc_isunder(p2, l1)) { 365 error = EPERM; 366 break; 367 } 368 break; 369 370 default: 371 break; 372 } 373 374 procfs_proc_unlock(p2); 375 return (error); 376 } 377 378 /* 379 * close the pfsnode (vp) after doing i/o. 380 * (vp) is not locked on entry or exit. 381 * 382 * nothing to do for procfs other than undo 383 * any exclusive open flag (see _open above). 384 */ 385 int 386 procfs_close(void *v) 387 { 388 struct vop_close_args /* { 389 struct vnode *a_vp; 390 int a_fflag; 391 kauth_cred_t a_cred; 392 } */ *ap = v; 393 struct pfsnode *pfs = VTOPFS(ap->a_vp); 394 395 switch (pfs->pfs_type) { 396 case PFSmem: 397 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) 398 pfs->pfs_flags &= ~(FWRITE|O_EXCL); 399 break; 400 401 default: 402 break; 403 } 404 405 return (0); 406 } 407 408 /* 409 * _inactive is called when the pfsnode 410 * is vrele'd and the reference count goes 411 * to zero. (vp) will be on the vnode free 412 * list, so to get it back vget() must be 413 * used. 414 * 415 * (vp) is locked on entry, but must be unlocked on exit. 416 */ 417 int 418 procfs_inactive(void *v) 419 { 420 struct vop_inactive_v2_args /* { 421 struct vnode *a_vp; 422 bool *a_recycle; 423 } */ *ap = v; 424 struct vnode *vp = ap->a_vp; 425 struct pfsnode *pfs = VTOPFS(vp); 426 427 mutex_enter(&proc_lock); 428 *ap->a_recycle = (procfs_proc_find(vp->v_mount, pfs->pfs_pid) == NULL); 429 mutex_exit(&proc_lock); 430 431 return (0); 432 } 433 434 /* 435 * _reclaim is called when getnewvnode() 436 * wants to make use of an entry on the vnode 437 * free list. at this time the filesystem needs 438 * to free any private data and remove the node 439 * from any private lists. 440 */ 441 int 442 procfs_reclaim(void *v) 443 { 444 struct vop_reclaim_v2_args /* { 445 struct vnode *a_vp; 446 } */ *ap = v; 447 struct vnode *vp = ap->a_vp; 448 struct pfsnode *pfs = VTOPFS(vp); 449 450 VOP_UNLOCK(vp); 451 452 /* 453 * To interlock with procfs_revoke_vnodes(). 454 */ 455 mutex_enter(vp->v_interlock); 456 vp->v_data = NULL; 457 mutex_exit(vp->v_interlock); 458 procfs_hashrem(pfs); 459 kmem_free(pfs, sizeof(*pfs)); 460 return 0; 461 } 462 463 /* 464 * Return POSIX pathconf information applicable to special devices. 465 */ 466 int 467 procfs_pathconf(void *v) 468 { 469 struct vop_pathconf_args /* { 470 struct vnode *a_vp; 471 int a_name; 472 register_t *a_retval; 473 } */ *ap = v; 474 475 switch (ap->a_name) { 476 case _PC_LINK_MAX: 477 *ap->a_retval = LINK_MAX; 478 return (0); 479 case _PC_MAX_CANON: 480 *ap->a_retval = MAX_CANON; 481 return (0); 482 case _PC_MAX_INPUT: 483 *ap->a_retval = MAX_INPUT; 484 return (0); 485 case _PC_PIPE_BUF: 486 *ap->a_retval = PIPE_BUF; 487 return (0); 488 case _PC_CHOWN_RESTRICTED: 489 *ap->a_retval = 1; 490 return (0); 491 case _PC_VDISABLE: 492 *ap->a_retval = _POSIX_VDISABLE; 493 return (0); 494 case _PC_SYNC_IO: 495 *ap->a_retval = 1; 496 return (0); 497 default: 498 return genfs_pathconf(ap); 499 } 500 /* NOTREACHED */ 501 } 502 503 /* 504 * _print is used for debugging. 505 * just print a readable description 506 * of (vp). 507 */ 508 int 509 procfs_print(void *v) 510 { 511 struct vop_print_args /* { 512 struct vnode *a_vp; 513 } */ *ap = v; 514 struct pfsnode *pfs = VTOPFS(ap->a_vp); 515 516 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n", 517 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags); 518 return 0; 519 } 520 521 /* 522 * Works out the path to the target process's current 523 * working directory or chroot. If the caller is in a chroot and 524 * can't "reach" the target's cwd or root (or some other error 525 * occurs), a "/" is returned for the path. 526 */ 527 static void 528 procfs_dir(pfstype t, struct lwp *caller, struct proc *target, char **bpp, 529 char *path, size_t len) 530 { 531 struct cwdinfo *cwdi; 532 struct vnode *vp, *rvp; 533 char *bp; 534 535 /* 536 * Lock target cwdi and take a reference to the vnode 537 * we are interested in to prevent it from disappearing 538 * before getcwd_common() below. 539 */ 540 rw_enter(&target->p_cwdi->cwdi_lock, RW_READER); 541 switch (t) { 542 case PFScwd: 543 vp = target->p_cwdi->cwdi_cdir; 544 break; 545 case PFSchroot: 546 vp = target->p_cwdi->cwdi_rdir; 547 break; 548 default: 549 rw_exit(&target->p_cwdi->cwdi_lock); 550 return; 551 } 552 if (vp != NULL) 553 vref(vp); 554 rw_exit(&target->p_cwdi->cwdi_lock); 555 556 cwdi = caller->l_proc->p_cwdi; 557 rw_enter(&cwdi->cwdi_lock, RW_READER); 558 559 rvp = cwdi->cwdi_rdir; 560 bp = bpp ? *bpp : NULL; 561 562 /* 563 * XXX: this horrible kludge avoids locking panics when 564 * attempting to lookup links that point to within procfs 565 */ 566 if (vp != NULL && vp->v_tag == VT_PROCFS) { 567 if (bpp) { 568 *--bp = '/'; 569 *bpp = bp; 570 } 571 vrele(vp); 572 rw_exit(&cwdi->cwdi_lock); 573 return; 574 } 575 576 if (rvp == NULL) 577 rvp = rootvnode; 578 if (vp == NULL || getcwd_common(vp, rvp, bp ? &bp : NULL, path, 579 len / 2, 0, caller) != 0) { 580 if (bpp) { 581 bp = *bpp; 582 *--bp = '/'; 583 } 584 } 585 586 if (bpp) 587 *bpp = bp; 588 589 if (vp != NULL) 590 vrele(vp); 591 rw_exit(&cwdi->cwdi_lock); 592 } 593 594 /* 595 * Invent attributes for pfsnode (vp) and store 596 * them in (vap). 597 * Directories lengths are returned as zero since 598 * any real length would require the genuine size 599 * to be computed, and nothing cares anyway. 600 * 601 * this is relatively minimal for procfs. 602 */ 603 int 604 procfs_getattr(void *v) 605 { 606 struct vop_getattr_args /* { 607 struct vnode *a_vp; 608 struct vattr *a_vap; 609 kauth_cred_t a_cred; 610 } */ *ap = v; 611 struct vnode *vp = ap->a_vp; 612 struct pfsnode *pfs = VTOPFS(vp); 613 struct vattr *vap = ap->a_vap; 614 struct proc *procp; 615 char *path, *bp, bf[16]; 616 int error; 617 618 /* first check the process still exists */ 619 switch (pfs->pfs_type) { 620 case PFSroot: 621 case PFScurproc: 622 case PFSself: 623 procp = NULL; 624 break; 625 626 default: 627 error = 628 procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &procp, ENOENT); 629 if (error != 0) 630 return (error); 631 break; 632 } 633 634 switch (pfs->pfs_type) { 635 case PFStask: 636 if (pfs->pfs_fd == -1) { 637 path = NULL; 638 break; 639 } 640 /*FALLTHROUGH*/ 641 case PFScwd: 642 case PFSchroot: 643 path = malloc(MAXPATHLEN + 4, M_TEMP, M_WAITOK); 644 if (path == NULL && procp != NULL) { 645 procfs_proc_unlock(procp); 646 return (ENOMEM); 647 } 648 break; 649 650 default: 651 path = NULL; 652 break; 653 } 654 655 if (procp != NULL) { 656 mutex_enter(procp->p_lock); 657 error = kauth_authorize_process(kauth_cred_get(), 658 KAUTH_PROCESS_CANSEE, procp, 659 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL); 660 mutex_exit(procp->p_lock); 661 if (error != 0) { 662 procfs_proc_unlock(procp); 663 if (path != NULL) 664 free(path, M_TEMP); 665 return (ENOENT); 666 } 667 } 668 669 error = 0; 670 671 /* start by zeroing out the attributes */ 672 vattr_null(vap); 673 674 /* next do all the common fields */ 675 vap->va_type = ap->a_vp->v_type; 676 vap->va_mode = pfs->pfs_mode; 677 vap->va_fileid = pfs->pfs_fileno; 678 vap->va_flags = 0; 679 vap->va_blocksize = PAGE_SIZE; 680 681 /* 682 * Make all times be current TOD. 683 * 684 * It would be possible to get the process start 685 * time from the p_stats structure, but there's 686 * no "file creation" time stamp anyway, and the 687 * p_stats structure is not addressable if u. gets 688 * swapped out for that process. 689 */ 690 getnanotime(&vap->va_ctime); 691 vap->va_atime = vap->va_mtime = vap->va_ctime; 692 if (procp) 693 TIMEVAL_TO_TIMESPEC(&procp->p_stats->p_start, 694 &vap->va_birthtime); 695 else 696 getnanotime(&vap->va_birthtime); 697 698 switch (pfs->pfs_type) { 699 case PFSmem: 700 case PFSregs: 701 case PFSfpregs: 702 #if defined(__HAVE_PROCFS_MACHDEP) && defined(PROCFS_MACHDEP_PROTECT_CASES) 703 PROCFS_MACHDEP_PROTECT_CASES 704 #endif 705 /* 706 * If the process has exercised some setuid or setgid 707 * privilege, then rip away read/write permission so 708 * that only root can gain access. 709 */ 710 if (procp->p_flag & PK_SUGID) 711 vap->va_mode &= ~(S_IRUSR|S_IWUSR); 712 /* FALLTHROUGH */ 713 case PFSstatus: 714 case PFSstat: 715 case PFSnote: 716 case PFSnotepg: 717 case PFScmdline: 718 case PFSenviron: 719 case PFSemul: 720 case PFSstatm: 721 722 case PFSmap: 723 case PFSmaps: 724 case PFSlimit: 725 case PFSlimits: 726 case PFSauxv: 727 vap->va_nlink = 1; 728 vap->va_uid = kauth_cred_geteuid(procp->p_cred); 729 vap->va_gid = kauth_cred_getegid(procp->p_cred); 730 break; 731 case PFScwd: 732 case PFSchroot: 733 case PFSmeminfo: 734 case PFSdevices: 735 case PFScpuinfo: 736 case PFSuptime: 737 case PFSmounts: 738 case PFScpustat: 739 case PFSloadavg: 740 case PFSversion: 741 case PFSexe: 742 case PFSself: 743 case PFScurproc: 744 case PFSroot: 745 case PFSsysvipc_msg: 746 case PFSsysvipc_sem: 747 case PFSsysvipc_shm: 748 vap->va_nlink = 1; 749 vap->va_uid = vap->va_gid = 0; 750 break; 751 752 case PFSsysvipc: 753 vap->va_nlink = 5; 754 vap->va_uid = vap->va_gid = 0; 755 break; 756 757 case PFSproc: 758 case PFStask: 759 case PFSfile: 760 case PFSfd: 761 break; 762 763 default: 764 panic("%s: %d/1", __func__, pfs->pfs_type); 765 } 766 767 /* 768 * now do the object specific fields 769 * 770 * The size could be set from struct reg, but it's hardly 771 * worth the trouble, and it puts some (potentially) machine 772 * dependent data into this machine-independent code. If it 773 * becomes important then this function should break out into 774 * a per-file stat function in the corresponding .c file. 775 */ 776 777 switch (pfs->pfs_type) { 778 case PFSroot: 779 vap->va_bytes = vap->va_size = DEV_BSIZE; 780 break; 781 782 case PFSself: 783 case PFScurproc: 784 vap->va_bytes = vap->va_size = 785 snprintf(bf, sizeof(bf), "%ld", (long)curproc->p_pid); 786 break; 787 case PFStask: 788 if (pfs->pfs_fd != -1) { 789 vap->va_nlink = 1; 790 vap->va_uid = 0; 791 vap->va_gid = 0; 792 vap->va_bytes = vap->va_size = 793 snprintf(bf, sizeof(bf), ".."); 794 break; 795 } 796 /*FALLTHROUGH*/ 797 case PFSfd: 798 if (pfs->pfs_fd != -1) { 799 file_t *fp; 800 801 fp = fd_getfile2(procp, pfs->pfs_fd); 802 if (fp == NULL) { 803 error = EBADF; 804 break; 805 } 806 vap->va_nlink = 1; 807 vap->va_uid = kauth_cred_geteuid(fp->f_cred); 808 vap->va_gid = kauth_cred_getegid(fp->f_cred); 809 switch (fp->f_type) { 810 case DTYPE_VNODE: 811 vap->va_bytes = vap->va_size = 812 fp->f_vnode->v_size; 813 break; 814 default: 815 vap->va_bytes = vap->va_size = 0; 816 break; 817 } 818 closef(fp); 819 break; 820 } 821 /*FALLTHROUGH*/ 822 case PFSproc: 823 vap->va_nlink = 2; 824 vap->va_uid = kauth_cred_geteuid(procp->p_cred); 825 vap->va_gid = kauth_cred_getegid(procp->p_cred); 826 vap->va_bytes = vap->va_size = DEV_BSIZE; 827 break; 828 829 case PFSfile: 830 error = EOPNOTSUPP; 831 break; 832 833 case PFSmem: 834 vap->va_bytes = vap->va_size = 835 ctob(procp->p_vmspace->vm_tsize + 836 procp->p_vmspace->vm_dsize + 837 procp->p_vmspace->vm_ssize); 838 break; 839 840 case PFSauxv: 841 vap->va_bytes = vap->va_size = procp->p_execsw->es_arglen; 842 break; 843 844 #if defined(PT_GETREGS) || defined(PT_SETREGS) 845 case PFSregs: 846 vap->va_bytes = vap->va_size = sizeof(struct reg); 847 break; 848 #endif 849 850 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS) 851 case PFSfpregs: 852 vap->va_bytes = vap->va_size = sizeof(struct fpreg); 853 break; 854 #endif 855 856 case PFSstatus: 857 case PFSstat: 858 case PFSnote: 859 case PFSnotepg: 860 case PFScmdline: 861 case PFSenviron: 862 case PFSmeminfo: 863 case PFSdevices: 864 case PFScpuinfo: 865 case PFSuptime: 866 case PFSmounts: 867 case PFScpustat: 868 case PFSloadavg: 869 case PFSstatm: 870 case PFSversion: 871 case PFSsysvipc: 872 case PFSsysvipc_msg: 873 case PFSsysvipc_sem: 874 case PFSsysvipc_shm: 875 vap->va_bytes = vap->va_size = 0; 876 break; 877 case PFSlimit: 878 case PFSlimits: 879 case PFSmap: 880 case PFSmaps: 881 /* 882 * Advise a larger blocksize for the map files, so that 883 * they may be read in one pass. 884 */ 885 vap->va_blocksize = 4 * PAGE_SIZE; 886 vap->va_bytes = vap->va_size = 0; 887 break; 888 889 case PFScwd: 890 case PFSchroot: 891 bp = path + MAXPATHLEN; 892 *--bp = '\0'; 893 procfs_dir(pfs->pfs_type, curlwp, procp, &bp, path, 894 MAXPATHLEN); 895 vap->va_bytes = vap->va_size = strlen(bp); 896 break; 897 898 case PFSexe: 899 vap->va_bytes = vap->va_size = strlen(procp->p_path); 900 break; 901 902 case PFSemul: 903 vap->va_bytes = vap->va_size = strlen(procp->p_emul->e_name); 904 break; 905 906 #ifdef __HAVE_PROCFS_MACHDEP 907 PROCFS_MACHDEP_NODETYPE_CASES 908 error = procfs_machdep_getattr(ap->a_vp, vap, procp); 909 break; 910 #endif 911 912 default: 913 panic("%s: %d/2", __func__, pfs->pfs_type); 914 } 915 916 if (procp != NULL) 917 procfs_proc_unlock(procp); 918 if (path != NULL) 919 free(path, M_TEMP); 920 921 return (error); 922 } 923 924 /*ARGSUSED*/ 925 int 926 procfs_setattr(void *v) 927 { 928 /* 929 * just fake out attribute setting 930 * it's not good to generate an error 931 * return, otherwise things like creat() 932 * will fail when they try to set the 933 * file length to 0. worse, this means 934 * that echo $note > /proc/$pid/note will fail. 935 */ 936 937 return (0); 938 } 939 940 /* 941 * implement access checking. 942 * 943 * actually, the check for super-user is slightly 944 * broken since it will allow read access to write-only 945 * objects. this doesn't cause any particular trouble 946 * but does mean that the i/o entry points need to check 947 * that the operation really does make sense. 948 */ 949 int 950 procfs_access(void *v) 951 { 952 struct vop_access_args /* { 953 struct vnode *a_vp; 954 accmode_t a_accmode; 955 kauth_cred_t a_cred; 956 } */ *ap = v; 957 struct vattr va; 958 int error; 959 960 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred)) != 0) 961 return (error); 962 963 return kauth_authorize_vnode(ap->a_cred, 964 KAUTH_ACCESS_ACTION(ap->a_accmode, ap->a_vp->v_type, va.va_mode), 965 ap->a_vp, NULL, genfs_can_access(ap->a_vp, ap->a_cred, 966 va.va_uid, va.va_gid, va.va_mode, NULL, ap->a_accmode)); 967 } 968 969 /* 970 * lookup. this is incredibly complicated in the 971 * general case, however for most pseudo-filesystems 972 * very little needs to be done. 973 * 974 * Locking isn't hard here, just poorly documented. 975 * 976 * If we're looking up ".", just vref the parent & return it. 977 * 978 * If we're looking up "..", unlock the parent, and lock "..". If everything 979 * went ok, and we're on the last component and the caller requested the 980 * parent locked, try to re-lock the parent. We do this to prevent lock 981 * races. 982 * 983 * For anything else, get the needed node. Then unlock the parent if not 984 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the 985 * parent in the .. case). 986 * 987 * We try to exit with the parent locked in error cases. 988 */ 989 int 990 procfs_lookup(void *v) 991 { 992 struct vop_lookup_v2_args /* { 993 struct vnode * a_dvp; 994 struct vnode ** a_vpp; 995 struct componentname * a_cnp; 996 } */ *ap = v; 997 struct componentname *cnp = ap->a_cnp; 998 struct vnode **vpp = ap->a_vpp; 999 struct vnode *dvp = ap->a_dvp; 1000 const char *pname = cnp->cn_nameptr; 1001 const struct proc_target *pt = NULL; 1002 struct vnode *fvp; 1003 pid_t pid, vnpid; 1004 struct pfsnode *pfs; 1005 struct proc *p = NULL; 1006 struct lwp *plwp; 1007 int i, error; 1008 pfstype type; 1009 1010 *vpp = NULL; 1011 1012 if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred)) != 0) 1013 return (error); 1014 1015 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) 1016 return (EROFS); 1017 1018 if (cnp->cn_namelen == 1 && *pname == '.') { 1019 *vpp = dvp; 1020 vref(dvp); 1021 return (0); 1022 } 1023 1024 pfs = VTOPFS(dvp); 1025 switch (pfs->pfs_type) { 1026 case PFSroot: 1027 /* 1028 * Shouldn't get here with .. in the root node. 1029 */ 1030 if (cnp->cn_flags & ISDOTDOT) 1031 return (EIO); 1032 1033 for (i = 0; i < nproc_root_targets; i++) { 1034 pt = &proc_root_targets[i]; 1035 /* 1036 * check for node match. proc is always NULL here, 1037 * so call pt_valid with constant NULL lwp. 1038 */ 1039 if (cnp->cn_namelen == pt->pt_namlen && 1040 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 1041 (pt->pt_valid == NULL || 1042 (*pt->pt_valid)(NULL, dvp->v_mount))) 1043 break; 1044 } 1045 1046 if (i != nproc_root_targets) { 1047 error = procfs_allocvp(dvp->v_mount, vpp, 0, 1048 pt->pt_pfstype, -1); 1049 return (error); 1050 } 1051 1052 if (CNEQ(cnp, "curproc", 7)) { 1053 pid = curproc->p_pid; 1054 vnpid = 0; 1055 type = PFScurproc; 1056 } else if (CNEQ(cnp, "self", 4)) { 1057 pid = curproc->p_pid; 1058 vnpid = 0; 1059 type = PFSself; 1060 } else { 1061 pid = (pid_t)atoi(pname, cnp->cn_namelen); 1062 vnpid = pid; 1063 type = PFSproc; 1064 } 1065 1066 if (procfs_proc_lock(dvp->v_mount, pid, &p, ESRCH) != 0) 1067 break; 1068 error = procfs_allocvp(dvp->v_mount, vpp, vnpid, type, -1); 1069 procfs_proc_unlock(p); 1070 return (error); 1071 1072 case PFSproc: 1073 if (cnp->cn_flags & ISDOTDOT) { 1074 error = procfs_allocvp(dvp->v_mount, vpp, 0, PFSroot, 1075 -1); 1076 return (error); 1077 } 1078 1079 if (procfs_proc_lock(dvp->v_mount, pfs->pfs_pid, &p, 1080 ESRCH) != 0) 1081 break; 1082 1083 mutex_enter(p->p_lock); 1084 LIST_FOREACH(plwp, &p->p_lwps, l_sibling) { 1085 if (plwp->l_stat != LSZOMB) 1086 break; 1087 } 1088 /* Process is exiting if no-LWPS or all LWPs are LSZOMB */ 1089 if (plwp == NULL) { 1090 mutex_exit(p->p_lock); 1091 procfs_proc_unlock(p); 1092 return ESRCH; 1093 } 1094 1095 lwp_addref(plwp); 1096 mutex_exit(p->p_lock); 1097 1098 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) { 1099 int found; 1100 1101 found = cnp->cn_namelen == pt->pt_namlen && 1102 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 1103 (pt->pt_valid == NULL 1104 || (*pt->pt_valid)(plwp, dvp->v_mount)); 1105 if (found) 1106 break; 1107 } 1108 lwp_delref(plwp); 1109 1110 if (i == nproc_targets) { 1111 procfs_proc_unlock(p); 1112 break; 1113 } 1114 if (pt->pt_pfstype == PFSfile) { 1115 fvp = p->p_textvp; 1116 /* We already checked that it exists. */ 1117 vref(fvp); 1118 procfs_proc_unlock(p); 1119 *vpp = fvp; 1120 return (0); 1121 } 1122 1123 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 1124 pt->pt_pfstype, -1); 1125 procfs_proc_unlock(p); 1126 return (error); 1127 1128 case PFSfd: { 1129 int fd; 1130 file_t *fp; 1131 1132 if ((error = procfs_proc_lock(dvp->v_mount, pfs->pfs_pid, &p, 1133 ENOENT)) != 0) 1134 return error; 1135 1136 if (cnp->cn_flags & ISDOTDOT) { 1137 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 1138 PFSproc, -1); 1139 procfs_proc_unlock(p); 1140 return (error); 1141 } 1142 fd = atoi(pname, cnp->cn_namelen); 1143 1144 fp = fd_getfile2(p, fd); 1145 if (fp == NULL) { 1146 procfs_proc_unlock(p); 1147 return ENOENT; 1148 } 1149 fvp = fp->f_vnode; 1150 1151 /* Don't show directories */ 1152 if (fp->f_type == DTYPE_VNODE && fvp->v_type != VDIR && 1153 !procfs_proc_is_linux_compat()) { 1154 vref(fvp); 1155 closef(fp); 1156 procfs_proc_unlock(p); 1157 *vpp = fvp; 1158 return 0; 1159 } 1160 1161 closef(fp); 1162 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 1163 PFSfd, fd); 1164 procfs_proc_unlock(p); 1165 return error; 1166 } 1167 case PFStask: { 1168 int xpid; 1169 1170 if ((error = procfs_proc_lock(dvp->v_mount, pfs->pfs_pid, &p, 1171 ENOENT)) != 0) 1172 return error; 1173 1174 if (cnp->cn_flags & ISDOTDOT) { 1175 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 1176 PFSproc, -1); 1177 procfs_proc_unlock(p); 1178 return (error); 1179 } 1180 xpid = atoi(pname, cnp->cn_namelen); 1181 1182 if (xpid != pfs->pfs_pid) { 1183 procfs_proc_unlock(p); 1184 return ENOENT; 1185 } 1186 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 1187 PFStask, 0); 1188 procfs_proc_unlock(p); 1189 return error; 1190 } 1191 case PFSsysvipc: 1192 if (cnp->cn_flags & ISDOTDOT) { 1193 error = procfs_allocvp(dvp->v_mount, vpp, 0, PFSroot, 1194 -1); 1195 return (error); 1196 } 1197 1198 for (i = 0; i < nproc_sysvipc_targets; i++) { 1199 pt = &proc_sysvipc_targets[i]; 1200 /* 1201 * check for node match. proc is always NULL here, 1202 * so call pt_valid with constant NULL lwp. 1203 */ 1204 if (cnp->cn_namelen == pt->pt_namlen && 1205 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 1206 (pt->pt_valid == NULL || 1207 (*pt->pt_valid)(NULL, dvp->v_mount))) 1208 break; 1209 } 1210 1211 if (i != nproc_sysvipc_targets) { 1212 error = procfs_allocvp(dvp->v_mount, vpp, 0, 1213 pt->pt_pfstype, -1); 1214 return (error); 1215 } 1216 1217 return (ENOENT); 1218 1219 default: 1220 return (ENOTDIR); 1221 } 1222 1223 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS); 1224 } 1225 1226 int 1227 procfs_validfile(struct lwp *l, struct mount *mp) 1228 { 1229 return l != NULL && l->l_proc != NULL && l->l_proc->p_textvp != NULL; 1230 } 1231 1232 static int 1233 procfs_validfile_linux(struct lwp *l, struct mount *mp) 1234 { 1235 return procfs_use_linux_compat(mp) && 1236 (l == NULL || l->l_proc == NULL || procfs_validfile(l, mp)); 1237 } 1238 1239 struct procfs_root_readdir_ctx { 1240 struct uio *uiop; 1241 off_t *cookies; 1242 int ncookies; 1243 off_t off; 1244 off_t startoff; 1245 int error; 1246 }; 1247 1248 static int 1249 procfs_root_readdir_callback(struct proc *p, void *arg) 1250 { 1251 struct procfs_root_readdir_ctx *ctxp = arg; 1252 struct dirent d; 1253 struct uio *uiop; 1254 int error; 1255 1256 uiop = ctxp->uiop; 1257 if (uiop->uio_resid < UIO_MX) 1258 return -1; /* no space */ 1259 1260 if (kauth_authorize_process(kauth_cred_get(), 1261 KAUTH_PROCESS_CANSEE, p, 1262 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL) != 0) 1263 return 0; 1264 1265 if (ctxp->off < ctxp->startoff) { 1266 ctxp->off++; 1267 return 0; 1268 } 1269 1270 memset(&d, 0, UIO_MX); 1271 d.d_reclen = UIO_MX; 1272 d.d_fileno = PROCFS_FILENO(p->p_pid, PFSproc, -1); 1273 d.d_namlen = snprintf(d.d_name, 1274 UIO_MX - offsetof(struct dirent, d_name), "%ld", (long)p->p_pid); 1275 d.d_type = DT_DIR; 1276 1277 mutex_exit(&proc_lock); 1278 error = uiomove(&d, UIO_MX, uiop); 1279 mutex_enter(&proc_lock); 1280 if (error) { 1281 ctxp->error = error; 1282 return -1; 1283 } 1284 1285 ctxp->ncookies++; 1286 if (ctxp->cookies) 1287 *(ctxp->cookies)++ = ctxp->off + 1; 1288 ctxp->off++; 1289 1290 return 0; 1291 } 1292 1293 /* 1294 * readdir returns directory entries from pfsnode (vp). 1295 * 1296 * the strategy here with procfs is to generate a single 1297 * directory entry at a time (struct dirent) and then 1298 * copy that out to userland using uiomove. a more efficient 1299 * though more complex implementation, would try to minimize 1300 * the number of calls to uiomove(). for procfs, this is 1301 * hardly worth the added code complexity. 1302 * 1303 * this should just be done through read() 1304 */ 1305 int 1306 procfs_readdir(void *v) 1307 { 1308 struct vop_readdir_args /* { 1309 struct vnode *a_vp; 1310 struct uio *a_uio; 1311 kauth_cred_t a_cred; 1312 int *a_eofflag; 1313 off_t **a_cookies; 1314 int *a_ncookies; 1315 } */ *ap = v; 1316 struct uio *uio = ap->a_uio; 1317 struct dirent d; 1318 struct pfsnode *pfs; 1319 off_t i; 1320 int error; 1321 off_t *cookies = NULL; 1322 int ncookies; 1323 struct vnode *vp; 1324 const struct proc_target *pt; 1325 struct procfs_root_readdir_ctx ctx; 1326 struct proc *p = NULL; 1327 struct lwp *l; 1328 int nfd; 1329 int nc = 0; 1330 1331 vp = ap->a_vp; 1332 pfs = VTOPFS(vp); 1333 1334 if (uio->uio_resid < UIO_MX) 1335 return (EINVAL); 1336 if (uio->uio_offset < 0) 1337 return (EINVAL); 1338 1339 error = 0; 1340 i = uio->uio_offset; 1341 memset(&d, 0, UIO_MX); 1342 d.d_reclen = UIO_MX; 1343 ncookies = uio->uio_resid / UIO_MX; 1344 1345 switch (pfs->pfs_type) { 1346 /* 1347 * this is for the process-specific sub-directories. 1348 * all that is needed to is copy out all the entries 1349 * from the procent[] table (top of this file). 1350 */ 1351 case PFSproc: { 1352 1353 if (i >= nproc_targets) 1354 return 0; 1355 1356 if (procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p, ESRCH) != 0) 1357 break; 1358 1359 if (ap->a_ncookies) { 1360 ncookies = uimin(ncookies, (nproc_targets - i)); 1361 cookies = malloc(ncookies * sizeof (off_t), 1362 M_TEMP, M_WAITOK); 1363 *ap->a_cookies = cookies; 1364 } 1365 1366 for (pt = &proc_targets[i]; 1367 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) { 1368 if (pt->pt_valid) { 1369 /* XXXSMP LWP can disappear */ 1370 mutex_enter(p->p_lock); 1371 l = LIST_FIRST(&p->p_lwps); 1372 KASSERT(l != NULL); 1373 mutex_exit(p->p_lock); 1374 if ((*pt->pt_valid)(l, vp->v_mount) == 0) 1375 continue; 1376 } 1377 1378 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, 1379 pt->pt_pfstype, -1); 1380 d.d_namlen = pt->pt_namlen; 1381 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1382 d.d_type = pt->pt_type; 1383 1384 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1385 break; 1386 if (cookies) 1387 *cookies++ = i + 1; 1388 } 1389 1390 procfs_proc_unlock(p); 1391 break; 1392 } 1393 case PFSfd: { 1394 file_t *fp; 1395 int lim; 1396 1397 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p, 1398 ESRCH)) != 0) 1399 return error; 1400 1401 /* XXX Should this be by file as well? */ 1402 if (kauth_authorize_process(kauth_cred_get(), 1403 KAUTH_PROCESS_CANSEE, p, 1404 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_OPENFILES), NULL, 1405 NULL) != 0) { 1406 procfs_proc_unlock(p); 1407 return ESRCH; 1408 } 1409 1410 nfd = atomic_load_consume(&p->p_fd->fd_dt)->dt_nfiles; 1411 1412 lim = uimin((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 1413 if (i >= lim) { 1414 procfs_proc_unlock(p); 1415 return 0; 1416 } 1417 1418 if (ap->a_ncookies) { 1419 ncookies = uimin(ncookies, (nfd + 2 - i)); 1420 cookies = malloc(ncookies * sizeof (off_t), 1421 M_TEMP, M_WAITOK); 1422 *ap->a_cookies = cookies; 1423 } 1424 1425 for (; i < 2 && uio->uio_resid >= UIO_MX; i++) { 1426 pt = &proc_targets[i]; 1427 d.d_namlen = pt->pt_namlen; 1428 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, 1429 pt->pt_pfstype, -1); 1430 (void)memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1431 d.d_type = pt->pt_type; 1432 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1433 break; 1434 if (cookies) 1435 *cookies++ = i + 1; 1436 nc++; 1437 } 1438 if (error) 1439 goto out; 1440 for (; uio->uio_resid >= UIO_MX && i < nfd; i++) { 1441 /* check the descriptor exists */ 1442 if ((fp = fd_getfile2(p, i - 2)) == NULL) 1443 continue; 1444 closef(fp); 1445 1446 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, PFSfd, i - 2); 1447 d.d_namlen = snprintf(d.d_name, sizeof(d.d_name), 1448 "%lld", (long long)(i - 2)); 1449 d.d_type = fttodt(fp); 1450 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1451 break; 1452 if (cookies) 1453 *cookies++ = i + 1; 1454 nc++; 1455 } 1456 goto out; 1457 } 1458 case PFStask: { 1459 1460 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p, 1461 ESRCH)) != 0) 1462 return error; 1463 1464 nfd = 3; /* ., .., pid */ 1465 1466 if (ap->a_ncookies) { 1467 ncookies = uimin(ncookies, (nfd + 2 - i)); 1468 cookies = malloc(ncookies * sizeof (off_t), 1469 M_TEMP, M_WAITOK); 1470 *ap->a_cookies = cookies; 1471 } 1472 1473 for (; i < 2 && uio->uio_resid >= UIO_MX; i++) { 1474 pt = &proc_targets[i]; 1475 d.d_namlen = pt->pt_namlen; 1476 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, 1477 pt->pt_pfstype, -1); 1478 (void)memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1479 d.d_type = pt->pt_type; 1480 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1481 break; 1482 if (cookies) 1483 *cookies++ = i + 1; 1484 nc++; 1485 } 1486 if (error) 1487 goto out; 1488 for (; uio->uio_resid >= UIO_MX && i < nfd; i++) { 1489 /* check the descriptor exists */ 1490 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, PFStask, 1491 i - 2); 1492 d.d_namlen = snprintf(d.d_name, sizeof(d.d_name), 1493 "%ld", (long)pfs->pfs_pid); 1494 d.d_type = DT_LNK; 1495 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1496 break; 1497 if (cookies) 1498 *cookies++ = i + 1; 1499 nc++; 1500 } 1501 goto out; 1502 } 1503 1504 /* 1505 * sysvipc subdirectory 1506 */ 1507 case PFSsysvipc: { 1508 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p, 1509 ESRCH)) != 0) 1510 return error; 1511 if (ap->a_ncookies) { 1512 ncookies = uimin(ncookies, (nproc_sysvipc_targets - i)); 1513 cookies = malloc(ncookies * sizeof (off_t), 1514 M_TEMP, M_WAITOK); 1515 *ap->a_cookies = cookies; 1516 } 1517 1518 for (pt = &proc_sysvipc_targets[i]; 1519 uio->uio_resid >= UIO_MX && i < nproc_sysvipc_targets; pt++, i++) { 1520 if (pt->pt_valid && 1521 (*pt->pt_valid)(NULL, vp->v_mount) == 0) 1522 continue; 1523 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, 1524 pt->pt_pfstype, -1); 1525 d.d_namlen = pt->pt_namlen; 1526 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1527 d.d_type = pt->pt_type; 1528 1529 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1530 break; 1531 if (cookies) 1532 *cookies++ = i + 1; 1533 } 1534 1535 goto out; 1536 } 1537 1538 /* 1539 * this is for the root of the procfs filesystem 1540 * what is needed are special entries for "curproc" 1541 * and "self" followed by an entry for each process 1542 * on allproc. 1543 */ 1544 1545 case PFSroot: { 1546 1547 if (ap->a_ncookies) { 1548 /* 1549 * XXX Potentially allocating too much space here, 1550 * but I'm lazy. This loop needs some work. 1551 */ 1552 cookies = malloc(ncookies * sizeof (off_t), 1553 M_TEMP, M_WAITOK); 1554 *ap->a_cookies = cookies; 1555 } 1556 1557 /* 0 ... 3 are static entries. */ 1558 for (; i <= 3 && uio->uio_resid >= UIO_MX; i++) { 1559 switch (i) { 1560 case 0: /* `.' */ 1561 case 1: /* `..' */ 1562 d.d_fileno = PROCFS_FILENO(0, PFSroot, -1); 1563 d.d_namlen = i + 1; 1564 memcpy(d.d_name, "..", d.d_namlen); 1565 d.d_name[i + 1] = '\0'; 1566 d.d_type = DT_DIR; 1567 break; 1568 1569 case 2: 1570 d.d_fileno = PROCFS_FILENO(0, PFScurproc, -1); 1571 d.d_namlen = sizeof("curproc") - 1; 1572 memcpy(d.d_name, "curproc", sizeof("curproc")); 1573 d.d_type = DT_LNK; 1574 break; 1575 1576 case 3: 1577 d.d_fileno = PROCFS_FILENO(0, PFSself, -1); 1578 d.d_namlen = sizeof("self") - 1; 1579 memcpy(d.d_name, "self", sizeof("self")); 1580 d.d_type = DT_LNK; 1581 break; 1582 } 1583 1584 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1585 break; 1586 nc++; 1587 if (cookies) 1588 *cookies++ = i + 1; 1589 } 1590 if (error) 1591 break; 1592 /* 4 ... are process entries. */ 1593 ctx.uiop = uio; 1594 ctx.error = 0; 1595 ctx.off = 4; 1596 ctx.startoff = i; 1597 ctx.cookies = cookies; 1598 ctx.ncookies = nc; 1599 proclist_foreach_call(&allproc, 1600 procfs_root_readdir_callback, &ctx); 1601 cookies = ctx.cookies; 1602 nc = ctx.ncookies; 1603 error = ctx.error; 1604 if (error) 1605 break; 1606 1607 /* misc entries. */ 1608 if (i < ctx.off) 1609 i = ctx.off; 1610 if (i >= ctx.off + nproc_root_targets) 1611 break; 1612 error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p, ESRCH); 1613 if (error) 1614 break; 1615 for (pt = &proc_root_targets[i - ctx.off]; 1616 uio->uio_resid >= UIO_MX && 1617 pt < &proc_root_targets[nproc_root_targets]; 1618 pt++, i++) { 1619 if (pt->pt_valid && 1620 (*pt->pt_valid)(NULL, vp->v_mount) == 0) 1621 continue; 1622 if (kauth_authorize_process(kauth_cred_get(), 1623 KAUTH_PROCESS_CANSEE, p, 1624 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), 1625 NULL, NULL) != 0) 1626 continue; 1627 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype, -1); 1628 d.d_namlen = pt->pt_namlen; 1629 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1630 d.d_type = pt->pt_type; 1631 1632 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1633 break; 1634 nc++; 1635 if (cookies) 1636 *cookies++ = i + 1; 1637 } 1638 out: 1639 KASSERT(p != NULL); 1640 ncookies = nc; 1641 procfs_proc_unlock(p); 1642 break; 1643 } 1644 1645 default: 1646 error = ENOTDIR; 1647 break; 1648 } 1649 1650 if (ap->a_ncookies) { 1651 if (error) { 1652 if (cookies) 1653 free(*ap->a_cookies, M_TEMP); 1654 *ap->a_ncookies = 0; 1655 *ap->a_cookies = NULL; 1656 } else 1657 *ap->a_ncookies = ncookies; 1658 } 1659 uio->uio_offset = i; 1660 return (error); 1661 } 1662 1663 /* 1664 * readlink reads the link of `curproc' and others 1665 */ 1666 int 1667 procfs_readlink(void *v) 1668 { 1669 struct vop_readlink_args *ap = v; 1670 char bf[16]; /* should be enough */ 1671 char *bp = bf; 1672 char *path = NULL; 1673 int len = 0; 1674 int error = 0; 1675 struct vnode *vp = ap->a_vp; 1676 struct pfsnode *pfs = VTOPFS(vp); 1677 struct proc *pown = NULL; 1678 1679 if (pfs->pfs_fileno == PROCFS_FILENO(0, PFScurproc, -1)) 1680 len = snprintf(bf, sizeof(bf), "%ld", (long)curproc->p_pid); 1681 else if (pfs->pfs_fileno == PROCFS_FILENO(0, PFSself, -1)) 1682 len = snprintf(bf, sizeof(bf), "%s", "curproc"); 1683 else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFStask, 0)) 1684 len = snprintf(bf, sizeof(bf), ".."); 1685 else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFSexe, -1)) { 1686 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &pown, 1687 ESRCH)) != 0) 1688 return error; 1689 bp = pown->p_path; 1690 len = strlen(bp); 1691 } else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFScwd, -1) || 1692 pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFSchroot, -1)) { 1693 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &pown, 1694 ESRCH)) != 0) 1695 return error; 1696 path = malloc(MAXPATHLEN + 4, M_TEMP, M_WAITOK); 1697 if (path == NULL) { 1698 procfs_proc_unlock(pown); 1699 return (ENOMEM); 1700 } 1701 bp = path + MAXPATHLEN; 1702 *--bp = '\0'; 1703 procfs_dir(PROCFS_TYPE(pfs->pfs_fileno), curlwp, pown, 1704 &bp, path, MAXPATHLEN); 1705 len = strlen(bp); 1706 } else { 1707 file_t *fp; 1708 struct vnode *vxp; 1709 1710 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &pown, 1711 ESRCH)) != 0) 1712 return error; 1713 1714 fp = fd_getfile2(pown, pfs->pfs_fd); 1715 if (fp == NULL) { 1716 procfs_proc_unlock(pown); 1717 return EBADF; 1718 } 1719 1720 switch (fp->f_type) { 1721 case DTYPE_VNODE: 1722 vxp = fp->f_vnode; 1723 if (vxp->v_type != VDIR && 1724 !procfs_proc_is_linux_compat()) { 1725 error = EINVAL; 1726 break; 1727 } 1728 if ((path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK)) 1729 == NULL) { 1730 error = ENOMEM; 1731 break; 1732 } 1733 bp = path + MAXPATHLEN; 1734 *--bp = '\0'; 1735 1736 /* 1737 * XXX: kludge to avoid locking against ourselves 1738 * in getcwd() 1739 */ 1740 if (vxp->v_tag == VT_PROCFS) { 1741 *--bp = '/'; 1742 } else { 1743 rw_enter(&curproc->p_cwdi->cwdi_lock, 1744 RW_READER); 1745 vp = curproc->p_cwdi->cwdi_rdir; 1746 if (vp == NULL) 1747 vp = rootvnode; 1748 error = getcwd_common(vxp, vp, &bp, path, 1749 MAXPATHLEN / 2, 0, curlwp); 1750 rw_exit(&curproc->p_cwdi->cwdi_lock); 1751 } 1752 if (error) 1753 break; 1754 len = strlen(bp); 1755 break; 1756 1757 case DTYPE_MISC: 1758 len = snprintf(bf, sizeof(bf), "%s", "[misc]"); 1759 break; 1760 1761 case DTYPE_KQUEUE: 1762 len = snprintf(bf, sizeof(bf), "%s", "[kqueue]"); 1763 break; 1764 1765 case DTYPE_SEM: 1766 len = snprintf(bf, sizeof(bf), "%s", "[ksem]"); 1767 break; 1768 1769 default: 1770 error = EINVAL; 1771 break; 1772 } 1773 closef(fp); 1774 } 1775 1776 if (error == 0) 1777 error = uiomove(bp, len, ap->a_uio); 1778 if (pown) 1779 procfs_proc_unlock(pown); 1780 if (path) 1781 free(path, M_TEMP); 1782 return error; 1783 } 1784 1785 int 1786 procfs_getpages(void *v) 1787 { 1788 struct vop_getpages_args /* { 1789 struct vnode *a_vp; 1790 voff_t a_offset; 1791 struct vm_page **a_m; 1792 int *a_count; 1793 int a_centeridx; 1794 vm_prot_t a_access_type; 1795 int a_advice; 1796 int a_flags; 1797 } */ *ap = v; 1798 1799 if ((ap->a_flags & PGO_LOCKED) == 0) 1800 rw_exit(ap->a_vp->v_uobj.vmobjlock); 1801 1802 return (EFAULT); 1803 } 1804 1805 /* 1806 * convert decimal ascii to int 1807 */ 1808 static int 1809 atoi(const char *b, size_t len) 1810 { 1811 int p = 0; 1812 1813 while (len--) { 1814 char c = *b++; 1815 if (c < '0' || c > '9') 1816 return -1; 1817 p = 10 * p + (c - '0'); 1818 } 1819 1820 return p; 1821 } 1822 1823 /** 1824 * convert DTYPE_XXX to corresponding DT_XXX 1825 * matching what procfs_loadvnode() does. 1826 */ 1827 static uint8_t 1828 fttodt(file_t *fp) 1829 { 1830 switch (fp->f_type) { 1831 case DTYPE_VNODE: 1832 switch (fp->f_vnode->v_type) { 1833 case VREG: return DT_REG; 1834 case VDIR: return DT_LNK; /* symlink */ 1835 case VBLK: return DT_BLK; 1836 case VCHR: return DT_CHR; 1837 case VLNK: return DT_LNK; 1838 case VSOCK: return DT_SOCK; 1839 case VFIFO: return DT_FIFO; 1840 default: return DT_UNKNOWN; 1841 } 1842 case DTYPE_PIPE: return DT_FIFO; 1843 case DTYPE_SOCKET: return DT_SOCK; 1844 case DTYPE_KQUEUE: /*FALLTHROUGH*/ 1845 case DTYPE_MISC: /*FALLTHROUGH*/ 1846 case DTYPE_SEM: return DT_LNK; /* symlinks */ 1847 default: return DT_UNKNOWN; 1848 } 1849 } 1850