1 /* $NetBSD: procfs_vfsops.c,v 1.113 2024/01/17 10:20:12 hannken Exp $ */ 2 3 /* 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Jan-Simon Pendry. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)procfs_vfsops.c 8.7 (Berkeley) 5/10/95 35 */ 36 37 /* 38 * Copyright (c) 1993 Jan-Simon Pendry 39 * 40 * This code is derived from software contributed to Berkeley by 41 * Jan-Simon Pendry. 42 * 43 * Redistribution and use in source and binary forms, with or without 44 * modification, are permitted provided that the following conditions 45 * are met: 46 * 1. Redistributions of source code must retain the above copyright 47 * notice, this list of conditions and the following disclaimer. 48 * 2. Redistributions in binary form must reproduce the above copyright 49 * notice, this list of conditions and the following disclaimer in the 50 * documentation and/or other materials provided with the distribution. 51 * 3. All advertising materials mentioning features or use of this software 52 * must display the following acknowledgement: 53 * This product includes software developed by the University of 54 * California, Berkeley and its contributors. 55 * 4. Neither the name of the University nor the names of its contributors 56 * may be used to endorse or promote products derived from this software 57 * without specific prior written permission. 58 * 59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 62 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 69 * SUCH DAMAGE. 70 * 71 * @(#)procfs_vfsops.c 8.7 (Berkeley) 5/10/95 72 */ 73 74 /* 75 * procfs VFS interface 76 */ 77 78 #include <sys/cdefs.h> 79 __KERNEL_RCSID(0, "$NetBSD: procfs_vfsops.c,v 1.113 2024/01/17 10:20:12 hannken Exp $"); 80 81 #if defined(_KERNEL_OPT) 82 #include "opt_compat_netbsd.h" 83 #endif 84 85 #include <sys/param.h> 86 #include <sys/atomic.h> 87 #include <sys/buf.h> 88 #include <sys/dirent.h> 89 #include <sys/file.h> 90 #include <sys/filedesc.h> 91 #include <sys/fstrans.h> 92 #include <sys/kauth.h> 93 #include <sys/kernel.h> 94 #include <sys/module.h> 95 #include <sys/mount.h> 96 #include <sys/proc.h> 97 #include <sys/signalvar.h> 98 #include <sys/sysctl.h> 99 #include <sys/syslog.h> 100 #include <sys/systm.h> 101 #include <sys/time.h> 102 #include <sys/vnode.h> 103 104 #include <miscfs/genfs/genfs.h> 105 106 #include <miscfs/procfs/procfs.h> 107 108 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */ 109 110 MODULE(MODULE_CLASS_VFS, procfs, "ptrace_common"); 111 112 VFS_PROTOS(procfs); 113 114 #define PROCFS_HASHSIZE 256 115 116 static kauth_listener_t procfs_listener; 117 static void *procfs_exechook; 118 LIST_HEAD(hashhead, pfsnode); 119 static u_long procfs_hashmask; 120 static struct hashhead *procfs_hashtab; 121 static kmutex_t procfs_hashlock; 122 123 static struct hashhead * 124 procfs_hashhead(pid_t pid) 125 { 126 127 return &procfs_hashtab[pid & procfs_hashmask]; 128 } 129 130 void 131 procfs_hashrem(struct pfsnode *pfs) 132 { 133 134 mutex_enter(&procfs_hashlock); 135 LIST_REMOVE(pfs, pfs_hash); 136 mutex_exit(&procfs_hashlock); 137 } 138 139 /* 140 * VFS Operations. 141 * 142 * mount system call 143 */ 144 /* ARGSUSED */ 145 int 146 procfs_mount( 147 struct mount *mp, 148 const char *path, 149 void *data, 150 size_t *data_len) 151 { 152 struct lwp *l = curlwp; 153 struct procfsmount *pmnt; 154 struct procfs_args *args = data; 155 int error; 156 157 if (args == NULL) 158 return EINVAL; 159 160 if (UIO_MX & (UIO_MX-1)) { 161 log(LOG_ERR, "procfs: invalid directory entry size"); 162 return (EINVAL); 163 } 164 165 if (mp->mnt_flag & MNT_GETARGS) { 166 if (*data_len < sizeof *args) 167 return EINVAL; 168 169 pmnt = VFSTOPROC(mp); 170 if (pmnt == NULL) 171 return EIO; 172 args->version = PROCFS_ARGSVERSION; 173 args->flags = pmnt->pmnt_flags; 174 *data_len = sizeof *args; 175 return 0; 176 } 177 178 if (mp->mnt_flag & MNT_UPDATE) 179 return (EOPNOTSUPP); 180 181 if (*data_len >= sizeof *args && args->version != PROCFS_ARGSVERSION) 182 return EINVAL; 183 184 pmnt = kmem_zalloc(sizeof(struct procfsmount), KM_SLEEP); 185 186 mp->mnt_stat.f_namemax = PROCFS_MAXNAMLEN; 187 mp->mnt_flag |= MNT_LOCAL; 188 mp->mnt_data = pmnt; 189 vfs_getnewfsid(mp); 190 191 error = set_statvfs_info(path, UIO_USERSPACE, "procfs", UIO_SYSSPACE, 192 mp->mnt_op->vfs_name, mp, l); 193 if (*data_len >= sizeof *args) 194 pmnt->pmnt_flags = args->flags; 195 else 196 pmnt->pmnt_flags = 0; 197 198 mp->mnt_iflag |= IMNT_MPSAFE | IMNT_SHRLOOKUP; 199 return error; 200 } 201 202 /* 203 * unmount system call 204 */ 205 int 206 procfs_unmount(struct mount *mp, int mntflags) 207 { 208 int error; 209 int flags = 0; 210 211 if (mntflags & MNT_FORCE) 212 flags |= FORCECLOSE; 213 214 if ((error = vflush(mp, 0, flags)) != 0) 215 return (error); 216 217 kmem_free(mp->mnt_data, sizeof(struct procfsmount)); 218 mp->mnt_data = NULL; 219 220 return 0; 221 } 222 223 int 224 procfs_root(struct mount *mp, int lktype, struct vnode **vpp) 225 { 226 int error; 227 228 error = procfs_allocvp(mp, vpp, 0, PFSroot, -1); 229 if (error == 0) { 230 error = vn_lock(*vpp, lktype); 231 if (error != 0) { 232 vrele(*vpp); 233 *vpp = NULL; 234 } 235 } 236 237 return error; 238 } 239 240 /* ARGSUSED */ 241 int 242 procfs_start(struct mount *mp, int flags) 243 { 244 245 return (0); 246 } 247 248 /* 249 * Get file system statistics. 250 */ 251 int 252 procfs_statvfs(struct mount *mp, struct statvfs *sbp) 253 { 254 255 genfs_statvfs(mp, sbp); 256 257 sbp->f_bsize = PAGE_SIZE; 258 sbp->f_frsize = PAGE_SIZE; 259 sbp->f_iosize = PAGE_SIZE; 260 sbp->f_blocks = 1; 261 sbp->f_files = maxproc; /* approx */ 262 sbp->f_ffree = maxproc - atomic_load_relaxed(&nprocs); /* approx */ 263 sbp->f_favail = maxproc - atomic_load_relaxed(&nprocs); /* approx */ 264 265 return (0); 266 } 267 268 /*ARGSUSED*/ 269 int 270 procfs_sync( 271 struct mount *mp, 272 int waitfor, 273 kauth_cred_t uc) 274 { 275 276 return (0); 277 } 278 279 /*ARGSUSED*/ 280 int 281 procfs_vget(struct mount *mp, ino_t ino, int lktype, 282 struct vnode **vpp) 283 { 284 return (EOPNOTSUPP); 285 } 286 287 int 288 procfs_loadvnode(struct mount *mp, struct vnode *vp, 289 const void *key, size_t key_len, const void **new_key) 290 { 291 int error; 292 struct pfskey pfskey; 293 struct pfsnode *pfs; 294 295 KASSERT(key_len == sizeof(pfskey)); 296 memcpy(&pfskey, key, key_len); 297 298 pfs = kmem_alloc(sizeof(*pfs), KM_SLEEP); 299 pfs->pfs_pid = pfskey.pk_pid; 300 pfs->pfs_type = pfskey.pk_type; 301 pfs->pfs_fd = pfskey.pk_fd; 302 pfs->pfs_vnode = vp; 303 pfs->pfs_mount = mp; 304 pfs->pfs_flags = 0; 305 pfs->pfs_fileno = 306 PROCFS_FILENO(pfs->pfs_pid, pfs->pfs_type, pfs->pfs_fd); 307 vp->v_tag = VT_PROCFS; 308 vp->v_op = procfs_vnodeop_p; 309 vp->v_data = pfs; 310 311 switch (pfs->pfs_type) { 312 case PFSroot: /* /proc = dr-xr-xr-x */ 313 vp->v_vflag |= VV_ROOT; 314 /*FALLTHROUGH*/ 315 case PFSproc: /* /proc/N = dr-xr-xr-x */ 316 pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH; 317 vp->v_type = VDIR; 318 break; 319 320 case PFStask: /* /proc/N/task = dr-xr-xr-x */ 321 if (pfs->pfs_fd == -1) { 322 pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP| 323 S_IROTH|S_IXOTH; 324 vp->v_type = VDIR; 325 break; 326 } 327 /*FALLTHROUGH*/ 328 case PFScurproc: /* /proc/curproc = lr-xr-xr-x */ 329 case PFSself: /* /proc/self = lr-xr-xr-x */ 330 case PFScwd: /* /proc/N/cwd = lr-xr-xr-x */ 331 case PFSchroot: /* /proc/N/chroot = lr-xr-xr-x */ 332 case PFSexe: /* /proc/N/exe = lr-xr-xr-x */ 333 pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH; 334 vp->v_type = VLNK; 335 break; 336 337 case PFSfd: 338 if (pfs->pfs_fd == -1) { /* /proc/N/fd = dr-x------ */ 339 pfs->pfs_mode = S_IRUSR|S_IXUSR; 340 vp->v_type = VDIR; 341 } else { /* /proc/N/fd/M = [ps-]rw------- */ 342 file_t *fp; 343 vnode_t *vxp; 344 struct proc *p; 345 346 mutex_enter(&proc_lock); 347 p = procfs_proc_find(mp, pfs->pfs_pid); 348 mutex_exit(&proc_lock); 349 if (p == NULL) { 350 error = ENOENT; 351 goto bad; 352 } 353 KASSERT(rw_read_held(&p->p_reflock)); 354 if ((fp = fd_getfile2(p, pfs->pfs_fd)) == NULL) { 355 error = EBADF; 356 goto bad; 357 } 358 359 pfs->pfs_mode = S_IRUSR|S_IWUSR; 360 switch (fp->f_type) { 361 case DTYPE_VNODE: 362 vxp = fp->f_vnode; 363 364 /* 365 * We make symlinks for directories 366 * to avoid cycles. 367 */ 368 if (vxp->v_type == VDIR || 369 procfs_proc_is_linux_compat()) 370 goto symlink; 371 vp->v_type = vxp->v_type; 372 break; 373 case DTYPE_PIPE: 374 vp->v_type = VFIFO; 375 break; 376 case DTYPE_SOCKET: 377 vp->v_type = VSOCK; 378 break; 379 case DTYPE_KQUEUE: 380 case DTYPE_MISC: 381 case DTYPE_SEM: 382 symlink: 383 pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP| 384 S_IXGRP|S_IROTH|S_IXOTH; 385 vp->v_type = VLNK; 386 break; 387 default: 388 error = EOPNOTSUPP; 389 closef(fp); 390 goto bad; 391 } 392 closef(fp); 393 } 394 break; 395 396 case PFSfile: /* /proc/N/file = -rw------- */ 397 case PFSmem: /* /proc/N/mem = -rw------- */ 398 case PFSregs: /* /proc/N/regs = -rw------- */ 399 case PFSfpregs: /* /proc/N/fpregs = -rw------- */ 400 pfs->pfs_mode = S_IRUSR|S_IWUSR; 401 vp->v_type = VREG; 402 break; 403 404 case PFSnote: /* /proc/N/note = --w------ */ 405 case PFSnotepg: /* /proc/N/notepg = --w------ */ 406 pfs->pfs_mode = S_IWUSR; 407 vp->v_type = VREG; 408 break; 409 410 case PFSmap: /* /proc/N/map = -r-------- */ 411 case PFSmaps: /* /proc/N/maps = -r-------- */ 412 case PFSauxv: /* /proc/N/auxv = -r-------- */ 413 case PFSenviron: /* /proc/N/environ = -r-------- */ 414 pfs->pfs_mode = S_IRUSR; 415 vp->v_type = VREG; 416 break; 417 418 case PFSstatus: /* /proc/N/status = -r--r--r-- */ 419 case PFSstat: /* /proc/N/stat = -r--r--r-- */ 420 case PFScmdline: /* /proc/N/cmdline = -r--r--r-- */ 421 case PFSemul: /* /proc/N/emul = -r--r--r-- */ 422 case PFSmeminfo: /* /proc/meminfo = -r--r--r-- */ 423 case PFScpustat: /* /proc/stat = -r--r--r-- */ 424 case PFSdevices: /* /proc/devices = -r--r--r-- */ 425 case PFScpuinfo: /* /proc/cpuinfo = -r--r--r-- */ 426 case PFSuptime: /* /proc/uptime = -r--r--r-- */ 427 case PFSmounts: /* /proc/mounts = -r--r--r-- */ 428 case PFSloadavg: /* /proc/loadavg = -r--r--r-- */ 429 case PFSstatm: /* /proc/N/statm = -r--r--r-- */ 430 case PFSversion: /* /proc/version = -r--r--r-- */ 431 case PFSlimit: /* /proc/limit = -r--r--r-- */ 432 pfs->pfs_mode = S_IRUSR|S_IRGRP|S_IROTH; 433 vp->v_type = VREG; 434 break; 435 436 #ifdef __HAVE_PROCFS_MACHDEP 437 PROCFS_MACHDEP_NODETYPE_CASES 438 procfs_machdep_allocvp(vp); 439 break; 440 #endif 441 442 default: 443 panic("procfs_allocvp"); 444 } 445 446 mutex_enter(&procfs_hashlock); 447 LIST_INSERT_HEAD(procfs_hashhead(pfs->pfs_pid), pfs, pfs_hash); 448 mutex_exit(&procfs_hashlock); 449 450 uvm_vnp_setsize(vp, 0); 451 *new_key = &pfs->pfs_key; 452 453 return 0; 454 455 bad: 456 vp->v_tag =VT_NON; 457 vp->v_type = VNON; 458 vp->v_op = NULL; 459 vp->v_data = NULL; 460 kmem_free(pfs, sizeof(*pfs)); 461 return error; 462 } 463 464 void 465 procfs_init(void) 466 { 467 468 } 469 470 void 471 procfs_reinit(void) 472 { 473 474 } 475 476 void 477 procfs_done(void) 478 { 479 480 } 481 482 extern const struct vnodeopv_desc procfs_vnodeop_opv_desc; 483 484 const struct vnodeopv_desc * const procfs_vnodeopv_descs[] = { 485 &procfs_vnodeop_opv_desc, 486 NULL, 487 }; 488 489 struct vfsops procfs_vfsops = { 490 .vfs_name = MOUNT_PROCFS, 491 .vfs_min_mount_data = sizeof (struct procfs_args), 492 .vfs_mount = procfs_mount, 493 .vfs_start = procfs_start, 494 .vfs_unmount = procfs_unmount, 495 .vfs_root = procfs_root, 496 .vfs_quotactl = (void *)eopnotsupp, 497 .vfs_statvfs = procfs_statvfs, 498 .vfs_sync = procfs_sync, 499 .vfs_vget = procfs_vget, 500 .vfs_loadvnode = procfs_loadvnode, 501 .vfs_fhtovp = (void *)eopnotsupp, 502 .vfs_vptofh = (void *)eopnotsupp, 503 .vfs_init = procfs_init, 504 .vfs_reinit = procfs_reinit, 505 .vfs_done = procfs_done, 506 .vfs_snapshot = (void *)eopnotsupp, 507 .vfs_extattrctl = vfs_stdextattrctl, 508 .vfs_suspendctl = genfs_suspendctl, 509 .vfs_renamelock_enter = genfs_renamelock_enter, 510 .vfs_renamelock_exit = genfs_renamelock_exit, 511 .vfs_fsync = (void *)eopnotsupp, 512 .vfs_opv_descs = procfs_vnodeopv_descs 513 }; 514 515 static void 516 procfs_exechook_cb(struct proc *p, void *arg) 517 { 518 struct hashhead *head; 519 struct pfsnode *pfs; 520 struct mount *mp; 521 struct pfskey key; 522 struct vnode *vp; 523 int error; 524 525 if (!(p->p_flag & PK_SUGID)) 526 return; 527 528 head = procfs_hashhead(p->p_pid); 529 530 again: 531 mutex_enter(&procfs_hashlock); 532 LIST_FOREACH(pfs, head, pfs_hash) { 533 if (pfs->pfs_pid != p->p_pid) 534 continue; 535 mp = pfs->pfs_mount; 536 key = pfs->pfs_key; 537 vfs_ref(mp); 538 mutex_exit(&procfs_hashlock); 539 540 error = vcache_get(mp, &key, sizeof(key), &vp); 541 vfs_rele(mp); 542 if (error != 0) 543 goto again; 544 if (vrecycle(vp)) 545 goto again; 546 do { 547 error = vfs_suspend(mp, 0); 548 } while (error == EINTR || error == ERESTART); 549 vgone(vp); 550 if (error == 0) 551 vfs_resume(mp); 552 goto again; 553 } 554 mutex_exit(&procfs_hashlock); 555 } 556 557 static int 558 procfs_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 559 void *arg0, void *arg1, void *arg2, void *arg3) 560 { 561 struct proc *p; 562 struct pfsnode *pfs; 563 int result; 564 565 result = KAUTH_RESULT_DEFER; 566 p = arg0; 567 pfs = arg1; 568 569 if (action != KAUTH_PROCESS_PROCFS) 570 return result; 571 572 switch (pfs->pfs_type) { 573 case PFSregs: 574 case PFSfpregs: 575 case PFSmem: 576 if (kauth_cred_getuid(cred) != kauth_cred_getuid(p->p_cred) || 577 ISSET(p->p_flag, PK_SUGID)) 578 break; 579 580 /*FALLTHROUGH*/ 581 default: 582 result = KAUTH_RESULT_ALLOW; 583 break; 584 } 585 586 return result; 587 } 588 589 SYSCTL_SETUP(procfs_sysctl_setup, "procfs sysctl") 590 { 591 592 sysctl_createv(clog, 0, NULL, NULL, 593 CTLFLAG_PERMANENT, 594 CTLTYPE_NODE, "procfs", 595 SYSCTL_DESCR("Process file system"), 596 NULL, 0, NULL, 0, 597 CTL_VFS, 12, CTL_EOL); 598 /* 599 * XXX the "12" above could be dynamic, thereby eliminating 600 * one more instance of the "number to vfs" mapping problem, 601 * but "12" is the order as taken from sys/mount.h 602 */ 603 } 604 605 static int 606 procfs_modcmd(modcmd_t cmd, void *arg) 607 { 608 int error; 609 610 switch (cmd) { 611 case MODULE_CMD_INIT: 612 error = vfs_attach(&procfs_vfsops); 613 if (error != 0) 614 break; 615 616 procfs_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS, 617 procfs_listener_cb, NULL); 618 619 procfs_exechook = exechook_establish(procfs_exechook_cb, NULL); 620 621 mutex_init(&procfs_hashlock, MUTEX_DEFAULT, IPL_NONE); 622 procfs_hashtab = hashinit(PROCFS_HASHSIZE, HASH_LIST, true, 623 &procfs_hashmask); 624 625 break; 626 case MODULE_CMD_FINI: 627 error = vfs_detach(&procfs_vfsops); 628 if (error != 0) 629 break; 630 kauth_unlisten_scope(procfs_listener); 631 exechook_disestablish(procfs_exechook); 632 mutex_destroy(&procfs_hashlock); 633 hashdone(procfs_hashtab, HASH_LIST, procfs_hashmask); 634 break; 635 default: 636 error = ENOTTY; 637 break; 638 } 639 640 return (error); 641 } 642