1 /* $NetBSD: procfs_vnops.c,v 1.215 2020/06/27 17:29:19 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1993, 1995 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Jan-Simon Pendry. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 * 63 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 64 */ 65 66 /* 67 * Copyright (c) 1993 Jan-Simon Pendry 68 * 69 * This code is derived from software contributed to Berkeley by 70 * Jan-Simon Pendry. 71 * 72 * Redistribution and use in source and binary forms, with or without 73 * modification, are permitted provided that the following conditions 74 * are met: 75 * 1. Redistributions of source code must retain the above copyright 76 * notice, this list of conditions and the following disclaimer. 77 * 2. Redistributions in binary form must reproduce the above copyright 78 * notice, this list of conditions and the following disclaimer in the 79 * documentation and/or other materials provided with the distribution. 80 * 3. All advertising materials mentioning features or use of this software 81 * must display the following acknowledgement: 82 * This product includes software developed by the University of 83 * California, Berkeley and its contributors. 84 * 4. Neither the name of the University nor the names of its contributors 85 * may be used to endorse or promote products derived from this software 86 * without specific prior written permission. 87 * 88 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 89 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 90 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 91 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 92 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 93 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 94 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 95 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 96 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 97 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 98 * SUCH DAMAGE. 99 * 100 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 101 */ 102 103 /* 104 * procfs vnode interface 105 */ 106 107 #include <sys/cdefs.h> 108 __KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.215 2020/06/27 17:29:19 christos Exp $"); 109 110 #include <sys/param.h> 111 #include <sys/atomic.h> 112 #include <sys/systm.h> 113 #include <sys/time.h> 114 #include <sys/kernel.h> 115 #include <sys/file.h> 116 #include <sys/filedesc.h> 117 #include <sys/proc.h> 118 #include <sys/vnode.h> 119 #include <sys/namei.h> 120 #include <sys/malloc.h> 121 #include <sys/mount.h> 122 #include <sys/dirent.h> 123 #include <sys/resourcevar.h> 124 #include <sys/stat.h> 125 #include <sys/ptrace.h> 126 #include <sys/kauth.h> 127 #include <sys/exec.h> 128 129 #include <uvm/uvm_extern.h> /* for PAGE_SIZE */ 130 131 #include <machine/reg.h> 132 133 #include <miscfs/genfs/genfs.h> 134 #include <miscfs/procfs/procfs.h> 135 136 /* 137 * Vnode Operations. 138 * 139 */ 140 141 static int procfs_validfile_linux(struct lwp *, struct mount *); 142 static int procfs_root_readdir_callback(struct proc *, void *); 143 static void procfs_dir(pfstype, struct lwp *, struct proc *, char **, char *, 144 size_t); 145 146 /* 147 * This is a list of the valid names in the 148 * process-specific sub-directories. It is 149 * used in procfs_lookup and procfs_readdir 150 */ 151 static const struct proc_target { 152 u_char pt_type; 153 u_char pt_namlen; 154 const char *pt_name; 155 pfstype pt_pfstype; 156 int (*pt_valid)(struct lwp *, struct mount *); 157 } proc_targets[] = { 158 #define N(s) sizeof(s)-1, s 159 /* name type validp */ 160 { DT_DIR, N("."), PFSproc, NULL }, 161 { DT_DIR, N(".."), PFSroot, NULL }, 162 { DT_DIR, N("fd"), PFSfd, NULL }, 163 { DT_DIR, N("task"), PFStask, procfs_validfile_linux }, 164 { DT_LNK, N("cwd"), PFScwd, NULL }, 165 { DT_LNK, N("emul"), PFSemul, NULL }, 166 { DT_LNK, N("root"), PFSchroot, NULL }, 167 { DT_REG, N("auxv"), PFSauxv, procfs_validauxv }, 168 { DT_REG, N("cmdline"), PFScmdline, NULL }, 169 { DT_REG, N("environ"), PFSenviron, NULL }, 170 { DT_REG, N("exe"), PFSexe, procfs_validfile }, 171 { DT_REG, N("file"), PFSfile, procfs_validfile }, 172 { DT_REG, N("fpregs"), PFSfpregs, procfs_validfpregs }, 173 { DT_REG, N("limit"), PFSlimit, NULL }, 174 { DT_REG, N("map"), PFSmap, procfs_validmap }, 175 { DT_REG, N("maps"), PFSmaps, procfs_validmap }, 176 { DT_REG, N("mem"), PFSmem, NULL }, 177 { DT_REG, N("note"), PFSnote, NULL }, 178 { DT_REG, N("notepg"), PFSnotepg, NULL }, 179 { DT_REG, N("regs"), PFSregs, procfs_validregs }, 180 { DT_REG, N("stat"), PFSstat, procfs_validfile_linux }, 181 { DT_REG, N("statm"), PFSstatm, procfs_validfile_linux }, 182 { DT_REG, N("status"), PFSstatus, NULL }, 183 #ifdef __HAVE_PROCFS_MACHDEP 184 PROCFS_MACHDEP_NODETYPE_DEFNS 185 #endif 186 #undef N 187 }; 188 static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]); 189 190 /* 191 * List of files in the root directory. Note: the validate function will 192 * be called with p == NULL for these ones. 193 */ 194 static const struct proc_target proc_root_targets[] = { 195 #define N(s) sizeof(s)-1, s 196 /* name type validp */ 197 { DT_REG, N("meminfo"), PFSmeminfo, procfs_validfile_linux }, 198 { DT_REG, N("cpuinfo"), PFScpuinfo, procfs_validfile_linux }, 199 { DT_REG, N("uptime"), PFSuptime, procfs_validfile_linux }, 200 { DT_REG, N("mounts"), PFSmounts, procfs_validfile_linux }, 201 { DT_REG, N("devices"), PFSdevices, procfs_validfile_linux }, 202 { DT_REG, N("stat"), PFScpustat, procfs_validfile_linux }, 203 { DT_REG, N("loadavg"), PFSloadavg, procfs_validfile_linux }, 204 { DT_REG, N("version"), PFSversion, procfs_validfile_linux }, 205 #undef N 206 }; 207 static const int nproc_root_targets = 208 sizeof(proc_root_targets) / sizeof(proc_root_targets[0]); 209 210 int procfs_lookup(void *); 211 #define procfs_create genfs_eopnotsupp 212 #define procfs_mknod genfs_eopnotsupp 213 int procfs_open(void *); 214 int procfs_close(void *); 215 int procfs_access(void *); 216 int procfs_getattr(void *); 217 int procfs_setattr(void *); 218 #define procfs_read procfs_rw 219 #define procfs_write procfs_rw 220 #define procfs_fcntl genfs_fcntl 221 #define procfs_ioctl genfs_enoioctl 222 #define procfs_poll genfs_poll 223 #define procfs_kqfilter genfs_kqfilter 224 #define procfs_revoke genfs_revoke 225 #define procfs_fsync genfs_nullop 226 #define procfs_seek genfs_nullop 227 #define procfs_remove genfs_eopnotsupp 228 int procfs_link(void *); 229 #define procfs_rename genfs_eopnotsupp 230 #define procfs_mkdir genfs_eopnotsupp 231 #define procfs_rmdir genfs_eopnotsupp 232 int procfs_symlink(void *); 233 int procfs_readdir(void *); 234 int procfs_readlink(void *); 235 #define procfs_abortop genfs_abortop 236 int procfs_inactive(void *); 237 int procfs_reclaim(void *); 238 #define procfs_lock genfs_lock 239 #define procfs_unlock genfs_unlock 240 #define procfs_bmap genfs_badop 241 #define procfs_strategy genfs_badop 242 int procfs_print(void *); 243 int procfs_pathconf(void *); 244 #define procfs_islocked genfs_islocked 245 #define procfs_advlock genfs_einval 246 #define procfs_bwrite genfs_eopnotsupp 247 int procfs_getpages(void *); 248 #define procfs_putpages genfs_null_putpages 249 250 static int atoi(const char *, size_t); 251 252 /* 253 * procfs vnode operations. 254 */ 255 int (**procfs_vnodeop_p)(void *); 256 const struct vnodeopv_entry_desc procfs_vnodeop_entries[] = { 257 { &vop_default_desc, vn_default_error }, 258 { &vop_lookup_desc, procfs_lookup }, /* lookup */ 259 { &vop_create_desc, procfs_create }, /* create */ 260 { &vop_mknod_desc, procfs_mknod }, /* mknod */ 261 { &vop_open_desc, procfs_open }, /* open */ 262 { &vop_close_desc, procfs_close }, /* close */ 263 { &vop_access_desc, procfs_access }, /* access */ 264 { &vop_accessx_desc, genfs_accessx }, /* accessx */ 265 { &vop_getattr_desc, procfs_getattr }, /* getattr */ 266 { &vop_setattr_desc, procfs_setattr }, /* setattr */ 267 { &vop_read_desc, procfs_read }, /* read */ 268 { &vop_write_desc, procfs_write }, /* write */ 269 { &vop_fallocate_desc, genfs_eopnotsupp }, /* fallocate */ 270 { &vop_fdiscard_desc, genfs_eopnotsupp }, /* fdiscard */ 271 { &vop_fcntl_desc, procfs_fcntl }, /* fcntl */ 272 { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */ 273 { &vop_poll_desc, procfs_poll }, /* poll */ 274 { &vop_kqfilter_desc, procfs_kqfilter }, /* kqfilter */ 275 { &vop_revoke_desc, procfs_revoke }, /* revoke */ 276 { &vop_fsync_desc, procfs_fsync }, /* fsync */ 277 { &vop_seek_desc, procfs_seek }, /* seek */ 278 { &vop_remove_desc, procfs_remove }, /* remove */ 279 { &vop_link_desc, procfs_link }, /* link */ 280 { &vop_rename_desc, procfs_rename }, /* rename */ 281 { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */ 282 { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */ 283 { &vop_symlink_desc, procfs_symlink }, /* symlink */ 284 { &vop_readdir_desc, procfs_readdir }, /* readdir */ 285 { &vop_readlink_desc, procfs_readlink }, /* readlink */ 286 { &vop_abortop_desc, procfs_abortop }, /* abortop */ 287 { &vop_inactive_desc, procfs_inactive }, /* inactive */ 288 { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */ 289 { &vop_lock_desc, procfs_lock }, /* lock */ 290 { &vop_unlock_desc, procfs_unlock }, /* unlock */ 291 { &vop_bmap_desc, procfs_bmap }, /* bmap */ 292 { &vop_strategy_desc, procfs_strategy }, /* strategy */ 293 { &vop_print_desc, procfs_print }, /* print */ 294 { &vop_islocked_desc, procfs_islocked }, /* islocked */ 295 { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */ 296 { &vop_advlock_desc, procfs_advlock }, /* advlock */ 297 { &vop_getpages_desc, procfs_getpages }, /* getpages */ 298 { &vop_putpages_desc, procfs_putpages }, /* putpages */ 299 { NULL, NULL } 300 }; 301 const struct vnodeopv_desc procfs_vnodeop_opv_desc = 302 { &procfs_vnodeop_p, procfs_vnodeop_entries }; 303 /* 304 * set things up for doing i/o on 305 * the pfsnode (vp). (vp) is locked 306 * on entry, and should be left locked 307 * on exit. 308 * 309 * for procfs we don't need to do anything 310 * in particular for i/o. all that is done 311 * is to support exclusive open on process 312 * memory images. 313 */ 314 int 315 procfs_open(void *v) 316 { 317 struct vop_open_args /* { 318 struct vnode *a_vp; 319 int a_mode; 320 kauth_cred_t a_cred; 321 } */ *ap = v; 322 struct vnode *vp = ap->a_vp; 323 struct pfsnode *pfs = VTOPFS(vp); 324 struct lwp *l1; 325 struct proc *p2; 326 int error; 327 328 if ((error = 329 procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p2, ENOENT)) != 0) 330 return error; 331 332 l1 = curlwp; /* tracer */ 333 334 #define M2K(m) (((m) & FREAD) && ((m) & FWRITE) ? \ 335 KAUTH_REQ_PROCESS_PROCFS_RW : \ 336 (m) & FWRITE ? KAUTH_REQ_PROCESS_PROCFS_WRITE : \ 337 KAUTH_REQ_PROCESS_PROCFS_READ) 338 339 mutex_enter(p2->p_lock); 340 error = kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_PROCFS, 341 p2, pfs, KAUTH_ARG(M2K(ap->a_mode)), NULL); 342 mutex_exit(p2->p_lock); 343 if (error) { 344 procfs_proc_unlock(p2); 345 return (error); 346 } 347 348 #undef M2K 349 350 switch (pfs->pfs_type) { 351 case PFSmem: 352 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) || 353 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) { 354 error = EBUSY; 355 break; 356 } 357 358 if (!proc_isunder(p2, l1)) { 359 error = EPERM; 360 break; 361 } 362 363 if (ap->a_mode & FWRITE) 364 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); 365 366 break; 367 368 case PFSregs: 369 case PFSfpregs: 370 if (!proc_isunder(p2, l1)) { 371 error = EPERM; 372 break; 373 } 374 break; 375 376 default: 377 break; 378 } 379 380 procfs_proc_unlock(p2); 381 return (error); 382 } 383 384 /* 385 * close the pfsnode (vp) after doing i/o. 386 * (vp) is not locked on entry or exit. 387 * 388 * nothing to do for procfs other than undo 389 * any exclusive open flag (see _open above). 390 */ 391 int 392 procfs_close(void *v) 393 { 394 struct vop_close_args /* { 395 struct vnode *a_vp; 396 int a_fflag; 397 kauth_cred_t a_cred; 398 } */ *ap = v; 399 struct pfsnode *pfs = VTOPFS(ap->a_vp); 400 401 switch (pfs->pfs_type) { 402 case PFSmem: 403 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) 404 pfs->pfs_flags &= ~(FWRITE|O_EXCL); 405 break; 406 407 default: 408 break; 409 } 410 411 return (0); 412 } 413 414 /* 415 * _inactive is called when the pfsnode 416 * is vrele'd and the reference count goes 417 * to zero. (vp) will be on the vnode free 418 * list, so to get it back vget() must be 419 * used. 420 * 421 * (vp) is locked on entry, but must be unlocked on exit. 422 */ 423 int 424 procfs_inactive(void *v) 425 { 426 struct vop_inactive_v2_args /* { 427 struct vnode *a_vp; 428 bool *a_recycle; 429 } */ *ap = v; 430 struct vnode *vp = ap->a_vp; 431 struct pfsnode *pfs = VTOPFS(vp); 432 433 mutex_enter(&proc_lock); 434 *ap->a_recycle = (procfs_proc_find(vp->v_mount, pfs->pfs_pid) == NULL); 435 mutex_exit(&proc_lock); 436 437 return (0); 438 } 439 440 /* 441 * _reclaim is called when getnewvnode() 442 * wants to make use of an entry on the vnode 443 * free list. at this time the filesystem needs 444 * to free any private data and remove the node 445 * from any private lists. 446 */ 447 int 448 procfs_reclaim(void *v) 449 { 450 struct vop_reclaim_v2_args /* { 451 struct vnode *a_vp; 452 } */ *ap = v; 453 struct vnode *vp = ap->a_vp; 454 struct pfsnode *pfs = VTOPFS(vp); 455 456 VOP_UNLOCK(vp); 457 458 /* 459 * To interlock with procfs_revoke_vnodes(). 460 */ 461 mutex_enter(vp->v_interlock); 462 vp->v_data = NULL; 463 mutex_exit(vp->v_interlock); 464 kmem_free(pfs, sizeof(*pfs)); 465 return 0; 466 } 467 468 /* 469 * Return POSIX pathconf information applicable to special devices. 470 */ 471 int 472 procfs_pathconf(void *v) 473 { 474 struct vop_pathconf_args /* { 475 struct vnode *a_vp; 476 int a_name; 477 register_t *a_retval; 478 } */ *ap = v; 479 480 switch (ap->a_name) { 481 case _PC_LINK_MAX: 482 *ap->a_retval = LINK_MAX; 483 return (0); 484 case _PC_MAX_CANON: 485 *ap->a_retval = MAX_CANON; 486 return (0); 487 case _PC_MAX_INPUT: 488 *ap->a_retval = MAX_INPUT; 489 return (0); 490 case _PC_PIPE_BUF: 491 *ap->a_retval = PIPE_BUF; 492 return (0); 493 case _PC_CHOWN_RESTRICTED: 494 *ap->a_retval = 1; 495 return (0); 496 case _PC_VDISABLE: 497 *ap->a_retval = _POSIX_VDISABLE; 498 return (0); 499 case _PC_SYNC_IO: 500 *ap->a_retval = 1; 501 return (0); 502 default: 503 return genfs_pathconf(ap); 504 } 505 /* NOTREACHED */ 506 } 507 508 /* 509 * _print is used for debugging. 510 * just print a readable description 511 * of (vp). 512 */ 513 int 514 procfs_print(void *v) 515 { 516 struct vop_print_args /* { 517 struct vnode *a_vp; 518 } */ *ap = v; 519 struct pfsnode *pfs = VTOPFS(ap->a_vp); 520 521 printf("tag VT_PROCFS, type %d, pid %d, mode %x, flags %lx\n", 522 pfs->pfs_type, pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags); 523 return 0; 524 } 525 526 int 527 procfs_link(void *v) 528 { 529 struct vop_link_v2_args /* { 530 struct vnode *a_dvp; 531 struct vnode *a_vp; 532 struct componentname *a_cnp; 533 } */ *ap = v; 534 535 VOP_ABORTOP(ap->a_dvp, ap->a_cnp); 536 return (EROFS); 537 } 538 539 int 540 procfs_symlink(void *v) 541 { 542 struct vop_symlink_v3_args /* { 543 struct vnode *a_dvp; 544 struct vnode **a_vpp; 545 struct componentname *a_cnp; 546 struct vattr *a_vap; 547 char *a_target; 548 } */ *ap = v; 549 550 VOP_ABORTOP(ap->a_dvp, ap->a_cnp); 551 return (EROFS); 552 } 553 554 /* 555 * Works out the path to the target process's current 556 * working directory or chroot. If the caller is in a chroot and 557 * can't "reach" the target's cwd or root (or some other error 558 * occurs), a "/" is returned for the path. 559 */ 560 static void 561 procfs_dir(pfstype t, struct lwp *caller, struct proc *target, char **bpp, 562 char *path, size_t len) 563 { 564 struct cwdinfo *cwdi; 565 struct vnode *vp, *rvp; 566 char *bp; 567 568 /* 569 * Lock target cwdi and take a reference to the vnode 570 * we are interested in to prevent it from disappearing 571 * before getcwd_common() below. 572 */ 573 rw_enter(&target->p_cwdi->cwdi_lock, RW_READER); 574 switch (t) { 575 case PFScwd: 576 vp = target->p_cwdi->cwdi_cdir; 577 break; 578 case PFSchroot: 579 vp = target->p_cwdi->cwdi_rdir; 580 break; 581 default: 582 rw_exit(&target->p_cwdi->cwdi_lock); 583 return; 584 } 585 if (vp != NULL) 586 vref(vp); 587 rw_exit(&target->p_cwdi->cwdi_lock); 588 589 cwdi = caller->l_proc->p_cwdi; 590 rw_enter(&cwdi->cwdi_lock, RW_READER); 591 592 rvp = cwdi->cwdi_rdir; 593 bp = bpp ? *bpp : NULL; 594 595 /* 596 * XXX: this horrible kludge avoids locking panics when 597 * attempting to lookup links that point to within procfs 598 */ 599 if (vp != NULL && vp->v_tag == VT_PROCFS) { 600 if (bpp) { 601 *--bp = '/'; 602 *bpp = bp; 603 } 604 vrele(vp); 605 rw_exit(&cwdi->cwdi_lock); 606 return; 607 } 608 609 if (rvp == NULL) 610 rvp = rootvnode; 611 if (vp == NULL || getcwd_common(vp, rvp, bp ? &bp : NULL, path, 612 len / 2, 0, caller) != 0) { 613 if (bpp) { 614 bp = *bpp; 615 *--bp = '/'; 616 } 617 } 618 619 if (bpp) 620 *bpp = bp; 621 622 if (vp != NULL) 623 vrele(vp); 624 rw_exit(&cwdi->cwdi_lock); 625 } 626 627 /* 628 * Invent attributes for pfsnode (vp) and store 629 * them in (vap). 630 * Directories lengths are returned as zero since 631 * any real length would require the genuine size 632 * to be computed, and nothing cares anyway. 633 * 634 * this is relatively minimal for procfs. 635 */ 636 int 637 procfs_getattr(void *v) 638 { 639 struct vop_getattr_args /* { 640 struct vnode *a_vp; 641 struct vattr *a_vap; 642 kauth_cred_t a_cred; 643 } */ *ap = v; 644 struct vnode *vp = ap->a_vp; 645 struct pfsnode *pfs = VTOPFS(vp); 646 struct vattr *vap = ap->a_vap; 647 struct proc *procp; 648 char *path, *bp, bf[16]; 649 int error; 650 651 /* first check the process still exists */ 652 switch (pfs->pfs_type) { 653 case PFSroot: 654 case PFScurproc: 655 case PFSself: 656 procp = NULL; 657 break; 658 659 default: 660 error = 661 procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &procp, ENOENT); 662 if (error != 0) 663 return (error); 664 break; 665 } 666 667 switch (pfs->pfs_type) { 668 case PFStask: 669 if (pfs->pfs_fd == -1) { 670 path = NULL; 671 break; 672 } 673 /*FALLTHROUGH*/ 674 case PFScwd: 675 case PFSchroot: 676 path = malloc(MAXPATHLEN + 4, M_TEMP, M_WAITOK); 677 if (path == NULL && procp != NULL) { 678 procfs_proc_unlock(procp); 679 return (ENOMEM); 680 } 681 break; 682 683 default: 684 path = NULL; 685 break; 686 } 687 688 if (procp != NULL) { 689 mutex_enter(procp->p_lock); 690 error = kauth_authorize_process(kauth_cred_get(), 691 KAUTH_PROCESS_CANSEE, procp, 692 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL); 693 mutex_exit(procp->p_lock); 694 if (error != 0) { 695 procfs_proc_unlock(procp); 696 if (path != NULL) 697 free(path, M_TEMP); 698 return (ENOENT); 699 } 700 } 701 702 error = 0; 703 704 /* start by zeroing out the attributes */ 705 vattr_null(vap); 706 707 /* next do all the common fields */ 708 vap->va_type = ap->a_vp->v_type; 709 vap->va_mode = pfs->pfs_mode; 710 vap->va_fileid = pfs->pfs_fileno; 711 vap->va_flags = 0; 712 vap->va_blocksize = PAGE_SIZE; 713 714 /* 715 * Make all times be current TOD. 716 * 717 * It would be possible to get the process start 718 * time from the p_stats structure, but there's 719 * no "file creation" time stamp anyway, and the 720 * p_stats structure is not addressable if u. gets 721 * swapped out for that process. 722 */ 723 getnanotime(&vap->va_ctime); 724 vap->va_atime = vap->va_mtime = vap->va_ctime; 725 if (procp) 726 TIMEVAL_TO_TIMESPEC(&procp->p_stats->p_start, 727 &vap->va_birthtime); 728 else 729 getnanotime(&vap->va_birthtime); 730 731 switch (pfs->pfs_type) { 732 case PFSmem: 733 case PFSregs: 734 case PFSfpregs: 735 #if defined(__HAVE_PROCFS_MACHDEP) && defined(PROCFS_MACHDEP_PROTECT_CASES) 736 PROCFS_MACHDEP_PROTECT_CASES 737 #endif 738 /* 739 * If the process has exercised some setuid or setgid 740 * privilege, then rip away read/write permission so 741 * that only root can gain access. 742 */ 743 if (procp->p_flag & PK_SUGID) 744 vap->va_mode &= ~(S_IRUSR|S_IWUSR); 745 /* FALLTHROUGH */ 746 case PFSstatus: 747 case PFSstat: 748 case PFSnote: 749 case PFSnotepg: 750 case PFScmdline: 751 case PFSenviron: 752 case PFSemul: 753 case PFSstatm: 754 755 case PFSmap: 756 case PFSmaps: 757 case PFSlimit: 758 case PFSauxv: 759 vap->va_nlink = 1; 760 vap->va_uid = kauth_cred_geteuid(procp->p_cred); 761 vap->va_gid = kauth_cred_getegid(procp->p_cred); 762 break; 763 case PFScwd: 764 case PFSchroot: 765 case PFSmeminfo: 766 case PFSdevices: 767 case PFScpuinfo: 768 case PFSuptime: 769 case PFSmounts: 770 case PFScpustat: 771 case PFSloadavg: 772 case PFSversion: 773 case PFSexe: 774 case PFSself: 775 case PFScurproc: 776 case PFSroot: 777 vap->va_nlink = 1; 778 vap->va_uid = vap->va_gid = 0; 779 break; 780 781 case PFSproc: 782 case PFStask: 783 case PFSfile: 784 case PFSfd: 785 break; 786 787 default: 788 panic("%s: %d/1", __func__, pfs->pfs_type); 789 } 790 791 /* 792 * now do the object specific fields 793 * 794 * The size could be set from struct reg, but it's hardly 795 * worth the trouble, and it puts some (potentially) machine 796 * dependent data into this machine-independent code. If it 797 * becomes important then this function should break out into 798 * a per-file stat function in the corresponding .c file. 799 */ 800 801 switch (pfs->pfs_type) { 802 case PFSroot: 803 vap->va_bytes = vap->va_size = DEV_BSIZE; 804 break; 805 806 case PFSself: 807 case PFScurproc: 808 vap->va_bytes = vap->va_size = 809 snprintf(bf, sizeof(bf), "%ld", (long)curproc->p_pid); 810 break; 811 case PFStask: 812 if (pfs->pfs_fd != -1) { 813 vap->va_nlink = 1; 814 vap->va_uid = 0; 815 vap->va_gid = 0; 816 vap->va_bytes = vap->va_size = 817 snprintf(bf, sizeof(bf), ".."); 818 break; 819 } 820 /*FALLTHROUGH*/ 821 case PFSfd: 822 if (pfs->pfs_fd != -1) { 823 file_t *fp; 824 825 fp = fd_getfile2(procp, pfs->pfs_fd); 826 if (fp == NULL) { 827 error = EBADF; 828 break; 829 } 830 vap->va_nlink = 1; 831 vap->va_uid = kauth_cred_geteuid(fp->f_cred); 832 vap->va_gid = kauth_cred_getegid(fp->f_cred); 833 switch (fp->f_type) { 834 case DTYPE_VNODE: 835 vap->va_bytes = vap->va_size = 836 fp->f_vnode->v_size; 837 break; 838 default: 839 vap->va_bytes = vap->va_size = 0; 840 break; 841 } 842 closef(fp); 843 break; 844 } 845 /*FALLTHROUGH*/ 846 case PFSproc: 847 vap->va_nlink = 2; 848 vap->va_uid = kauth_cred_geteuid(procp->p_cred); 849 vap->va_gid = kauth_cred_getegid(procp->p_cred); 850 vap->va_bytes = vap->va_size = DEV_BSIZE; 851 break; 852 853 case PFSfile: 854 error = EOPNOTSUPP; 855 break; 856 857 case PFSmem: 858 vap->va_bytes = vap->va_size = 859 ctob(procp->p_vmspace->vm_tsize + 860 procp->p_vmspace->vm_dsize + 861 procp->p_vmspace->vm_ssize); 862 break; 863 864 case PFSauxv: 865 vap->va_bytes = vap->va_size = procp->p_execsw->es_arglen; 866 break; 867 868 #if defined(PT_GETREGS) || defined(PT_SETREGS) 869 case PFSregs: 870 vap->va_bytes = vap->va_size = sizeof(struct reg); 871 break; 872 #endif 873 874 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS) 875 case PFSfpregs: 876 vap->va_bytes = vap->va_size = sizeof(struct fpreg); 877 break; 878 #endif 879 880 case PFSstatus: 881 case PFSstat: 882 case PFSnote: 883 case PFSnotepg: 884 case PFScmdline: 885 case PFSenviron: 886 case PFSmeminfo: 887 case PFSdevices: 888 case PFScpuinfo: 889 case PFSuptime: 890 case PFSmounts: 891 case PFScpustat: 892 case PFSloadavg: 893 case PFSstatm: 894 case PFSversion: 895 vap->va_bytes = vap->va_size = 0; 896 break; 897 case PFSlimit: 898 case PFSmap: 899 case PFSmaps: 900 /* 901 * Advise a larger blocksize for the map files, so that 902 * they may be read in one pass. 903 */ 904 vap->va_blocksize = 4 * PAGE_SIZE; 905 vap->va_bytes = vap->va_size = 0; 906 break; 907 908 case PFScwd: 909 case PFSchroot: 910 bp = path + MAXPATHLEN; 911 *--bp = '\0'; 912 procfs_dir(pfs->pfs_type, curlwp, procp, &bp, path, 913 MAXPATHLEN); 914 vap->va_bytes = vap->va_size = strlen(bp); 915 break; 916 917 case PFSexe: 918 vap->va_bytes = vap->va_size = strlen(procp->p_path); 919 break; 920 921 case PFSemul: 922 vap->va_bytes = vap->va_size = strlen(procp->p_emul->e_name); 923 break; 924 925 #ifdef __HAVE_PROCFS_MACHDEP 926 PROCFS_MACHDEP_NODETYPE_CASES 927 error = procfs_machdep_getattr(ap->a_vp, vap, procp); 928 break; 929 #endif 930 931 default: 932 panic("%s: %d/2", __func__, pfs->pfs_type); 933 } 934 935 if (procp != NULL) 936 procfs_proc_unlock(procp); 937 if (path != NULL) 938 free(path, M_TEMP); 939 940 return (error); 941 } 942 943 /*ARGSUSED*/ 944 int 945 procfs_setattr(void *v) 946 { 947 /* 948 * just fake out attribute setting 949 * it's not good to generate an error 950 * return, otherwise things like creat() 951 * will fail when they try to set the 952 * file length to 0. worse, this means 953 * that echo $note > /proc/$pid/note will fail. 954 */ 955 956 return (0); 957 } 958 959 /* 960 * implement access checking. 961 * 962 * actually, the check for super-user is slightly 963 * broken since it will allow read access to write-only 964 * objects. this doesn't cause any particular trouble 965 * but does mean that the i/o entry points need to check 966 * that the operation really does make sense. 967 */ 968 int 969 procfs_access(void *v) 970 { 971 struct vop_access_args /* { 972 struct vnode *a_vp; 973 accmode_t a_accmode; 974 kauth_cred_t a_cred; 975 } */ *ap = v; 976 struct vattr va; 977 int error; 978 979 if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred)) != 0) 980 return (error); 981 982 return kauth_authorize_vnode(ap->a_cred, 983 KAUTH_ACCESS_ACTION(ap->a_accmode, ap->a_vp->v_type, va.va_mode), 984 ap->a_vp, NULL, genfs_can_access(ap->a_vp, ap->a_cred, 985 va.va_uid, va.va_gid, va.va_mode, NULL, ap->a_accmode)); 986 } 987 988 /* 989 * lookup. this is incredibly complicated in the 990 * general case, however for most pseudo-filesystems 991 * very little needs to be done. 992 * 993 * Locking isn't hard here, just poorly documented. 994 * 995 * If we're looking up ".", just vref the parent & return it. 996 * 997 * If we're looking up "..", unlock the parent, and lock "..". If everything 998 * went ok, and we're on the last component and the caller requested the 999 * parent locked, try to re-lock the parent. We do this to prevent lock 1000 * races. 1001 * 1002 * For anything else, get the needed node. Then unlock the parent if not 1003 * the last component or not LOCKPARENT (i.e. if we wouldn't re-lock the 1004 * parent in the .. case). 1005 * 1006 * We try to exit with the parent locked in error cases. 1007 */ 1008 int 1009 procfs_lookup(void *v) 1010 { 1011 struct vop_lookup_v2_args /* { 1012 struct vnode * a_dvp; 1013 struct vnode ** a_vpp; 1014 struct componentname * a_cnp; 1015 } */ *ap = v; 1016 struct componentname *cnp = ap->a_cnp; 1017 struct vnode **vpp = ap->a_vpp; 1018 struct vnode *dvp = ap->a_dvp; 1019 const char *pname = cnp->cn_nameptr; 1020 const struct proc_target *pt = NULL; 1021 struct vnode *fvp; 1022 pid_t pid, vnpid; 1023 struct pfsnode *pfs; 1024 struct proc *p = NULL; 1025 struct lwp *plwp; 1026 int i, error; 1027 pfstype type; 1028 1029 *vpp = NULL; 1030 1031 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) 1032 return (EROFS); 1033 1034 if (cnp->cn_namelen == 1 && *pname == '.') { 1035 *vpp = dvp; 1036 vref(dvp); 1037 return (0); 1038 } 1039 1040 pfs = VTOPFS(dvp); 1041 switch (pfs->pfs_type) { 1042 case PFSroot: 1043 /* 1044 * Shouldn't get here with .. in the root node. 1045 */ 1046 if (cnp->cn_flags & ISDOTDOT) 1047 return (EIO); 1048 1049 for (i = 0; i < nproc_root_targets; i++) { 1050 pt = &proc_root_targets[i]; 1051 /* 1052 * check for node match. proc is always NULL here, 1053 * so call pt_valid with constant NULL lwp. 1054 */ 1055 if (cnp->cn_namelen == pt->pt_namlen && 1056 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 1057 (pt->pt_valid == NULL || 1058 (*pt->pt_valid)(NULL, dvp->v_mount))) 1059 break; 1060 } 1061 1062 if (i != nproc_root_targets) { 1063 error = procfs_allocvp(dvp->v_mount, vpp, 0, 1064 pt->pt_pfstype, -1); 1065 return (error); 1066 } 1067 1068 if (CNEQ(cnp, "curproc", 7)) { 1069 pid = curproc->p_pid; 1070 vnpid = 0; 1071 type = PFScurproc; 1072 } else if (CNEQ(cnp, "self", 4)) { 1073 pid = curproc->p_pid; 1074 vnpid = 0; 1075 type = PFSself; 1076 } else { 1077 pid = (pid_t)atoi(pname, cnp->cn_namelen); 1078 vnpid = pid; 1079 type = PFSproc; 1080 } 1081 1082 if (procfs_proc_lock(dvp->v_mount, pid, &p, ESRCH) != 0) 1083 break; 1084 error = procfs_allocvp(dvp->v_mount, vpp, vnpid, type, -1); 1085 procfs_proc_unlock(p); 1086 return (error); 1087 1088 case PFSproc: 1089 if (cnp->cn_flags & ISDOTDOT) { 1090 error = procfs_allocvp(dvp->v_mount, vpp, 0, PFSroot, 1091 -1); 1092 return (error); 1093 } 1094 1095 if (procfs_proc_lock(dvp->v_mount, pfs->pfs_pid, &p, 1096 ESRCH) != 0) 1097 break; 1098 1099 mutex_enter(p->p_lock); 1100 LIST_FOREACH(plwp, &p->p_lwps, l_sibling) { 1101 if (plwp->l_stat != LSZOMB) 1102 break; 1103 } 1104 /* Process is exiting if no-LWPS or all LWPs are LSZOMB */ 1105 if (plwp == NULL) { 1106 mutex_exit(p->p_lock); 1107 procfs_proc_unlock(p); 1108 return ESRCH; 1109 } 1110 1111 lwp_addref(plwp); 1112 mutex_exit(p->p_lock); 1113 1114 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) { 1115 int found; 1116 1117 found = cnp->cn_namelen == pt->pt_namlen && 1118 memcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && 1119 (pt->pt_valid == NULL 1120 || (*pt->pt_valid)(plwp, dvp->v_mount)); 1121 if (found) 1122 break; 1123 } 1124 lwp_delref(plwp); 1125 1126 if (i == nproc_targets) { 1127 procfs_proc_unlock(p); 1128 break; 1129 } 1130 if (pt->pt_pfstype == PFSfile) { 1131 fvp = p->p_textvp; 1132 /* We already checked that it exists. */ 1133 vref(fvp); 1134 procfs_proc_unlock(p); 1135 *vpp = fvp; 1136 return (0); 1137 } 1138 1139 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 1140 pt->pt_pfstype, -1); 1141 procfs_proc_unlock(p); 1142 return (error); 1143 1144 case PFSfd: { 1145 int fd; 1146 file_t *fp; 1147 1148 if ((error = procfs_proc_lock(dvp->v_mount, pfs->pfs_pid, &p, 1149 ENOENT)) != 0) 1150 return error; 1151 1152 if (cnp->cn_flags & ISDOTDOT) { 1153 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 1154 PFSproc, -1); 1155 procfs_proc_unlock(p); 1156 return (error); 1157 } 1158 fd = atoi(pname, cnp->cn_namelen); 1159 1160 fp = fd_getfile2(p, fd); 1161 if (fp == NULL) { 1162 procfs_proc_unlock(p); 1163 return ENOENT; 1164 } 1165 fvp = fp->f_vnode; 1166 1167 /* Don't show directories */ 1168 if (fp->f_type == DTYPE_VNODE && fvp->v_type != VDIR) { 1169 vref(fvp); 1170 closef(fp); 1171 procfs_proc_unlock(p); 1172 *vpp = fvp; 1173 return 0; 1174 } 1175 1176 closef(fp); 1177 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 1178 PFSfd, fd); 1179 procfs_proc_unlock(p); 1180 return error; 1181 } 1182 case PFStask: { 1183 int xpid; 1184 1185 if ((error = procfs_proc_lock(dvp->v_mount, pfs->pfs_pid, &p, 1186 ENOENT)) != 0) 1187 return error; 1188 1189 if (cnp->cn_flags & ISDOTDOT) { 1190 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 1191 PFSproc, -1); 1192 procfs_proc_unlock(p); 1193 return (error); 1194 } 1195 xpid = atoi(pname, cnp->cn_namelen); 1196 1197 if (xpid != pfs->pfs_pid) { 1198 procfs_proc_unlock(p); 1199 return ENOENT; 1200 } 1201 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, 1202 PFStask, 0); 1203 procfs_proc_unlock(p); 1204 return error; 1205 } 1206 default: 1207 return (ENOTDIR); 1208 } 1209 1210 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS); 1211 } 1212 1213 int 1214 procfs_validfile(struct lwp *l, struct mount *mp) 1215 { 1216 return l != NULL && l->l_proc != NULL && l->l_proc->p_textvp != NULL; 1217 } 1218 1219 static int 1220 procfs_validfile_linux(struct lwp *l, struct mount *mp) 1221 { 1222 return procfs_use_linux_compat(mp) && 1223 (l == NULL || l->l_proc == NULL || procfs_validfile(l, mp)); 1224 } 1225 1226 struct procfs_root_readdir_ctx { 1227 struct uio *uiop; 1228 off_t *cookies; 1229 int ncookies; 1230 off_t off; 1231 off_t startoff; 1232 int error; 1233 }; 1234 1235 static int 1236 procfs_root_readdir_callback(struct proc *p, void *arg) 1237 { 1238 struct procfs_root_readdir_ctx *ctxp = arg; 1239 struct dirent d; 1240 struct uio *uiop; 1241 int error; 1242 1243 uiop = ctxp->uiop; 1244 if (uiop->uio_resid < UIO_MX) 1245 return -1; /* no space */ 1246 1247 if (ctxp->off < ctxp->startoff) { 1248 ctxp->off++; 1249 return 0; 1250 } 1251 1252 if (kauth_authorize_process(kauth_cred_get(), 1253 KAUTH_PROCESS_CANSEE, p, 1254 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL) != 0) 1255 return 0; 1256 1257 memset(&d, 0, UIO_MX); 1258 d.d_reclen = UIO_MX; 1259 d.d_fileno = PROCFS_FILENO(p->p_pid, PFSproc, -1); 1260 d.d_namlen = snprintf(d.d_name, 1261 UIO_MX - offsetof(struct dirent, d_name), "%ld", (long)p->p_pid); 1262 d.d_type = DT_DIR; 1263 1264 mutex_exit(&proc_lock); 1265 error = uiomove(&d, UIO_MX, uiop); 1266 mutex_enter(&proc_lock); 1267 if (error) { 1268 ctxp->error = error; 1269 return -1; 1270 } 1271 1272 ctxp->ncookies++; 1273 if (ctxp->cookies) 1274 *(ctxp->cookies)++ = ctxp->off + 1; 1275 ctxp->off++; 1276 1277 return 0; 1278 } 1279 1280 /* 1281 * readdir returns directory entries from pfsnode (vp). 1282 * 1283 * the strategy here with procfs is to generate a single 1284 * directory entry at a time (struct dirent) and then 1285 * copy that out to userland using uiomove. a more efficent 1286 * though more complex implementation, would try to minimize 1287 * the number of calls to uiomove(). for procfs, this is 1288 * hardly worth the added code complexity. 1289 * 1290 * this should just be done through read() 1291 */ 1292 int 1293 procfs_readdir(void *v) 1294 { 1295 struct vop_readdir_args /* { 1296 struct vnode *a_vp; 1297 struct uio *a_uio; 1298 kauth_cred_t a_cred; 1299 int *a_eofflag; 1300 off_t **a_cookies; 1301 int *a_ncookies; 1302 } */ *ap = v; 1303 struct uio *uio = ap->a_uio; 1304 struct dirent d; 1305 struct pfsnode *pfs; 1306 off_t i; 1307 int error; 1308 off_t *cookies = NULL; 1309 int ncookies; 1310 struct vnode *vp; 1311 const struct proc_target *pt; 1312 struct procfs_root_readdir_ctx ctx; 1313 struct lwp *l; 1314 int nfd; 1315 1316 vp = ap->a_vp; 1317 pfs = VTOPFS(vp); 1318 1319 if (uio->uio_resid < UIO_MX) 1320 return (EINVAL); 1321 if (uio->uio_offset < 0) 1322 return (EINVAL); 1323 1324 error = 0; 1325 i = uio->uio_offset; 1326 memset(&d, 0, UIO_MX); 1327 d.d_reclen = UIO_MX; 1328 ncookies = uio->uio_resid / UIO_MX; 1329 1330 switch (pfs->pfs_type) { 1331 /* 1332 * this is for the process-specific sub-directories. 1333 * all that is needed to is copy out all the entries 1334 * from the procent[] table (top of this file). 1335 */ 1336 case PFSproc: { 1337 struct proc *p; 1338 1339 if (i >= nproc_targets) 1340 return 0; 1341 1342 if (procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p, ESRCH) != 0) 1343 break; 1344 1345 if (ap->a_ncookies) { 1346 ncookies = uimin(ncookies, (nproc_targets - i)); 1347 cookies = malloc(ncookies * sizeof (off_t), 1348 M_TEMP, M_WAITOK); 1349 *ap->a_cookies = cookies; 1350 } 1351 1352 for (pt = &proc_targets[i]; 1353 uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) { 1354 if (pt->pt_valid) { 1355 /* XXXSMP LWP can disappear */ 1356 mutex_enter(p->p_lock); 1357 l = LIST_FIRST(&p->p_lwps); 1358 KASSERT(l != NULL); 1359 mutex_exit(p->p_lock); 1360 if ((*pt->pt_valid)(l, vp->v_mount) == 0) 1361 continue; 1362 } 1363 1364 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, 1365 pt->pt_pfstype, -1); 1366 d.d_namlen = pt->pt_namlen; 1367 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1368 d.d_type = pt->pt_type; 1369 1370 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1371 break; 1372 if (cookies) 1373 *cookies++ = i + 1; 1374 } 1375 1376 procfs_proc_unlock(p); 1377 break; 1378 } 1379 case PFSfd: { 1380 struct proc *p; 1381 file_t *fp; 1382 int lim, nc = 0; 1383 1384 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p, 1385 ESRCH)) != 0) 1386 return error; 1387 1388 /* XXX Should this be by file as well? */ 1389 if (kauth_authorize_process(kauth_cred_get(), 1390 KAUTH_PROCESS_CANSEE, p, 1391 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_OPENFILES), NULL, 1392 NULL) != 0) { 1393 procfs_proc_unlock(p); 1394 return ESRCH; 1395 } 1396 1397 nfd = atomic_load_consume(&p->p_fd->fd_dt)->dt_nfiles; 1398 1399 lim = uimin((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 1400 if (i >= lim) { 1401 procfs_proc_unlock(p); 1402 return 0; 1403 } 1404 1405 if (ap->a_ncookies) { 1406 ncookies = uimin(ncookies, (nfd + 2 - i)); 1407 cookies = malloc(ncookies * sizeof (off_t), 1408 M_TEMP, M_WAITOK); 1409 *ap->a_cookies = cookies; 1410 } 1411 1412 for (; i < 2 && uio->uio_resid >= UIO_MX; i++) { 1413 pt = &proc_targets[i]; 1414 d.d_namlen = pt->pt_namlen; 1415 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, 1416 pt->pt_pfstype, -1); 1417 (void)memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1418 d.d_type = pt->pt_type; 1419 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1420 break; 1421 if (cookies) 1422 *cookies++ = i + 1; 1423 nc++; 1424 } 1425 if (error) { 1426 ncookies = nc; 1427 break; 1428 } 1429 for (; uio->uio_resid >= UIO_MX && i < nfd; i++) { 1430 /* check the descriptor exists */ 1431 if ((fp = fd_getfile2(p, i - 2)) == NULL) 1432 continue; 1433 closef(fp); 1434 1435 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, PFSfd, i - 2); 1436 d.d_namlen = snprintf(d.d_name, sizeof(d.d_name), 1437 "%lld", (long long)(i - 2)); 1438 d.d_type = VREG; 1439 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1440 break; 1441 if (cookies) 1442 *cookies++ = i + 1; 1443 nc++; 1444 } 1445 ncookies = nc; 1446 procfs_proc_unlock(p); 1447 break; 1448 } 1449 case PFStask: { 1450 struct proc *p; 1451 int nc = 0; 1452 1453 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &p, 1454 ESRCH)) != 0) 1455 return error; 1456 1457 nfd = 3; /* ., .., pid */ 1458 1459 if (ap->a_ncookies) { 1460 ncookies = uimin(ncookies, (nfd + 2 - i)); 1461 cookies = malloc(ncookies * sizeof (off_t), 1462 M_TEMP, M_WAITOK); 1463 *ap->a_cookies = cookies; 1464 } 1465 1466 for (; i < 2 && uio->uio_resid >= UIO_MX; i++) { 1467 pt = &proc_targets[i]; 1468 d.d_namlen = pt->pt_namlen; 1469 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, 1470 pt->pt_pfstype, -1); 1471 (void)memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1472 d.d_type = pt->pt_type; 1473 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1474 break; 1475 if (cookies) 1476 *cookies++ = i + 1; 1477 nc++; 1478 } 1479 if (error) { 1480 ncookies = nc; 1481 break; 1482 } 1483 for (; uio->uio_resid >= UIO_MX && i < nfd; i++) { 1484 /* check the descriptor exists */ 1485 d.d_fileno = PROCFS_FILENO(pfs->pfs_pid, PFStask, 1486 i - 2); 1487 d.d_namlen = snprintf(d.d_name, sizeof(d.d_name), 1488 "%ld", (long)pfs->pfs_pid); 1489 d.d_type = DT_LNK; 1490 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1491 break; 1492 if (cookies) 1493 *cookies++ = i + 1; 1494 nc++; 1495 } 1496 ncookies = nc; 1497 procfs_proc_unlock(p); 1498 break; 1499 } 1500 1501 /* 1502 * this is for the root of the procfs filesystem 1503 * what is needed are special entries for "curproc" 1504 * and "self" followed by an entry for each process 1505 * on allproc. 1506 */ 1507 1508 case PFSroot: { 1509 int nc = 0; 1510 1511 if (ap->a_ncookies) { 1512 /* 1513 * XXX Potentially allocating too much space here, 1514 * but I'm lazy. This loop needs some work. 1515 */ 1516 cookies = malloc(ncookies * sizeof (off_t), 1517 M_TEMP, M_WAITOK); 1518 *ap->a_cookies = cookies; 1519 } 1520 error = 0; 1521 /* 0 ... 3 are static entries. */ 1522 for (; i <= 3 && uio->uio_resid >= UIO_MX; i++) { 1523 switch (i) { 1524 case 0: /* `.' */ 1525 case 1: /* `..' */ 1526 d.d_fileno = PROCFS_FILENO(0, PFSroot, -1); 1527 d.d_namlen = i + 1; 1528 memcpy(d.d_name, "..", d.d_namlen); 1529 d.d_name[i + 1] = '\0'; 1530 d.d_type = DT_DIR; 1531 break; 1532 1533 case 2: 1534 d.d_fileno = PROCFS_FILENO(0, PFScurproc, -1); 1535 d.d_namlen = sizeof("curproc") - 1; 1536 memcpy(d.d_name, "curproc", sizeof("curproc")); 1537 d.d_type = DT_LNK; 1538 break; 1539 1540 case 3: 1541 d.d_fileno = PROCFS_FILENO(0, PFSself, -1); 1542 d.d_namlen = sizeof("self") - 1; 1543 memcpy(d.d_name, "self", sizeof("self")); 1544 d.d_type = DT_LNK; 1545 break; 1546 } 1547 1548 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1549 break; 1550 nc++; 1551 if (cookies) 1552 *cookies++ = i + 1; 1553 } 1554 /* 4 ... are process entries. */ 1555 ctx.uiop = uio; 1556 ctx.error = 0; 1557 ctx.off = 4; 1558 ctx.startoff = i; 1559 ctx.cookies = cookies; 1560 ctx.ncookies = nc; 1561 proclist_foreach_call(&allproc, 1562 procfs_root_readdir_callback, &ctx); 1563 cookies = ctx.cookies; 1564 nc = ctx.ncookies; 1565 error = ctx.error; 1566 if (error) 1567 break; 1568 1569 /* misc entries. */ 1570 if (i < ctx.off) 1571 i = ctx.off; 1572 if (i >= ctx.off + nproc_root_targets) 1573 break; 1574 for (pt = &proc_root_targets[i - ctx.off]; 1575 uio->uio_resid >= UIO_MX && 1576 pt < &proc_root_targets[nproc_root_targets]; 1577 pt++, i++) { 1578 if (pt->pt_valid && 1579 (*pt->pt_valid)(NULL, vp->v_mount) == 0) 1580 continue; 1581 d.d_fileno = PROCFS_FILENO(0, pt->pt_pfstype, -1); 1582 d.d_namlen = pt->pt_namlen; 1583 memcpy(d.d_name, pt->pt_name, pt->pt_namlen + 1); 1584 d.d_type = pt->pt_type; 1585 1586 if ((error = uiomove(&d, UIO_MX, uio)) != 0) 1587 break; 1588 nc++; 1589 if (cookies) 1590 *cookies++ = i + 1; 1591 } 1592 1593 ncookies = nc; 1594 break; 1595 } 1596 1597 default: 1598 error = ENOTDIR; 1599 break; 1600 } 1601 1602 if (ap->a_ncookies) { 1603 if (error) { 1604 if (cookies) 1605 free(*ap->a_cookies, M_TEMP); 1606 *ap->a_ncookies = 0; 1607 *ap->a_cookies = NULL; 1608 } else 1609 *ap->a_ncookies = ncookies; 1610 } 1611 uio->uio_offset = i; 1612 return (error); 1613 } 1614 1615 /* 1616 * readlink reads the link of `curproc' and others 1617 */ 1618 int 1619 procfs_readlink(void *v) 1620 { 1621 struct vop_readlink_args *ap = v; 1622 char bf[16]; /* should be enough */ 1623 char *bp = bf; 1624 char *path = NULL; 1625 int len = 0; 1626 int error = 0; 1627 struct vnode *vp = ap->a_vp; 1628 struct pfsnode *pfs = VTOPFS(vp); 1629 struct proc *pown = NULL; 1630 1631 if (pfs->pfs_fileno == PROCFS_FILENO(0, PFScurproc, -1)) 1632 len = snprintf(bf, sizeof(bf), "%ld", (long)curproc->p_pid); 1633 else if (pfs->pfs_fileno == PROCFS_FILENO(0, PFSself, -1)) 1634 len = snprintf(bf, sizeof(bf), "%s", "curproc"); 1635 else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFStask, 0)) 1636 len = snprintf(bf, sizeof(bf), ".."); 1637 else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFSexe, -1)) { 1638 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &pown, 1639 ESRCH)) != 0) 1640 return error; 1641 bp = pown->p_path; 1642 len = strlen(bp); 1643 } else if (pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFScwd, -1) || 1644 pfs->pfs_fileno == PROCFS_FILENO(pfs->pfs_pid, PFSchroot, -1)) { 1645 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &pown, 1646 ESRCH)) != 0) 1647 return error; 1648 path = malloc(MAXPATHLEN + 4, M_TEMP, M_WAITOK); 1649 if (path == NULL) { 1650 procfs_proc_unlock(pown); 1651 return (ENOMEM); 1652 } 1653 bp = path + MAXPATHLEN; 1654 *--bp = '\0'; 1655 procfs_dir(PROCFS_TYPE(pfs->pfs_fileno), curlwp, pown, 1656 &bp, path, MAXPATHLEN); 1657 len = strlen(bp); 1658 } else { 1659 file_t *fp; 1660 struct vnode *vxp; 1661 1662 if ((error = procfs_proc_lock(vp->v_mount, pfs->pfs_pid, &pown, 1663 ESRCH)) != 0) 1664 return error; 1665 1666 fp = fd_getfile2(pown, pfs->pfs_fd); 1667 if (fp == NULL) { 1668 procfs_proc_unlock(pown); 1669 return EBADF; 1670 } 1671 1672 switch (fp->f_type) { 1673 case DTYPE_VNODE: 1674 vxp = fp->f_vnode; 1675 if (vxp->v_type != VDIR) { 1676 error = EINVAL; 1677 break; 1678 } 1679 if ((path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK)) 1680 == NULL) { 1681 error = ENOMEM; 1682 break; 1683 } 1684 bp = path + MAXPATHLEN; 1685 *--bp = '\0'; 1686 1687 /* 1688 * XXX: kludge to avoid locking against ourselves 1689 * in getcwd() 1690 */ 1691 if (vxp->v_tag == VT_PROCFS) { 1692 *--bp = '/'; 1693 } else { 1694 rw_enter(&curproc->p_cwdi->cwdi_lock, 1695 RW_READER); 1696 vp = curproc->p_cwdi->cwdi_rdir; 1697 if (vp == NULL) 1698 vp = rootvnode; 1699 error = getcwd_common(vxp, vp, &bp, path, 1700 MAXPATHLEN / 2, 0, curlwp); 1701 rw_exit(&curproc->p_cwdi->cwdi_lock); 1702 } 1703 if (error) 1704 break; 1705 len = strlen(bp); 1706 break; 1707 1708 case DTYPE_MISC: 1709 len = snprintf(bf, sizeof(bf), "%s", "[misc]"); 1710 break; 1711 1712 case DTYPE_KQUEUE: 1713 len = snprintf(bf, sizeof(bf), "%s", "[kqueue]"); 1714 break; 1715 1716 case DTYPE_SEM: 1717 len = snprintf(bf, sizeof(bf), "%s", "[ksem]"); 1718 break; 1719 1720 default: 1721 error = EINVAL; 1722 break; 1723 } 1724 closef(fp); 1725 } 1726 1727 if (error == 0) 1728 error = uiomove(bp, len, ap->a_uio); 1729 if (pown) 1730 procfs_proc_unlock(pown); 1731 if (path) 1732 free(path, M_TEMP); 1733 return error; 1734 } 1735 1736 int 1737 procfs_getpages(void *v) 1738 { 1739 struct vop_getpages_args /* { 1740 struct vnode *a_vp; 1741 voff_t a_offset; 1742 struct vm_page **a_m; 1743 int *a_count; 1744 int a_centeridx; 1745 vm_prot_t a_access_type; 1746 int a_advice; 1747 int a_flags; 1748 } */ *ap = v; 1749 1750 if ((ap->a_flags & PGO_LOCKED) == 0) 1751 rw_exit(ap->a_vp->v_uobj.vmobjlock); 1752 1753 return (EFAULT); 1754 } 1755 1756 /* 1757 * convert decimal ascii to int 1758 */ 1759 static int 1760 atoi(const char *b, size_t len) 1761 { 1762 int p = 0; 1763 1764 while (len--) { 1765 char c = *b++; 1766 if (c < '0' || c > '9') 1767 return -1; 1768 p = 10 * p + (c - '0'); 1769 } 1770 1771 return p; 1772 } 1773