xref: /netbsd-src/sys/miscfs/procfs/procfs_vfsops.c (revision da4222d3678fa36b5bebd6eb60ce1574074ae16d)
1 /*	$NetBSD: procfs_vfsops.c,v 1.112 2024/01/17 10:19:21 hannken Exp $	*/
2 
3 /*
4  * Copyright (c) 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Jan-Simon Pendry.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)procfs_vfsops.c	8.7 (Berkeley) 5/10/95
35  */
36 
37 /*
38  * Copyright (c) 1993 Jan-Simon Pendry
39  *
40  * This code is derived from software contributed to Berkeley by
41  * Jan-Simon Pendry.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  * 3. All advertising materials mentioning features or use of this software
52  *    must display the following acknowledgement:
53  *	This product includes software developed by the University of
54  *	California, Berkeley and its contributors.
55  * 4. Neither the name of the University nor the names of its contributors
56  *    may be used to endorse or promote products derived from this software
57  *    without specific prior written permission.
58  *
59  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69  * SUCH DAMAGE.
70  *
71  *	@(#)procfs_vfsops.c	8.7 (Berkeley) 5/10/95
72  */
73 
74 /*
75  * procfs VFS interface
76  */
77 
78 #include <sys/cdefs.h>
79 __KERNEL_RCSID(0, "$NetBSD: procfs_vfsops.c,v 1.112 2024/01/17 10:19:21 hannken Exp $");
80 
81 #if defined(_KERNEL_OPT)
82 #include "opt_compat_netbsd.h"
83 #endif
84 
85 #include <sys/param.h>
86 #include <sys/atomic.h>
87 #include <sys/buf.h>
88 #include <sys/dirent.h>
89 #include <sys/file.h>
90 #include <sys/filedesc.h>
91 #include <sys/kauth.h>
92 #include <sys/kernel.h>
93 #include <sys/module.h>
94 #include <sys/mount.h>
95 #include <sys/proc.h>
96 #include <sys/signalvar.h>
97 #include <sys/sysctl.h>
98 #include <sys/syslog.h>
99 #include <sys/systm.h>
100 #include <sys/time.h>
101 #include <sys/vnode.h>
102 
103 #include <miscfs/genfs/genfs.h>
104 
105 #include <miscfs/procfs/procfs.h>
106 
107 #include <uvm/uvm_extern.h>			/* for PAGE_SIZE */
108 
109 MODULE(MODULE_CLASS_VFS, procfs, "ptrace_common");
110 
111 VFS_PROTOS(procfs);
112 
113 #define PROCFS_HASHSIZE	256
114 
115 static kauth_listener_t procfs_listener;
116 LIST_HEAD(hashhead, pfsnode);
117 static u_long procfs_hashmask;
118 static struct hashhead *procfs_hashtab;
119 static kmutex_t procfs_hashlock;
120 
121 static struct hashhead *
122 procfs_hashhead(pid_t pid)
123 {
124 
125 	return &procfs_hashtab[pid & procfs_hashmask];
126 }
127 
128 void
129 procfs_hashrem(struct pfsnode *pfs)
130 {
131 
132 	mutex_enter(&procfs_hashlock);
133 	LIST_REMOVE(pfs, pfs_hash);
134 	mutex_exit(&procfs_hashlock);
135 }
136 
137 /*
138  * VFS Operations.
139  *
140  * mount system call
141  */
142 /* ARGSUSED */
143 int
144 procfs_mount(
145     struct mount *mp,
146     const char *path,
147     void *data,
148     size_t *data_len)
149 {
150 	struct lwp *l = curlwp;
151 	struct procfsmount *pmnt;
152 	struct procfs_args *args = data;
153 	int error;
154 
155 	if (args == NULL)
156 		return EINVAL;
157 
158 	if (UIO_MX & (UIO_MX-1)) {
159 		log(LOG_ERR, "procfs: invalid directory entry size");
160 		return (EINVAL);
161 	}
162 
163 	if (mp->mnt_flag & MNT_GETARGS) {
164 		if (*data_len < sizeof *args)
165 			return EINVAL;
166 
167 		pmnt = VFSTOPROC(mp);
168 		if (pmnt == NULL)
169 			return EIO;
170 		args->version = PROCFS_ARGSVERSION;
171 		args->flags = pmnt->pmnt_flags;
172 		*data_len = sizeof *args;
173 		return 0;
174 	}
175 
176 	if (mp->mnt_flag & MNT_UPDATE)
177 		return (EOPNOTSUPP);
178 
179 	if (*data_len >= sizeof *args && args->version != PROCFS_ARGSVERSION)
180 		return EINVAL;
181 
182 	pmnt = kmem_zalloc(sizeof(struct procfsmount), KM_SLEEP);
183 
184 	mp->mnt_stat.f_namemax = PROCFS_MAXNAMLEN;
185 	mp->mnt_flag |= MNT_LOCAL;
186 	mp->mnt_data = pmnt;
187 	vfs_getnewfsid(mp);
188 
189 	error = set_statvfs_info(path, UIO_USERSPACE, "procfs", UIO_SYSSPACE,
190 	    mp->mnt_op->vfs_name, mp, l);
191 	pmnt->pmnt_exechook = exechook_establish(procfs_revoke_vnodes, mp);
192 	if (*data_len >= sizeof *args)
193 		pmnt->pmnt_flags = args->flags;
194 	else
195 		pmnt->pmnt_flags = 0;
196 
197 	mp->mnt_iflag |= IMNT_MPSAFE | IMNT_SHRLOOKUP;
198 	return error;
199 }
200 
201 /*
202  * unmount system call
203  */
204 int
205 procfs_unmount(struct mount *mp, int mntflags)
206 {
207 	int error;
208 	int flags = 0;
209 
210 	if (mntflags & MNT_FORCE)
211 		flags |= FORCECLOSE;
212 
213 	if ((error = vflush(mp, 0, flags)) != 0)
214 		return (error);
215 
216 	exechook_disestablish(VFSTOPROC(mp)->pmnt_exechook);
217 
218 	kmem_free(mp->mnt_data, sizeof(struct procfsmount));
219 	mp->mnt_data = NULL;
220 
221 	return 0;
222 }
223 
224 int
225 procfs_root(struct mount *mp, int lktype, struct vnode **vpp)
226 {
227 	int error;
228 
229 	error = procfs_allocvp(mp, vpp, 0, PFSroot, -1);
230 	if (error == 0) {
231 		error = vn_lock(*vpp, lktype);
232 		if (error != 0) {
233 			vrele(*vpp);
234 			*vpp = NULL;
235 		}
236 	}
237 
238 	return error;
239 }
240 
/*
 * Start hook: nothing to do for procfs, always succeeds.
 */
/* ARGSUSED */
int
procfs_start(struct mount *mp, int flags)
{

	return 0;
}
248 
249 /*
250  * Get file system statistics.
251  */
252 int
253 procfs_statvfs(struct mount *mp, struct statvfs *sbp)
254 {
255 
256 	genfs_statvfs(mp, sbp);
257 
258 	sbp->f_bsize = PAGE_SIZE;
259 	sbp->f_frsize = PAGE_SIZE;
260 	sbp->f_iosize = PAGE_SIZE;
261 	sbp->f_blocks = 1;
262 	sbp->f_files = maxproc;					/* approx */
263 	sbp->f_ffree = maxproc - atomic_load_relaxed(&nprocs);	/* approx */
264 	sbp->f_favail = maxproc - atomic_load_relaxed(&nprocs);	/* approx */
265 
266 	return (0);
267 }
268 
269 /*ARGSUSED*/
270 int
271 procfs_sync(
272     struct mount *mp,
273     int waitfor,
274     kauth_cred_t uc)
275 {
276 
277 	return (0);
278 }
279 
280 /*ARGSUSED*/
281 int
282 procfs_vget(struct mount *mp, ino_t ino, int lktype,
283     struct vnode **vpp)
284 {
285 	return (EOPNOTSUPP);
286 }
287 
288 int
289 procfs_loadvnode(struct mount *mp, struct vnode *vp,
290     const void *key, size_t key_len, const void **new_key)
291 {
292 	int error;
293 	struct pfskey pfskey;
294 	struct pfsnode *pfs;
295 
296 	KASSERT(key_len == sizeof(pfskey));
297 	memcpy(&pfskey, key, key_len);
298 
299 	pfs = kmem_alloc(sizeof(*pfs), KM_SLEEP);
300 	pfs->pfs_pid = pfskey.pk_pid;
301 	pfs->pfs_type = pfskey.pk_type;
302 	pfs->pfs_fd = pfskey.pk_fd;
303 	pfs->pfs_vnode = vp;
304 	pfs->pfs_mount = mp;
305 	pfs->pfs_flags = 0;
306 	pfs->pfs_fileno =
307 	    PROCFS_FILENO(pfs->pfs_pid, pfs->pfs_type, pfs->pfs_fd);
308 	vp->v_tag = VT_PROCFS;
309 	vp->v_op = procfs_vnodeop_p;
310 	vp->v_data = pfs;
311 
312 	switch (pfs->pfs_type) {
313 	case PFSroot:	/* /proc = dr-xr-xr-x */
314 		vp->v_vflag |= VV_ROOT;
315 		/*FALLTHROUGH*/
316 	case PFSproc:	/* /proc/N = dr-xr-xr-x */
317 		pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
318 		vp->v_type = VDIR;
319 		break;
320 
321 	case PFStask:	/* /proc/N/task = dr-xr-xr-x */
322 		if (pfs->pfs_fd == -1) {
323 			pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|
324 			    S_IROTH|S_IXOTH;
325 			vp->v_type = VDIR;
326 			break;
327 		}
328 		/*FALLTHROUGH*/
329 	case PFScurproc:	/* /proc/curproc = lr-xr-xr-x */
330 	case PFSself:	/* /proc/self    = lr-xr-xr-x */
331 	case PFScwd:	/* /proc/N/cwd = lr-xr-xr-x */
332 	case PFSchroot:	/* /proc/N/chroot = lr-xr-xr-x */
333 	case PFSexe:	/* /proc/N/exe = lr-xr-xr-x */
334 		pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
335 		vp->v_type = VLNK;
336 		break;
337 
338 	case PFSfd:
339 		if (pfs->pfs_fd == -1) {	/* /proc/N/fd = dr-x------ */
340 			pfs->pfs_mode = S_IRUSR|S_IXUSR;
341 			vp->v_type = VDIR;
342 		} else {	/* /proc/N/fd/M = [ps-]rw------- */
343 			file_t *fp;
344 			vnode_t *vxp;
345 			struct proc *p;
346 
347 			mutex_enter(&proc_lock);
348 			p = procfs_proc_find(mp, pfs->pfs_pid);
349 			mutex_exit(&proc_lock);
350 			if (p == NULL) {
351 				error = ENOENT;
352 				goto bad;
353 			}
354 			KASSERT(rw_read_held(&p->p_reflock));
355 			if ((fp = fd_getfile2(p, pfs->pfs_fd)) == NULL) {
356 				error = EBADF;
357 				goto bad;
358 			}
359 
360 			pfs->pfs_mode = S_IRUSR|S_IWUSR;
361 			switch (fp->f_type) {
362 			case DTYPE_VNODE:
363 				vxp = fp->f_vnode;
364 
365 				/*
366 				 * We make symlinks for directories
367 				 * to avoid cycles.
368 				 */
369 				if (vxp->v_type == VDIR ||
370 				    procfs_proc_is_linux_compat())
371 					goto symlink;
372 				vp->v_type = vxp->v_type;
373 				break;
374 			case DTYPE_PIPE:
375 				vp->v_type = VFIFO;
376 				break;
377 			case DTYPE_SOCKET:
378 				vp->v_type = VSOCK;
379 				break;
380 			case DTYPE_KQUEUE:
381 			case DTYPE_MISC:
382 			case DTYPE_SEM:
383 			symlink:
384 				pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|
385 				    S_IXGRP|S_IROTH|S_IXOTH;
386 				vp->v_type = VLNK;
387 				break;
388 			default:
389 				error = EOPNOTSUPP;
390 				closef(fp);
391 				goto bad;
392 			}
393 			closef(fp);
394 		}
395 		break;
396 
397 	case PFSfile:	/* /proc/N/file = -rw------- */
398 	case PFSmem:	/* /proc/N/mem = -rw------- */
399 	case PFSregs:	/* /proc/N/regs = -rw------- */
400 	case PFSfpregs:	/* /proc/N/fpregs = -rw------- */
401 		pfs->pfs_mode = S_IRUSR|S_IWUSR;
402 		vp->v_type = VREG;
403 		break;
404 
405 	case PFSnote:	/* /proc/N/note = --w------ */
406 	case PFSnotepg:	/* /proc/N/notepg = --w------ */
407 		pfs->pfs_mode = S_IWUSR;
408 		vp->v_type = VREG;
409 		break;
410 
411 	case PFSmap:		/* /proc/N/map = -r-------- */
412 	case PFSmaps:		/* /proc/N/maps = -r-------- */
413 	case PFSauxv:		/* /proc/N/auxv = -r-------- */
414 	case PFSenviron:	/* /proc/N/environ = -r-------- */
415 		pfs->pfs_mode = S_IRUSR;
416 		vp->v_type = VREG;
417 		break;
418 
419 	case PFSstatus:		/* /proc/N/status = -r--r--r-- */
420 	case PFSstat:		/* /proc/N/stat = -r--r--r-- */
421 	case PFScmdline:	/* /proc/N/cmdline = -r--r--r-- */
422 	case PFSemul:		/* /proc/N/emul = -r--r--r-- */
423 	case PFSmeminfo:	/* /proc/meminfo = -r--r--r-- */
424 	case PFScpustat:	/* /proc/stat = -r--r--r-- */
425 	case PFSdevices:	/* /proc/devices = -r--r--r-- */
426 	case PFScpuinfo:	/* /proc/cpuinfo = -r--r--r-- */
427 	case PFSuptime:		/* /proc/uptime = -r--r--r-- */
428 	case PFSmounts:		/* /proc/mounts = -r--r--r-- */
429 	case PFSloadavg:	/* /proc/loadavg = -r--r--r-- */
430 	case PFSstatm:		/* /proc/N/statm = -r--r--r-- */
431 	case PFSversion:	/* /proc/version = -r--r--r-- */
432 	case PFSlimit:		/* /proc/limit = -r--r--r-- */
433 		pfs->pfs_mode = S_IRUSR|S_IRGRP|S_IROTH;
434 		vp->v_type = VREG;
435 		break;
436 
437 #ifdef __HAVE_PROCFS_MACHDEP
438 	PROCFS_MACHDEP_NODETYPE_CASES
439 		procfs_machdep_allocvp(vp);
440 		break;
441 #endif
442 
443 	default:
444 		panic("procfs_allocvp");
445 	}
446 
447 	mutex_enter(&procfs_hashlock);
448 	LIST_INSERT_HEAD(procfs_hashhead(pfs->pfs_pid), pfs, pfs_hash);
449 	mutex_exit(&procfs_hashlock);
450 
451 	uvm_vnp_setsize(vp, 0);
452 	*new_key = &pfs->pfs_key;
453 
454 	return 0;
455 
456 bad:
457 	vp->v_tag =VT_NON;
458 	vp->v_type = VNON;
459 	vp->v_op = NULL;
460 	vp->v_data = NULL;
461 	kmem_free(pfs, sizeof(*pfs));
462 	return error;
463 }
464 
/*
 * File system type initialization hook; intentionally empty.
 */
void
procfs_init(void)
{
}
470 
/*
 * File system type re-initialization hook; intentionally empty.
 */
void
procfs_reinit(void)
{
}
476 
/*
 * File system type teardown hook; intentionally empty.
 */
void
procfs_done(void)
{
}
482 
483 extern const struct vnodeopv_desc procfs_vnodeop_opv_desc;
484 
485 const struct vnodeopv_desc * const procfs_vnodeopv_descs[] = {
486 	&procfs_vnodeop_opv_desc,
487 	NULL,
488 };
489 
490 struct vfsops procfs_vfsops = {
491 	.vfs_name = MOUNT_PROCFS,
492 	.vfs_min_mount_data = sizeof (struct procfs_args),
493 	.vfs_mount = procfs_mount,
494 	.vfs_start = procfs_start,
495 	.vfs_unmount = procfs_unmount,
496 	.vfs_root = procfs_root,
497 	.vfs_quotactl = (void *)eopnotsupp,
498 	.vfs_statvfs = procfs_statvfs,
499 	.vfs_sync = procfs_sync,
500 	.vfs_vget = procfs_vget,
501 	.vfs_loadvnode = procfs_loadvnode,
502 	.vfs_fhtovp = (void *)eopnotsupp,
503 	.vfs_vptofh = (void *)eopnotsupp,
504 	.vfs_init = procfs_init,
505 	.vfs_reinit = procfs_reinit,
506 	.vfs_done = procfs_done,
507 	.vfs_snapshot = (void *)eopnotsupp,
508 	.vfs_extattrctl = vfs_stdextattrctl,
509 	.vfs_suspendctl = genfs_suspendctl,
510 	.vfs_renamelock_enter = genfs_renamelock_enter,
511 	.vfs_renamelock_exit = genfs_renamelock_exit,
512 	.vfs_fsync = (void *)eopnotsupp,
513 	.vfs_opv_descs = procfs_vnodeopv_descs
514 };
515 
516 static int
517 procfs_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
518     void *arg0, void *arg1, void *arg2, void *arg3)
519 {
520 	struct proc *p;
521 	struct pfsnode *pfs;
522 	int result;
523 
524 	result = KAUTH_RESULT_DEFER;
525 	p = arg0;
526 	pfs = arg1;
527 
528 	if (action != KAUTH_PROCESS_PROCFS)
529 		return result;
530 
531 	switch (pfs->pfs_type) {
532 	case PFSregs:
533 	case PFSfpregs:
534 	case PFSmem:
535 		if (kauth_cred_getuid(cred) != kauth_cred_getuid(p->p_cred) ||
536 		    ISSET(p->p_flag, PK_SUGID))
537 			break;
538 
539 		/*FALLTHROUGH*/
540 	default:
541 		result = KAUTH_RESULT_ALLOW;
542 		break;
543 	}
544 
545 	return result;
546 }
547 
548 SYSCTL_SETUP(procfs_sysctl_setup, "procfs sysctl")
549 {
550 
551 	sysctl_createv(clog, 0, NULL, NULL,
552 		       CTLFLAG_PERMANENT,
553 		       CTLTYPE_NODE, "procfs",
554 		       SYSCTL_DESCR("Process file system"),
555 		       NULL, 0, NULL, 0,
556 		       CTL_VFS, 12, CTL_EOL);
557 	/*
558 	 * XXX the "12" above could be dynamic, thereby eliminating
559 	 * one more instance of the "number to vfs" mapping problem,
560 	 * but "12" is the order as taken from sys/mount.h
561 	 */
562 }
563 
564 static int
565 procfs_modcmd(modcmd_t cmd, void *arg)
566 {
567 	int error;
568 
569 	switch (cmd) {
570 	case MODULE_CMD_INIT:
571 		error = vfs_attach(&procfs_vfsops);
572 		if (error != 0)
573 			break;
574 
575 		procfs_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
576 		    procfs_listener_cb, NULL);
577 
578 		break;
579 	case MODULE_CMD_FINI:
580 		error = vfs_detach(&procfs_vfsops);
581 		if (error != 0)
582 			break;
583 		kauth_unlisten_scope(procfs_listener);
584 		break;
585 	default:
586 		error = ENOTTY;
587 		break;
588 	}
589 
590 	return (error);
591 }
592