xref: /netbsd-src/sys/miscfs/procfs/procfs_linux.c (revision c42dbd0ed2e61fe6eda8590caa852ccf34719964)
1 /*      $NetBSD: procfs_linux.c,v 1.88 2024/05/12 17:26:50 christos Exp $      */
2 
3 /*
4  * Copyright (c) 2001 Wasabi Systems, Inc.
5  * All rights reserved.
6  *
7  * Written by Frank van der Linden for Wasabi Systems, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed for the NetBSD Project by
20  *      Wasabi Systems, Inc.
21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22  *    or promote products derived from this software without specific prior
23  *    written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: procfs_linux.c,v 1.88 2024/05/12 17:26:50 christos Exp $");
40 
41 #if defined(_KERNEL_OPT)
42 #include "opt_sysv.h"
43 #endif
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/atomic.h>
48 #include <sys/time.h>
49 #include <sys/cpu.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/vnode.h>
53 #include <sys/exec.h>
54 #include <sys/resource.h>
55 #include <sys/resourcevar.h>
56 #include <sys/signal.h>
57 #include <sys/signalvar.h>
58 #include <sys/tty.h>
59 #include <sys/malloc.h>
60 #include <sys/mount.h>
61 #include <sys/conf.h>
62 #include <sys/sysctl.h>
63 #include <sys/kauth.h>
64 #include <sys/filedesc.h>
65 #ifdef SYSVMSG
66 #include <sys/msg.h>
67 #endif
68 #ifdef SYSVSEM
69 #include <sys/sem.h>
70 #endif
71 #ifdef SYSVSHM
72 #include <sys/shm.h>
73 #endif
74 
75 #include <miscfs/procfs/procfs.h>
76 
77 #include <compat/linux/common/linux_exec.h>
78 #include <compat/linux32/common/linux32_sysctl.h>
79 
80 #include <uvm/uvm.h>
81 #include <uvm/uvm_extern.h>
82 
83 extern struct devsw_conv *devsw_conv;
84 extern int max_devsw_convs;
85 
/* Convert a page count to bytes. */
#define PGTOB(p)	((unsigned long)(p) << PAGE_SHIFT)
/* Convert a page count to kilobytes (requires PAGE_SHIFT >= 10). */
#define PGTOKB(p)	((unsigned long)(p) << (PAGE_SHIFT - 10))

/* Size of the temporary buffer the handlers below format their output into. */
#define LBFSZ (8 * 1024)
90 
91 static void
92 get_proc_size_info(struct proc *p, struct vm_map *map, unsigned long *stext,
93     unsigned long *etext, unsigned long *sstack)
94 {
95 	struct vm_map_entry *entry;
96 
97 	*stext = 0;
98 	*etext = 0;
99 	*sstack = 0;
100 
101 	vm_map_lock_read(map);
102 
103 	for (entry = map->header.next; entry != &map->header;
104 	    entry = entry->next) {
105 		if (UVM_ET_ISSUBMAP(entry))
106 			continue;
107 		/* assume text is the first entry */
108 		if (*stext == *etext) {
109 			*stext = entry->start;
110 			*etext = entry->end;
111 			break;
112 		}
113 	}
114 #if defined(LINUX_USRSTACK32) && defined(USRSTACK32)
115 	if (strcmp(p->p_emul->e_name, "linux32") == 0 &&
116 	    LINUX_USRSTACK32 < USRSTACK32)
117 		*sstack = (unsigned long)LINUX_USRSTACK32;
118 	else
119 #endif
120 #ifdef LINUX_USRSTACK
121 	if (strcmp(p->p_emul->e_name, "linux") == 0 &&
122 	    LINUX_USRSTACK < USRSTACK)
123 		*sstack = (unsigned long)LINUX_USRSTACK;
124 	else
125 #endif
126 #ifdef	USRSTACK32
127 	if (strstr(p->p_emul->e_name, "32") != NULL)
128 		*sstack = (unsigned long)USRSTACK32;
129 	else
130 #endif
131 		*sstack = (unsigned long)USRSTACK;
132 
133 	/*
134 	 * jdk 1.6 compares low <= addr && addr < high
135 	 * if we put addr == high, then the test fails
136 	 * so eat one page.
137 	 */
138 	*sstack -= PAGE_SIZE;
139 
140 	vm_map_unlock_read(map);
141 }
142 
143 /*
144  * Linux compatible /proc/meminfo. Only active when the -o linux
145  * mountflag is used.
146  */
147 int
148 procfs_domeminfo(struct lwp *curl, struct proc *p,
149     struct pfsnode *pfs, struct uio *uio)
150 {
151 	char *bf;
152 	int len;
153 	int error = 0;
154 	long filepg, anonpg, execpg, freepg;
155 
156 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
157 
158 	/* uvm_availmem() will sync the counters if needed. */
159 	freepg = (long)uvm_availmem(true);
160 	filepg = (long)(cpu_count_get(CPU_COUNT_FILECLEAN) +
161 	    cpu_count_get(CPU_COUNT_FILEDIRTY) +
162 	    cpu_count_get(CPU_COUNT_FILEUNKNOWN) -
163 	    cpu_count_get(CPU_COUNT_EXECPAGES));
164 	anonpg = (long)(cpu_count_get(CPU_COUNT_ANONCLEAN) +
165 	    cpu_count_get(CPU_COUNT_ANONDIRTY) +
166 	    cpu_count_get(CPU_COUNT_ANONUNKNOWN));
167 	execpg = (long)cpu_count_get(CPU_COUNT_EXECPAGES);
168 
169 	len = snprintf(bf, LBFSZ,
170 		"        total:    used:    free:  shared: buffers: cached:\n"
171 		"Mem:  %8lu %8lu %8lu %8lu %8lu %8lu\n"
172 		"Swap: %8lu %8lu %8lu\n"
173 		"MemTotal:  %8lu kB\n"
174 		"MemFree:   %8lu kB\n"
175 		"MemShared: %8lu kB\n"
176 		"Buffers:   %8lu kB\n"
177 		"Cached:    %8lu kB\n"
178 		"SwapTotal: %8lu kB\n"
179 		"SwapFree:  %8lu kB\n",
180 		PGTOB(uvmexp.npages),
181 		PGTOB(uvmexp.npages - freepg),
182 		PGTOB(freepg),
183 		0L,
184 		PGTOB(filepg),
185 		PGTOB(anonpg + filepg + execpg),
186 		PGTOB(uvmexp.swpages),
187 		PGTOB(uvmexp.swpginuse),
188 		PGTOB(uvmexp.swpages - uvmexp.swpginuse),
189 		PGTOKB(uvmexp.npages),
190 		PGTOKB(freepg),
191 		0L,
192 		PGTOKB(freepg),
193 		PGTOKB(anonpg + filepg + execpg),
194 		PGTOKB(uvmexp.swpages),
195 		PGTOKB(uvmexp.swpages - uvmexp.swpginuse));
196 
197 	if (len == 0)
198 		goto out;
199 
200 	error = uiomove_frombuf(bf, len, uio);
201 out:
202 	free(bf, M_TEMP);
203 	return error;
204 }
205 
206 /*
207  * Linux compatible /proc/devices. Only active when the -o linux
208  * mountflag is used.
209  */
210 int
211 procfs_dodevices(struct lwp *curl, struct proc *p,
212     struct pfsnode *pfs, struct uio *uio)
213 {
214 	char *bf;
215 	int offset = 0;
216 	int i, error = ENAMETOOLONG;
217 
218 	/* XXX elad - may need filtering. */
219 
220 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
221 
222 	offset += snprintf(&bf[offset], LBFSZ - offset, "Character devices:\n");
223 	if (offset >= LBFSZ)
224 		goto out;
225 
226 	mutex_enter(&device_lock);
227 	for (i = 0; i < max_devsw_convs; i++) {
228 		if ((devsw_conv[i].d_name == NULL) ||
229 		    (devsw_conv[i].d_cmajor == -1))
230 			continue;
231 
232 		offset += snprintf(&bf[offset], LBFSZ - offset,
233 		    "%3d %s\n", devsw_conv[i].d_cmajor, devsw_conv[i].d_name);
234 		if (offset >= LBFSZ) {
235 			mutex_exit(&device_lock);
236 			goto out;
237 		}
238 	}
239 
240 	offset += snprintf(&bf[offset], LBFSZ - offset, "\nBlock devices:\n");
241 	if (offset >= LBFSZ) {
242 		mutex_exit(&device_lock);
243 		goto out;
244 	}
245 
246 	for (i = 0; i < max_devsw_convs; i++) {
247 		if ((devsw_conv[i].d_name == NULL) ||
248 		    (devsw_conv[i].d_bmajor == -1))
249 			continue;
250 
251 		offset += snprintf(&bf[offset], LBFSZ - offset,
252 		    "%3d %s\n", devsw_conv[i].d_bmajor, devsw_conv[i].d_name);
253 		if (offset >= LBFSZ) {
254 			mutex_exit(&device_lock);
255 			goto out;
256 		}
257 	}
258 	mutex_exit(&device_lock);
259 
260 	error = uiomove_frombuf(bf, offset, uio);
261 out:
262 	free(bf, M_TEMP);
263 	return error;
264 }
265 
266 /*
267  * Linux compatible /proc/stat. Only active when the -o linux
268  * mountflag is used.
269  */
270 int
271 procfs_docpustat(struct lwp *curl, struct proc *p,
272     struct pfsnode *pfs, struct uio *uio)
273 {
274 	char		*bf;
275 	int	 	 error;
276 	int	 	 len;
277 #if defined(MULTIPROCESSOR)
278         struct cpu_info *ci;
279         CPU_INFO_ITERATOR cii;
280 #endif
281 	int	 	 i;
282 
283 	error = ENAMETOOLONG;
284 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
285 
286 	len = snprintf(bf, LBFSZ,
287 		"cpu %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
288 		curcpu()->ci_schedstate.spc_cp_time[CP_USER],
289 		curcpu()->ci_schedstate.spc_cp_time[CP_NICE],
290 		curcpu()->ci_schedstate.spc_cp_time[CP_SYS] /*+ [CP_INTR]*/,
291 		curcpu()->ci_schedstate.spc_cp_time[CP_IDLE]);
292 	if (len == 0)
293 		goto out;
294 
295 #if defined(MULTIPROCESSOR)
296 #define ALLCPUS	CPU_INFO_FOREACH(cii, ci)
297 #define CPUNAME	ci
298 #else
299 #define ALLCPUS	; i < 1 ;
300 #define CPUNAME	curcpu()
301 #endif
302 
303 	i = 0;
304 	for (ALLCPUS) {
305 		len += snprintf(&bf[len], LBFSZ - len,
306 			"cpu%d %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64
307 			"\n", i,
308 			CPUNAME->ci_schedstate.spc_cp_time[CP_USER],
309 			CPUNAME->ci_schedstate.spc_cp_time[CP_NICE],
310 			CPUNAME->ci_schedstate.spc_cp_time[CP_SYS],
311 			CPUNAME->ci_schedstate.spc_cp_time[CP_IDLE]);
312 		if (len >= LBFSZ)
313 			goto out;
314 		i += 1;
315 	}
316 
317 	cpu_count_sync(true);
318 
319 	struct timeval btv;
320 	getmicroboottime(&btv);
321 
322 	len += snprintf(&bf[len], LBFSZ - len,
323 			"disk 0 0 0 0\n"
324 			"page %u %u\n"
325 			"swap %u %u\n"
326 			"intr %"PRId64"\n"
327 			"ctxt %"PRId64"\n"
328 			"btime %"PRId64"\n",
329 			uvmexp.pageins, uvmexp.pdpageouts,
330 			uvmexp.pgswapin, uvmexp.pgswapout,
331 			cpu_count_get(CPU_COUNT_NINTR),
332 			cpu_count_get(CPU_COUNT_NSWTCH),
333 			btv.tv_sec);
334 	if (len >= LBFSZ)
335 		goto out;
336 
337 	error = uiomove_frombuf(bf, len, uio);
338 out:
339 	free(bf, M_TEMP);
340 	return error;
341 }
342 
343 /*
344  * Linux compatible /proc/loadavg. Only active when the -o linux
345  * mountflag is used.
346  */
347 int
348 procfs_doloadavg(struct lwp *curl, struct proc *p,
349     struct pfsnode *pfs, struct uio *uio)
350 {
351 	char	*bf;
352 	int 	 error;
353 	int 	 len;
354 
355 	error = ENAMETOOLONG;
356 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
357 
358 	averunnable.fscale = FSCALE;
359 	len = snprintf(bf, LBFSZ,
360 	        "%d.%02d %d.%02d %d.%02d %d/%d %d\n",
361 		(int)(averunnable.ldavg[0] / averunnable.fscale),
362 		(int)(averunnable.ldavg[0] * 100 / averunnable.fscale % 100),
363 		(int)(averunnable.ldavg[1] / averunnable.fscale),
364 		(int)(averunnable.ldavg[1] * 100 / averunnable.fscale % 100),
365 		(int)(averunnable.ldavg[2] / averunnable.fscale),
366 		(int)(averunnable.ldavg[2] * 100 / averunnable.fscale % 100),
367 		1,		/* number of ONPROC processes */
368 		atomic_load_relaxed(&nprocs),
369 		30000);		/* last pid */
370 	if (len == 0)
371 		goto out;
372 
373 	error = uiomove_frombuf(bf, len, uio);
374 out:
375 	free(bf, M_TEMP);
376 	return error;
377 }
378 
379 /*
380  * Linux compatible /proc/<pid>/statm. Only active when the -o linux
381  * mountflag is used.
382  */
383 int
384 procfs_do_pid_statm(struct lwp *curl, struct lwp *l,
385     struct pfsnode *pfs, struct uio *uio)
386 {
387 	struct vmspace	*vm;
388 	struct proc	*p = l->l_proc;
389 	char		*bf;
390 	int	 	 error;
391 	int	 	 len;
392 	struct kinfo_proc2 ki;
393 
394 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
395 
396 	/* XXX - we use values from vmspace, since dsl says that ru figures
397 	   are always 0 except for zombies. See kvm_proc.c::kvm_getproc2() */
398 	if ((error = proc_vmspace_getref(p, &vm)) != 0) {
399 		goto out;
400 	}
401 
402 	mutex_enter(&proc_lock);
403 	mutex_enter(p->p_lock);
404 
405 	/* retrieve RSS size */
406 	memset(&ki, 0, sizeof(ki));
407 	fill_kproc2(p, &ki, false, false);
408 
409 	mutex_exit(p->p_lock);
410 	mutex_exit(&proc_lock);
411 
412 	uvmspace_free(vm);
413 
414 	len = snprintf(bf, LBFSZ,
415 	        "%lu %lu %lu %lu %lu %lu %lu\n",
416 		(unsigned long)(ki.p_vm_msize),	/* size */
417 		(unsigned long)(ki.p_vm_rssize),/* resident */
418 		(unsigned long)(ki.p_uru_ixrss),/* shared */
419 		(unsigned long)(ki.p_vm_tsize),	/* text */
420 		(unsigned long) 0,		/* library (unused) */
421 		(unsigned long)(ki.p_vm_dsize + ki.p_vm_ssize),	/* data+stack */
422 		(unsigned long) 0);		/* dirty */
423 
424 	if (len == 0)
425 		goto out;
426 
427 	error = uiomove_frombuf(bf, len, uio);
428 out:
429 	free(bf, M_TEMP);
430 	return error;
431 }
432 
433 #define UTIME2TICKS(s,u)	(((uint64_t)(s) * 1000000 + (u)) / 10000)
434 
435 /*
436  * Linux compatible /proc/<pid>/stat. Only active when the -o linux
437  * mountflag is used.
438  */
439 int
440 procfs_do_pid_stat(struct lwp *curl, struct lwp *l,
441     struct pfsnode *pfs, struct uio *uio)
442 {
443 	char *bf;
444 	struct proc *p = l->l_proc;
445 	int len;
446 	struct rusage *cru = &p->p_stats->p_cru;
447 	unsigned long stext = 0, etext = 0, sstack = 0;
448 	struct timeval rt;
449 	struct vmspace	*vm;
450 	struct kinfo_proc2 ki;
451 	int error;
452 
453 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
454 
455 	if ((error = proc_vmspace_getref(p, &vm)) != 0) {
456 		goto out;
457 	}
458 
459 	get_proc_size_info(p, &vm->vm_map, &stext, &etext, &sstack);
460 
461 	mutex_enter(&proc_lock);
462 	mutex_enter(p->p_lock);
463 
464 	memset(&ki, 0, sizeof(ki));
465 	fill_kproc2(p, &ki, false, false);
466 	calcru(p, NULL, NULL, NULL, &rt);
467 
468 	len = snprintf(bf, LBFSZ,
469 	    "%d (%s) %c %d %d %d %u %d "
470 	    "%u "
471 	    "%"PRIu64" %lu %"PRIu64" %lu %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" "
472 	    "%d %d %"PRIu64" "
473 	    "%lld %"PRIu64" %"PRId64" %lu %"PRIu64" "
474 	    "%lu %lu %lu "
475 	    "%u %u "
476 	    "%u %u %u %u "
477 	    "%"PRIu64" %"PRIu64" %"PRIu64" %d %"PRIu64"\n",
478 
479 	    ki.p_pid,						/* 1 pid */
480 	    ki.p_comm,						/* 2 tcomm */
481 	    "0RRSTZXR8"[(ki.p_stat > 8) ? 0 : (int)ki.p_stat],	/* 3 state */
482 	    ki.p_ppid,						/* 4 ppid */
483 	    ki.p__pgid,						/* 5 pgrp */
484 	    ki.p_sid,						/* 6 sid */
485 	    (ki.p_tdev != (uint32_t)NODEV) ? ki.p_tdev : 0,	/* 7 tty_nr */
486 	    ki.p_tpgid,						/* 8 tty_pgrp */
487 
488 	    ki.p_flag,						/* 9 flags */
489 
490 	    ki.p_uru_minflt,					/* 10 min_flt */
491 	    cru->ru_minflt,
492 	    ki.p_uru_majflt,					/* 12 maj_flt */
493 	    cru->ru_majflt,
494 	    UTIME2TICKS(ki.p_uutime_sec, ki.p_uutime_usec),	/* 14 utime */
495 	    UTIME2TICKS(ki.p_ustime_sec, ki.p_ustime_usec),	/* 15 stime */
496 	    UTIME2TICKS(cru->ru_utime.tv_sec, cru->ru_utime.tv_usec), /* 16 cutime */
497 	    UTIME2TICKS(cru->ru_stime.tv_sec, cru->ru_stime.tv_usec), /* 17 cstime */
498 
499 	    ki.p_priority,				/* XXX: 18 priority */
500 	    ki.p_nice - NZERO,				/* 19 nice */
501 	    ki.p_nlwps,					/* 20 num_threads */
502 
503 	    (long long)rt.tv_sec,
504 	    UTIME2TICKS(ki.p_ustart_sec, ki.p_ustart_usec), /* 22 start_time */
505 	    ki.p_vm_msize,				/* 23 vsize */
506 	    PGTOKB(ki.p_vm_rssize),			/* 24 rss */
507 	    p->p_rlimit[RLIMIT_RSS].rlim_cur,		/* 25 rsslim */
508 
509 	    stext,					/* 26 start_code */
510 	    etext,					/* 27 end_code */
511 	    sstack,					/* 28 start_stack */
512 
513 	    0,						/* XXX: 29 esp */
514 	    0,						/* XXX: 30 eip */
515 
516 	    ki.p_siglist.__bits[0],			/* XXX: 31 pending */
517 	    0,						/* XXX: 32 blocked */
518 	    ki.p_sigignore.__bits[0],		/* 33 sigign */
519 	    ki.p_sigcatch.__bits[0],		/* 34 sigcatch */
520 
521 	    ki.p_wchan,					/* 35 wchan */
522 	    ki.p_uru_nvcsw,
523 	    ki.p_uru_nivcsw,
524 	    ki.p_exitsig,				/* 38 exit_signal */
525 	    ki.p_cpuid);				/* 39 task_cpu */
526 
527 	mutex_exit(p->p_lock);
528 	mutex_exit(&proc_lock);
529 
530 	uvmspace_free(vm);
531 
532 	if (len == 0)
533 		goto out;
534 
535 	error = uiomove_frombuf(bf, len, uio);
536 out:
537 	free(bf, M_TEMP);
538 	return error;
539 }
540 
541 int
542 procfs_docpuinfo(struct lwp *curl, struct proc *p,
543     struct pfsnode *pfs, struct uio *uio)
544 {
545 	size_t len = LBFSZ;
546 	char *bf = NULL;
547 	int error;
548 
549 	do {
550 		if (bf)
551 			free(bf, M_TEMP);
552 		bf = malloc(len, M_TEMP, M_WAITOK);
553 	} while (procfs_getcpuinfstr(bf, &len) < 0);
554 
555 	if (len == 0) {
556 		error = 0;
557 		goto done;
558 	}
559 
560 	error = uiomove_frombuf(bf, len, uio);
561 done:
562 	free(bf, M_TEMP);
563 	return error;
564 }
565 
566 int
567 procfs_douptime(struct lwp *curl, struct proc *p,
568     struct pfsnode *pfs, struct uio *uio)
569 {
570 	char *bf;
571 	int len;
572 	struct timeval runtime;
573 	u_int64_t idle;
574 	int error = 0;
575 
576 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
577 
578 	microuptime(&runtime);
579 	idle = curcpu()->ci_schedstate.spc_cp_time[CP_IDLE];
580 	len = snprintf(bf, LBFSZ,
581 	    "%lld.%02lu %" PRIu64 ".%02" PRIu64 "\n",
582 	    (long long)runtime.tv_sec, (long)runtime.tv_usec / 10000,
583 	    idle / hz, (((idle % hz) * 100) / hz) % 100);
584 
585 	if (len == 0)
586 		goto out;
587 
588 	error = uiomove_frombuf(bf, len, uio);
589 out:
590 	free(bf, M_TEMP);
591 	return error;
592 }
593 
594 static int
595 procfs_format_sfs(char **mtab, size_t *mlen, char *buf, size_t blen,
596     const struct statvfs *sfs, struct lwp *curl, int suser)
597 {
598 	const char *fsname;
599 
600 	/* Linux uses different names for some filesystems */
601 	fsname = sfs->f_fstypename;
602 	if (strcmp(fsname, "procfs") == 0)
603 		fsname = "proc";
604 	else if (strcmp(fsname, "ext2fs") == 0)
605 		fsname = "ext2";
606 
607 	blen = snprintf(buf, blen, "%s %s %s %s%s%s%s%s%s 0 0\n",
608 	    sfs->f_mntfromname, sfs->f_mntonname, fsname,
609 	    (sfs->f_flag & ST_RDONLY) ? "ro" : "rw",
610 	    (sfs->f_flag & ST_NOSUID) ? ",nosuid" : "",
611 	    (sfs->f_flag & ST_NOEXEC) ? ",noexec" : "",
612 	    (sfs->f_flag & ST_NODEV) ? ",nodev" : "",
613 	    (sfs->f_flag & ST_SYNCHRONOUS) ? ",sync" : "",
614 	    (sfs->f_flag & ST_NOATIME) ? ",noatime" : "");
615 
616 	*mtab = realloc(*mtab, *mlen + blen, M_TEMP, M_WAITOK);
617 	memcpy(*mtab + *mlen, buf, blen);
618 	*mlen += blen;
619 	return sfs->f_mntonname[0] == '/' && sfs->f_mntonname[1] == '\0';
620 }
621 
622 int
623 procfs_domounts(struct lwp *curl, struct proc *p,
624     struct pfsnode *pfs, struct uio *uio)
625 {
626 	char *bf, *mtab = NULL;
627 	size_t mtabsz = 0;
628 	mount_iterator_t *iter;
629 	struct mount *mp;
630 	int error = 0, root = 0;
631 	struct cwdinfo *cwdi = curl->l_proc->p_cwdi;
632 	struct statvfs *sfs;
633 
634 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
635 
636 	sfs = malloc(sizeof(*sfs), M_TEMP, M_WAITOK);
637 	mountlist_iterator_init(&iter);
638 	while ((mp = mountlist_iterator_next(iter)) != NULL) {
639 		if ((error = dostatvfs(mp, sfs, curl, MNT_WAIT, 0)) == 0)
640 			root |= procfs_format_sfs(&mtab, &mtabsz, bf, LBFSZ,
641 			    sfs, curl, 0);
642 	}
643 	mountlist_iterator_destroy(iter);
644 	free(sfs, M_TEMP);
645 
646 	/*
647 	 * If we are inside a chroot that is not itself a mount point,
648 	 * fake a root entry.
649 	 */
650 	if (!root && cwdi->cwdi_rdir)
651 		(void)procfs_format_sfs(&mtab, &mtabsz, bf, LBFSZ,
652 		    &cwdi->cwdi_rdir->v_mount->mnt_stat, curl, 1);
653 
654 	free(bf, M_TEMP);
655 
656 	if (mtabsz > 0) {
657 		error = uiomove_frombuf(mtab, mtabsz, uio);
658 		free(mtab, M_TEMP);
659 	}
660 
661 	return error;
662 }
663 
664 /*
665  * Linux compatible /proc/version. Only active when the -o linux
666  * mountflag is used.
667  */
668 int
669 procfs_doversion(struct lwp *curl, struct proc *p,
670     struct pfsnode *pfs, struct uio *uio)
671 {
672 	char *bf;
673 	char lostype[20], losrelease[20], lversion[80];
674 	const char *postype, *posrelease, *pversion;
675 	const char *emulname = curlwp->l_proc->p_emul->e_name;
676 	int len;
677 	int error = 0;
678 	int nm[4];
679 	size_t buflen;
680 
681 	CTASSERT(EMUL_LINUX_KERN_OSTYPE == EMUL_LINUX32_KERN_OSTYPE);
682 	CTASSERT(EMUL_LINUX_KERN_OSRELEASE == EMUL_LINUX32_KERN_OSRELEASE);
683 	CTASSERT(EMUL_LINUX_KERN_VERSION == EMUL_LINUX32_KERN_VERSION);
684 
685 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
686 
687 	sysctl_lock(false);
688 
689 	if (strncmp(emulname, "linux", 5) == 0) {
690 		/*
691 		 * Lookup the emulation ostype, osrelease, and version.
692 		 * Since compat_linux and compat_linux32 can be built as
693 		 * modules, we use sysctl to obtain the values instead of
694 		 * using the symbols directly.
695 		 */
696 
697 		if (strcmp(emulname, "linux32") == 0) {
698 			nm[0] = CTL_EMUL;
699 			nm[1] = EMUL_LINUX32;
700 			nm[2] = EMUL_LINUX32_KERN;
701 		} else {
702 			nm[0] = CTL_EMUL;
703 			nm[1] = EMUL_LINUX;
704 			nm[2] = EMUL_LINUX_KERN;
705 		}
706 
707 		nm[3] = EMUL_LINUX_KERN_OSTYPE;
708 		buflen = sizeof(lostype);
709 		error = sysctl_dispatch(nm, __arraycount(nm),
710 		    lostype, &buflen,
711 		    NULL, 0, NULL, NULL, NULL);
712 		if (error)
713 			goto out;
714 
715 		nm[3] = EMUL_LINUX_KERN_OSRELEASE;
716 		buflen = sizeof(losrelease);
717 		error = sysctl_dispatch(nm, __arraycount(nm),
718 		    losrelease, &buflen,
719 		    NULL, 0, NULL, NULL, NULL);
720 		if (error)
721 			goto out;
722 
723 		nm[3] = EMUL_LINUX_KERN_VERSION;
724 		buflen = sizeof(lversion);
725 		error = sysctl_dispatch(nm, __arraycount(nm),
726 		    lversion, &buflen,
727 		    NULL, 0, NULL, NULL, NULL);
728 		if (error)
729 			goto out;
730 
731 		postype = lostype;
732 		posrelease = losrelease;
733 		pversion = lversion;
734 	} else {
735 		postype = ostype;
736 		posrelease = osrelease;
737 		strlcpy(lversion, version, sizeof(lversion));
738 		if (strchr(lversion, '\n'))
739 			*strchr(lversion, '\n') = '\0';
740 		pversion = lversion;
741 	}
742 
743 	len = snprintf(bf, LBFSZ,
744 		"%s version %s (%s@localhost) (gcc version %s) %s\n",
745 		postype, posrelease, emulname,
746 #ifdef __VERSION__
747 		__VERSION__,
748 #else
749 		"unknown",
750 #endif
751 		pversion);
752 
753 	if (len == 0)
754 		goto out;
755 
756 	error = uiomove_frombuf(bf, len, uio);
757 out:
758 	free(bf, M_TEMP);
759 	sysctl_unlock();
760 	return error;
761 }
762 
763 int
764 procfs_dosysvipc_msg(struct lwp *curl, struct proc *p,
765     struct pfsnode *pfs, struct uio *uio)
766 {
767 	char *bf;
768 	int offset = 0;
769 	int error = EFBIG;
770 
771 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
772 
773 	offset += snprintf(bf, LBFSZ,
774 	    "%10s %10s %4s  %10s %10s %5s %5s %5s %5s %5s %5s %10s %10s %10s\n",
775 	    "key", "msqid", "perms", "cbytes", "qnum", "lspid", "lrpid",
776 	    "uid", "gid", "cuid", "cgid", "stime", "rtime", "ctime");
777 	if (offset >= LBFSZ)
778 		goto out;
779 
780 #ifdef SYSVMSG
781 	for (int id = 0; id < msginfo.msgmni; id++)
782 		if (msqs[id].msq_u.msg_qbytes > 0) {
783 			offset += snprintf(&bf[offset], LBFSZ - offset,
784 			    "%10d %10d  %4o  %10zu %10lu %5u %5u %5u %5u %5u %5u %10lld %10lld %10lld\n",
785 			    (int) msqs[id].msq_u.msg_perm._key,
786 			    IXSEQ_TO_IPCID(id, msqs[id].msq_u.msg_perm),
787 			    msqs[id].msq_u.msg_perm.mode,
788 			    msqs[id].msq_u._msg_cbytes,
789 			    msqs[id].msq_u.msg_qnum,
790 			    msqs[id].msq_u.msg_lspid,
791 			    msqs[id].msq_u.msg_lrpid,
792 			    msqs[id].msq_u.msg_perm.uid,
793 			    msqs[id].msq_u.msg_perm.gid,
794 			    msqs[id].msq_u.msg_perm.cuid,
795 			    msqs[id].msq_u.msg_perm.cgid,
796 			    (long long)msqs[id].msq_u.msg_stime,
797 			    (long long)msqs[id].msq_u.msg_rtime,
798 			    (long long)msqs[id].msq_u.msg_ctime);
799 			if (offset >= LBFSZ)
800 				goto out;
801 		}
802 #endif
803 
804 	error = uiomove_frombuf(bf, offset, uio);
805 out:
806 	free(bf, M_TEMP);
807 	return error;
808 }
809 
810 int
811 procfs_dosysvipc_sem(struct lwp *curl, struct proc *p,
812     struct pfsnode *pfs, struct uio *uio)
813 {
814 	char *bf;
815 	int offset = 0;
816 	int error = EFBIG;
817 
818 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
819 
820 	offset += snprintf(bf, LBFSZ,
821 	    "%10s %10s %4s %10s %5s %5s %5s %5s %10s %10s\n",
822 	    "key", "semid", "perms", "nsems", "uid", "gid", "cuid", "cgid",
823 	    "otime", "ctime");
824 	if (offset >= LBFSZ)
825 		goto out;
826 
827 #ifdef SYSVSEM
828 	for (int id = 0; id < seminfo.semmni; id++)
829 		if ((sema[id].sem_perm.mode & SEM_ALLOC) != 0) {
830 			offset += snprintf(&bf[offset], LBFSZ - offset,
831 			    "%10d %10d  %4o %10u %5u %5u %5u %5u %10lld %10lld\n",
832 			    (int) sema[id].sem_perm._key,
833 			    IXSEQ_TO_IPCID(id, sema[id].sem_perm),
834 			    sema[id].sem_perm.mode,
835 			    sema[id].sem_nsems,
836 			    sema[id].sem_perm.uid,
837 			    sema[id].sem_perm.gid,
838 			    sema[id].sem_perm.cuid,
839 			    sema[id].sem_perm.cgid,
840 			    (long long)sema[id].sem_otime,
841 			    (long long)sema[id].sem_ctime);
842 			if (offset >= LBFSZ)
843 				goto out;
844 		}
845 #endif
846 
847 	error = uiomove_frombuf(bf, offset, uio);
848 out:
849 	free(bf, M_TEMP);
850 	return error;
851 }
852 
853 int
854 procfs_dosysvipc_shm(struct lwp *curl, struct proc *p,
855     struct pfsnode *pfs, struct uio *uio)
856 {
857 	char *bf;
858 	int offset = 0;
859 	int error = EFBIG;
860 
861 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
862 
863 	offset += snprintf(bf, LBFSZ,
864 	    "%10s %10s %s %21s %5s %5s %5s %5s %5s %5s %5s %10s %10s %10s %21s %21s\n",
865 	    "key", "shmid", "perms", "size", "cpid", "lpid", "nattch", "uid",
866 	    "gid", "cuid", "cgid", "atime", "dtime", "ctime", "rss", "swap");
867 	if (offset >= LBFSZ)
868 		goto out;
869 
870 #ifdef SYSVSHM
871 	for (unsigned int id = 0; id < shminfo.shmmni; id++)
872 		if ((shmsegs[id].shm_perm.mode & SHMSEG_ALLOCATED) != 0) {
873 			offset += snprintf(&bf[offset], LBFSZ - offset,
874 			    "%10d %10d  %4o %21zu %5u %5u  %5u %5u %5u %5u %5u %10lld %10lld %10lld %21d %21d\n",
875 			    (int) shmsegs[id].shm_perm._key,
876 			    IXSEQ_TO_IPCID(id, shmsegs[id].shm_perm),
877 			    shmsegs[id].shm_perm.mode,
878 			    shmsegs[id].shm_segsz,
879 			    shmsegs[id].shm_cpid,
880 			    shmsegs[id].shm_lpid,
881 			    shmsegs[id].shm_nattch,
882 			    shmsegs[id].shm_perm.uid,
883 			    shmsegs[id].shm_perm.gid,
884 			    shmsegs[id].shm_perm.cuid,
885 			    shmsegs[id].shm_perm.cgid,
886 			    (long long)shmsegs[id].shm_atime,
887 			    (long long)shmsegs[id].shm_dtime,
888 			    (long long)shmsegs[id].shm_ctime,
889 			    0, 0);	/* XXX rss & swp are not supported */
890 			if (offset >= LBFSZ)
891 				goto out;
892 		}
893 #endif
894 
895 	error = uiomove_frombuf(bf, offset, uio);
896 out:
897 	free(bf, M_TEMP);
898 	return error;
899 }
900