xref: /netbsd-src/sys/miscfs/procfs/procfs_linux.c (revision 801f73adf8029e41ec107911c58034bb925796a2)
1 /*      $NetBSD: procfs_linux.c,v 1.89 2024/07/01 01:35:53 christos Exp $      */
2 
3 /*
4  * Copyright (c) 2001 Wasabi Systems, Inc.
5  * All rights reserved.
6  *
7  * Written by Frank van der Linden for Wasabi Systems, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed for the NetBSD Project by
20  *      Wasabi Systems, Inc.
21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22  *    or promote products derived from this software without specific prior
23  *    written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: procfs_linux.c,v 1.89 2024/07/01 01:35:53 christos Exp $");
40 
41 #if defined(_KERNEL_OPT)
42 #include "opt_sysv.h"
43 #endif
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/atomic.h>
48 #include <sys/time.h>
49 #include <sys/cpu.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/vnode.h>
53 #include <sys/exec.h>
54 #include <sys/resource.h>
55 #include <sys/resourcevar.h>
56 #include <sys/signal.h>
57 #include <sys/signalvar.h>
58 #include <sys/tty.h>
59 #include <sys/malloc.h>
60 #include <sys/mount.h>
61 #include <sys/conf.h>
62 #include <sys/sysctl.h>
63 #include <sys/kauth.h>
64 #include <sys/filedesc.h>
65 #ifdef SYSVMSG
66 #include <sys/msg.h>
67 #endif
68 #ifdef SYSVSEM
69 #include <sys/sem.h>
70 #endif
71 #ifdef SYSVSHM
72 #include <sys/shm.h>
73 #endif
74 #include <sys/mqueue.h>
75 
76 #include <miscfs/procfs/procfs.h>
77 
78 #include <compat/linux/common/linux_exec.h>
79 #include <compat/linux32/common/linux32_sysctl.h>
80 
81 #include <uvm/uvm.h>
82 #include <uvm/uvm_extern.h>
83 
84 extern struct devsw_conv *devsw_conv;
85 extern int max_devsw_convs;
86 extern u_int mq_open_max;
87 extern u_int mq_max_msgsize;
88 extern u_int mq_def_maxmsg;
89 extern u_int mq_max_maxmsg;
90 
91 
92 #define PGTOB(p)	((unsigned long)(p) << PAGE_SHIFT)
93 #define PGTOKB(p)	((unsigned long)(p) << (PAGE_SHIFT - 10))
94 
95 #define LBFSZ (8 * 1024)
96 
97 static void
98 get_proc_size_info(struct proc *p, struct vm_map *map, unsigned long *stext,
99     unsigned long *etext, unsigned long *sstack)
100 {
101 	struct vm_map_entry *entry;
102 
103 	*stext = 0;
104 	*etext = 0;
105 	*sstack = 0;
106 
107 	vm_map_lock_read(map);
108 
109 	for (entry = map->header.next; entry != &map->header;
110 	    entry = entry->next) {
111 		if (UVM_ET_ISSUBMAP(entry))
112 			continue;
113 		/* assume text is the first entry */
114 		if (*stext == *etext) {
115 			*stext = entry->start;
116 			*etext = entry->end;
117 			break;
118 		}
119 	}
120 #if defined(LINUX_USRSTACK32) && defined(USRSTACK32)
121 	if (strcmp(p->p_emul->e_name, "linux32") == 0 &&
122 	    LINUX_USRSTACK32 < USRSTACK32)
123 		*sstack = (unsigned long)LINUX_USRSTACK32;
124 	else
125 #endif
126 #ifdef LINUX_USRSTACK
127 	if (strcmp(p->p_emul->e_name, "linux") == 0 &&
128 	    LINUX_USRSTACK < USRSTACK)
129 		*sstack = (unsigned long)LINUX_USRSTACK;
130 	else
131 #endif
132 #ifdef	USRSTACK32
133 	if (strstr(p->p_emul->e_name, "32") != NULL)
134 		*sstack = (unsigned long)USRSTACK32;
135 	else
136 #endif
137 		*sstack = (unsigned long)USRSTACK;
138 
139 	/*
140 	 * jdk 1.6 compares low <= addr && addr < high
141 	 * if we put addr == high, then the test fails
142 	 * so eat one page.
143 	 */
144 	*sstack -= PAGE_SIZE;
145 
146 	vm_map_unlock_read(map);
147 }
148 
149 /*
150  * Linux compatible /proc/meminfo. Only active when the -o linux
151  * mountflag is used.
152  */
153 int
154 procfs_domeminfo(struct lwp *curl, struct proc *p,
155     struct pfsnode *pfs, struct uio *uio)
156 {
157 	char *bf;
158 	int len;
159 	int error = 0;
160 	long filepg, anonpg, execpg, freepg;
161 
162 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
163 
164 	/* uvm_availmem() will sync the counters if needed. */
165 	freepg = (long)uvm_availmem(true);
166 	filepg = (long)(cpu_count_get(CPU_COUNT_FILECLEAN) +
167 	    cpu_count_get(CPU_COUNT_FILEDIRTY) +
168 	    cpu_count_get(CPU_COUNT_FILEUNKNOWN) -
169 	    cpu_count_get(CPU_COUNT_EXECPAGES));
170 	anonpg = (long)(cpu_count_get(CPU_COUNT_ANONCLEAN) +
171 	    cpu_count_get(CPU_COUNT_ANONDIRTY) +
172 	    cpu_count_get(CPU_COUNT_ANONUNKNOWN));
173 	execpg = (long)cpu_count_get(CPU_COUNT_EXECPAGES);
174 
175 	len = snprintf(bf, LBFSZ,
176 		"        total:    used:    free:  shared: buffers: cached:\n"
177 		"Mem:  %8lu %8lu %8lu %8lu %8lu %8lu\n"
178 		"Swap: %8lu %8lu %8lu\n"
179 		"MemTotal:  %8lu kB\n"
180 		"MemFree:   %8lu kB\n"
181 		"MemShared: %8lu kB\n"
182 		"Buffers:   %8lu kB\n"
183 		"Cached:    %8lu kB\n"
184 		"SwapTotal: %8lu kB\n"
185 		"SwapFree:  %8lu kB\n",
186 		PGTOB(uvmexp.npages),
187 		PGTOB(uvmexp.npages - freepg),
188 		PGTOB(freepg),
189 		0L,
190 		PGTOB(filepg),
191 		PGTOB(anonpg + filepg + execpg),
192 		PGTOB(uvmexp.swpages),
193 		PGTOB(uvmexp.swpginuse),
194 		PGTOB(uvmexp.swpages - uvmexp.swpginuse),
195 		PGTOKB(uvmexp.npages),
196 		PGTOKB(freepg),
197 		0L,
198 		PGTOKB(freepg),
199 		PGTOKB(anonpg + filepg + execpg),
200 		PGTOKB(uvmexp.swpages),
201 		PGTOKB(uvmexp.swpages - uvmexp.swpginuse));
202 
203 	if (len == 0)
204 		goto out;
205 
206 	error = uiomove_frombuf(bf, len, uio);
207 out:
208 	free(bf, M_TEMP);
209 	return error;
210 }
211 
212 /*
213  * Linux compatible /proc/devices. Only active when the -o linux
214  * mountflag is used.
215  */
216 int
217 procfs_dodevices(struct lwp *curl, struct proc *p,
218     struct pfsnode *pfs, struct uio *uio)
219 {
220 	char *bf;
221 	int offset = 0;
222 	int i, error = ENAMETOOLONG;
223 
224 	/* XXX elad - may need filtering. */
225 
226 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
227 
228 	offset += snprintf(&bf[offset], LBFSZ - offset, "Character devices:\n");
229 	if (offset >= LBFSZ)
230 		goto out;
231 
232 	mutex_enter(&device_lock);
233 	for (i = 0; i < max_devsw_convs; i++) {
234 		if ((devsw_conv[i].d_name == NULL) ||
235 		    (devsw_conv[i].d_cmajor == -1))
236 			continue;
237 
238 		offset += snprintf(&bf[offset], LBFSZ - offset,
239 		    "%3d %s\n", devsw_conv[i].d_cmajor, devsw_conv[i].d_name);
240 		if (offset >= LBFSZ) {
241 			mutex_exit(&device_lock);
242 			goto out;
243 		}
244 	}
245 
246 	offset += snprintf(&bf[offset], LBFSZ - offset, "\nBlock devices:\n");
247 	if (offset >= LBFSZ) {
248 		mutex_exit(&device_lock);
249 		goto out;
250 	}
251 
252 	for (i = 0; i < max_devsw_convs; i++) {
253 		if ((devsw_conv[i].d_name == NULL) ||
254 		    (devsw_conv[i].d_bmajor == -1))
255 			continue;
256 
257 		offset += snprintf(&bf[offset], LBFSZ - offset,
258 		    "%3d %s\n", devsw_conv[i].d_bmajor, devsw_conv[i].d_name);
259 		if (offset >= LBFSZ) {
260 			mutex_exit(&device_lock);
261 			goto out;
262 		}
263 	}
264 	mutex_exit(&device_lock);
265 
266 	error = uiomove_frombuf(bf, offset, uio);
267 out:
268 	free(bf, M_TEMP);
269 	return error;
270 }
271 
272 /*
273  * Linux compatible /proc/stat. Only active when the -o linux
274  * mountflag is used.
275  */
276 int
277 procfs_docpustat(struct lwp *curl, struct proc *p,
278     struct pfsnode *pfs, struct uio *uio)
279 {
280 	char		*bf;
281 	int	 	 error;
282 	int	 	 len;
283 #if defined(MULTIPROCESSOR)
284         struct cpu_info *ci;
285         CPU_INFO_ITERATOR cii;
286 #endif
287 	int	 	 i;
288 
289 	error = ENAMETOOLONG;
290 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
291 
292 	len = snprintf(bf, LBFSZ,
293 		"cpu %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
294 		curcpu()->ci_schedstate.spc_cp_time[CP_USER],
295 		curcpu()->ci_schedstate.spc_cp_time[CP_NICE],
296 		curcpu()->ci_schedstate.spc_cp_time[CP_SYS] /*+ [CP_INTR]*/,
297 		curcpu()->ci_schedstate.spc_cp_time[CP_IDLE]);
298 	if (len == 0)
299 		goto out;
300 
301 #if defined(MULTIPROCESSOR)
302 #define ALLCPUS	CPU_INFO_FOREACH(cii, ci)
303 #define CPUNAME	ci
304 #else
305 #define ALLCPUS	; i < 1 ;
306 #define CPUNAME	curcpu()
307 #endif
308 
309 	i = 0;
310 	for (ALLCPUS) {
311 		len += snprintf(&bf[len], LBFSZ - len,
312 			"cpu%d %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64
313 			"\n", i,
314 			CPUNAME->ci_schedstate.spc_cp_time[CP_USER],
315 			CPUNAME->ci_schedstate.spc_cp_time[CP_NICE],
316 			CPUNAME->ci_schedstate.spc_cp_time[CP_SYS],
317 			CPUNAME->ci_schedstate.spc_cp_time[CP_IDLE]);
318 		if (len >= LBFSZ)
319 			goto out;
320 		i += 1;
321 	}
322 
323 	cpu_count_sync(true);
324 
325 	struct timeval btv;
326 	getmicroboottime(&btv);
327 
328 	len += snprintf(&bf[len], LBFSZ - len,
329 			"disk 0 0 0 0\n"
330 			"page %u %u\n"
331 			"swap %u %u\n"
332 			"intr %"PRId64"\n"
333 			"ctxt %"PRId64"\n"
334 			"btime %"PRId64"\n",
335 			uvmexp.pageins, uvmexp.pdpageouts,
336 			uvmexp.pgswapin, uvmexp.pgswapout,
337 			cpu_count_get(CPU_COUNT_NINTR),
338 			cpu_count_get(CPU_COUNT_NSWTCH),
339 			btv.tv_sec);
340 	if (len >= LBFSZ)
341 		goto out;
342 
343 	error = uiomove_frombuf(bf, len, uio);
344 out:
345 	free(bf, M_TEMP);
346 	return error;
347 }
348 
349 /*
350  * Linux compatible /proc/loadavg. Only active when the -o linux
351  * mountflag is used.
352  */
353 int
354 procfs_doloadavg(struct lwp *curl, struct proc *p,
355     struct pfsnode *pfs, struct uio *uio)
356 {
357 	char	*bf;
358 	int 	 error;
359 	int 	 len;
360 
361 	error = ENAMETOOLONG;
362 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
363 
364 	averunnable.fscale = FSCALE;
365 	len = snprintf(bf, LBFSZ,
366 	        "%d.%02d %d.%02d %d.%02d %d/%d %d\n",
367 		(int)(averunnable.ldavg[0] / averunnable.fscale),
368 		(int)(averunnable.ldavg[0] * 100 / averunnable.fscale % 100),
369 		(int)(averunnable.ldavg[1] / averunnable.fscale),
370 		(int)(averunnable.ldavg[1] * 100 / averunnable.fscale % 100),
371 		(int)(averunnable.ldavg[2] / averunnable.fscale),
372 		(int)(averunnable.ldavg[2] * 100 / averunnable.fscale % 100),
373 		1,		/* number of ONPROC processes */
374 		atomic_load_relaxed(&nprocs),
375 		30000);		/* last pid */
376 	if (len == 0)
377 		goto out;
378 
379 	error = uiomove_frombuf(bf, len, uio);
380 out:
381 	free(bf, M_TEMP);
382 	return error;
383 }
384 
385 /*
386  * Linux compatible /proc/<pid>/statm. Only active when the -o linux
387  * mountflag is used.
388  */
389 int
390 procfs_do_pid_statm(struct lwp *curl, struct lwp *l,
391     struct pfsnode *pfs, struct uio *uio)
392 {
393 	struct vmspace	*vm;
394 	struct proc	*p = l->l_proc;
395 	char		*bf;
396 	int	 	 error;
397 	int	 	 len;
398 	struct kinfo_proc2 ki;
399 
400 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
401 
402 	/* XXX - we use values from vmspace, since dsl says that ru figures
403 	   are always 0 except for zombies. See kvm_proc.c::kvm_getproc2() */
404 	if ((error = proc_vmspace_getref(p, &vm)) != 0) {
405 		goto out;
406 	}
407 
408 	mutex_enter(&proc_lock);
409 	mutex_enter(p->p_lock);
410 
411 	/* retrieve RSS size */
412 	memset(&ki, 0, sizeof(ki));
413 	fill_kproc2(p, &ki, false, false);
414 
415 	mutex_exit(p->p_lock);
416 	mutex_exit(&proc_lock);
417 
418 	uvmspace_free(vm);
419 
420 	len = snprintf(bf, LBFSZ,
421 	        "%lu %lu %lu %lu %lu %lu %lu\n",
422 		(unsigned long)(ki.p_vm_msize),	/* size */
423 		(unsigned long)(ki.p_vm_rssize),/* resident */
424 		(unsigned long)(ki.p_uru_ixrss),/* shared */
425 		(unsigned long)(ki.p_vm_tsize),	/* text */
426 		(unsigned long) 0,		/* library (unused) */
427 		(unsigned long)(ki.p_vm_dsize + ki.p_vm_ssize),	/* data+stack */
428 		(unsigned long) 0);		/* dirty */
429 
430 	if (len == 0)
431 		goto out;
432 
433 	error = uiomove_frombuf(bf, len, uio);
434 out:
435 	free(bf, M_TEMP);
436 	return error;
437 }
438 
439 #define UTIME2TICKS(s,u)	(((uint64_t)(s) * 1000000 + (u)) / 10000)
440 
441 /*
442  * Linux compatible /proc/<pid>/stat. Only active when the -o linux
443  * mountflag is used.
444  */
445 int
446 procfs_do_pid_stat(struct lwp *curl, struct lwp *l,
447     struct pfsnode *pfs, struct uio *uio)
448 {
449 	char *bf;
450 	struct proc *p = l->l_proc;
451 	int len;
452 	struct rusage *cru = &p->p_stats->p_cru;
453 	unsigned long stext = 0, etext = 0, sstack = 0;
454 	struct timeval rt;
455 	struct vmspace	*vm;
456 	struct kinfo_proc2 ki;
457 	int error;
458 
459 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
460 
461 	if ((error = proc_vmspace_getref(p, &vm)) != 0) {
462 		goto out;
463 	}
464 
465 	get_proc_size_info(p, &vm->vm_map, &stext, &etext, &sstack);
466 
467 	mutex_enter(&proc_lock);
468 	mutex_enter(p->p_lock);
469 
470 	memset(&ki, 0, sizeof(ki));
471 	fill_kproc2(p, &ki, false, false);
472 	calcru(p, NULL, NULL, NULL, &rt);
473 
474 	len = snprintf(bf, LBFSZ,
475 	    "%d (%s) %c %d %d %d %u %d "
476 	    "%u "
477 	    "%"PRIu64" %lu %"PRIu64" %lu %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" "
478 	    "%d %d %"PRIu64" "
479 	    "%lld %"PRIu64" %"PRId64" %lu %"PRIu64" "
480 	    "%lu %lu %lu "
481 	    "%u %u "
482 	    "%u %u %u %u "
483 	    "%"PRIu64" %"PRIu64" %"PRIu64" %d %"PRIu64"\n",
484 
485 	    ki.p_pid,						/* 1 pid */
486 	    ki.p_comm,						/* 2 tcomm */
487 	    "0RRSTZXR8"[(ki.p_stat > 8) ? 0 : (int)ki.p_stat],	/* 3 state */
488 	    ki.p_ppid,						/* 4 ppid */
489 	    ki.p__pgid,						/* 5 pgrp */
490 	    ki.p_sid,						/* 6 sid */
491 	    (ki.p_tdev != (uint32_t)NODEV) ? ki.p_tdev : 0,	/* 7 tty_nr */
492 	    ki.p_tpgid,						/* 8 tty_pgrp */
493 
494 	    ki.p_flag,						/* 9 flags */
495 
496 	    ki.p_uru_minflt,					/* 10 min_flt */
497 	    cru->ru_minflt,
498 	    ki.p_uru_majflt,					/* 12 maj_flt */
499 	    cru->ru_majflt,
500 	    UTIME2TICKS(ki.p_uutime_sec, ki.p_uutime_usec),	/* 14 utime */
501 	    UTIME2TICKS(ki.p_ustime_sec, ki.p_ustime_usec),	/* 15 stime */
502 	    UTIME2TICKS(cru->ru_utime.tv_sec, cru->ru_utime.tv_usec), /* 16 cutime */
503 	    UTIME2TICKS(cru->ru_stime.tv_sec, cru->ru_stime.tv_usec), /* 17 cstime */
504 
505 	    ki.p_priority,				/* XXX: 18 priority */
506 	    ki.p_nice - NZERO,				/* 19 nice */
507 	    ki.p_nlwps,					/* 20 num_threads */
508 
509 	    (long long)rt.tv_sec,
510 	    UTIME2TICKS(ki.p_ustart_sec, ki.p_ustart_usec), /* 22 start_time */
511 	    ki.p_vm_msize,				/* 23 vsize */
512 	    PGTOKB(ki.p_vm_rssize),			/* 24 rss */
513 	    p->p_rlimit[RLIMIT_RSS].rlim_cur,		/* 25 rsslim */
514 
515 	    stext,					/* 26 start_code */
516 	    etext,					/* 27 end_code */
517 	    sstack,					/* 28 start_stack */
518 
519 	    0,						/* XXX: 29 esp */
520 	    0,						/* XXX: 30 eip */
521 
522 	    ki.p_siglist.__bits[0],			/* XXX: 31 pending */
523 	    0,						/* XXX: 32 blocked */
524 	    ki.p_sigignore.__bits[0],		/* 33 sigign */
525 	    ki.p_sigcatch.__bits[0],		/* 34 sigcatch */
526 
527 	    ki.p_wchan,					/* 35 wchan */
528 	    ki.p_uru_nvcsw,
529 	    ki.p_uru_nivcsw,
530 	    ki.p_exitsig,				/* 38 exit_signal */
531 	    ki.p_cpuid);				/* 39 task_cpu */
532 
533 	mutex_exit(p->p_lock);
534 	mutex_exit(&proc_lock);
535 
536 	uvmspace_free(vm);
537 
538 	if (len == 0)
539 		goto out;
540 
541 	error = uiomove_frombuf(bf, len, uio);
542 out:
543 	free(bf, M_TEMP);
544 	return error;
545 }
546 
547 int
548 procfs_docpuinfo(struct lwp *curl, struct proc *p,
549     struct pfsnode *pfs, struct uio *uio)
550 {
551 	size_t len = LBFSZ;
552 	char *bf = NULL;
553 	int error;
554 
555 	do {
556 		if (bf)
557 			free(bf, M_TEMP);
558 		bf = malloc(len, M_TEMP, M_WAITOK);
559 	} while (procfs_getcpuinfstr(bf, &len) < 0);
560 
561 	if (len == 0) {
562 		error = 0;
563 		goto done;
564 	}
565 
566 	error = uiomove_frombuf(bf, len, uio);
567 done:
568 	free(bf, M_TEMP);
569 	return error;
570 }
571 
572 int
573 procfs_douptime(struct lwp *curl, struct proc *p,
574     struct pfsnode *pfs, struct uio *uio)
575 {
576 	char *bf;
577 	int len;
578 	struct timeval runtime;
579 	u_int64_t idle;
580 	int error = 0;
581 
582 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
583 
584 	microuptime(&runtime);
585 	idle = curcpu()->ci_schedstate.spc_cp_time[CP_IDLE];
586 	len = snprintf(bf, LBFSZ,
587 	    "%lld.%02lu %" PRIu64 ".%02" PRIu64 "\n",
588 	    (long long)runtime.tv_sec, (long)runtime.tv_usec / 10000,
589 	    idle / hz, (((idle % hz) * 100) / hz) % 100);
590 
591 	if (len == 0)
592 		goto out;
593 
594 	error = uiomove_frombuf(bf, len, uio);
595 out:
596 	free(bf, M_TEMP);
597 	return error;
598 }
599 
600 static int
601 procfs_format_sfs(char **mtab, size_t *mlen, char *buf, size_t blen,
602     const struct statvfs *sfs, struct lwp *curl, int suser)
603 {
604 	const char *fsname;
605 
606 	/* Linux uses different names for some filesystems */
607 	fsname = sfs->f_fstypename;
608 	if (strcmp(fsname, "procfs") == 0)
609 		fsname = "proc";
610 	else if (strcmp(fsname, "ext2fs") == 0)
611 		fsname = "ext2";
612 
613 	blen = snprintf(buf, blen, "%s %s %s %s%s%s%s%s%s 0 0\n",
614 	    sfs->f_mntfromname, sfs->f_mntonname, fsname,
615 	    (sfs->f_flag & ST_RDONLY) ? "ro" : "rw",
616 	    (sfs->f_flag & ST_NOSUID) ? ",nosuid" : "",
617 	    (sfs->f_flag & ST_NOEXEC) ? ",noexec" : "",
618 	    (sfs->f_flag & ST_NODEV) ? ",nodev" : "",
619 	    (sfs->f_flag & ST_SYNCHRONOUS) ? ",sync" : "",
620 	    (sfs->f_flag & ST_NOATIME) ? ",noatime" : "");
621 
622 	*mtab = realloc(*mtab, *mlen + blen, M_TEMP, M_WAITOK);
623 	memcpy(*mtab + *mlen, buf, blen);
624 	*mlen += blen;
625 	return sfs->f_mntonname[0] == '/' && sfs->f_mntonname[1] == '\0';
626 }
627 
628 int
629 procfs_domounts(struct lwp *curl, struct proc *p,
630     struct pfsnode *pfs, struct uio *uio)
631 {
632 	char *bf, *mtab = NULL;
633 	size_t mtabsz = 0;
634 	mount_iterator_t *iter;
635 	struct mount *mp;
636 	int error = 0, root = 0;
637 	struct cwdinfo *cwdi = curl->l_proc->p_cwdi;
638 	struct statvfs *sfs;
639 
640 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
641 
642 	sfs = malloc(sizeof(*sfs), M_TEMP, M_WAITOK);
643 	mountlist_iterator_init(&iter);
644 	while ((mp = mountlist_iterator_next(iter)) != NULL) {
645 		if ((error = dostatvfs(mp, sfs, curl, MNT_WAIT, 0)) == 0)
646 			root |= procfs_format_sfs(&mtab, &mtabsz, bf, LBFSZ,
647 			    sfs, curl, 0);
648 	}
649 	mountlist_iterator_destroy(iter);
650 	free(sfs, M_TEMP);
651 
652 	/*
653 	 * If we are inside a chroot that is not itself a mount point,
654 	 * fake a root entry.
655 	 */
656 	if (!root && cwdi->cwdi_rdir)
657 		(void)procfs_format_sfs(&mtab, &mtabsz, bf, LBFSZ,
658 		    &cwdi->cwdi_rdir->v_mount->mnt_stat, curl, 1);
659 
660 	free(bf, M_TEMP);
661 
662 	if (mtabsz > 0) {
663 		error = uiomove_frombuf(mtab, mtabsz, uio);
664 		free(mtab, M_TEMP);
665 	}
666 
667 	return error;
668 }
669 
670 /*
671  * Linux compatible /proc/version. Only active when the -o linux
672  * mountflag is used.
673  */
674 int
675 procfs_doversion(struct lwp *curl, struct proc *p,
676     struct pfsnode *pfs, struct uio *uio)
677 {
678 	char *bf;
679 	char lostype[20], losrelease[20], lversion[80];
680 	const char *postype, *posrelease, *pversion;
681 	const char *emulname = curlwp->l_proc->p_emul->e_name;
682 	int len;
683 	int error = 0;
684 	int nm[4];
685 	size_t buflen;
686 
687 	CTASSERT(EMUL_LINUX_KERN_OSTYPE == EMUL_LINUX32_KERN_OSTYPE);
688 	CTASSERT(EMUL_LINUX_KERN_OSRELEASE == EMUL_LINUX32_KERN_OSRELEASE);
689 	CTASSERT(EMUL_LINUX_KERN_VERSION == EMUL_LINUX32_KERN_VERSION);
690 
691 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
692 
693 	sysctl_lock(false);
694 
695 	if (strncmp(emulname, "linux", 5) == 0) {
696 		/*
697 		 * Lookup the emulation ostype, osrelease, and version.
698 		 * Since compat_linux and compat_linux32 can be built as
699 		 * modules, we use sysctl to obtain the values instead of
700 		 * using the symbols directly.
701 		 */
702 
703 		if (strcmp(emulname, "linux32") == 0) {
704 			nm[0] = CTL_EMUL;
705 			nm[1] = EMUL_LINUX32;
706 			nm[2] = EMUL_LINUX32_KERN;
707 		} else {
708 			nm[0] = CTL_EMUL;
709 			nm[1] = EMUL_LINUX;
710 			nm[2] = EMUL_LINUX_KERN;
711 		}
712 
713 		nm[3] = EMUL_LINUX_KERN_OSTYPE;
714 		buflen = sizeof(lostype);
715 		error = sysctl_dispatch(nm, __arraycount(nm),
716 		    lostype, &buflen,
717 		    NULL, 0, NULL, NULL, NULL);
718 		if (error)
719 			goto out;
720 
721 		nm[3] = EMUL_LINUX_KERN_OSRELEASE;
722 		buflen = sizeof(losrelease);
723 		error = sysctl_dispatch(nm, __arraycount(nm),
724 		    losrelease, &buflen,
725 		    NULL, 0, NULL, NULL, NULL);
726 		if (error)
727 			goto out;
728 
729 		nm[3] = EMUL_LINUX_KERN_VERSION;
730 		buflen = sizeof(lversion);
731 		error = sysctl_dispatch(nm, __arraycount(nm),
732 		    lversion, &buflen,
733 		    NULL, 0, NULL, NULL, NULL);
734 		if (error)
735 			goto out;
736 
737 		postype = lostype;
738 		posrelease = losrelease;
739 		pversion = lversion;
740 	} else {
741 		postype = ostype;
742 		posrelease = osrelease;
743 		strlcpy(lversion, version, sizeof(lversion));
744 		if (strchr(lversion, '\n'))
745 			*strchr(lversion, '\n') = '\0';
746 		pversion = lversion;
747 	}
748 
749 	len = snprintf(bf, LBFSZ,
750 		"%s version %s (%s@localhost) (gcc version %s) %s\n",
751 		postype, posrelease, emulname,
752 #ifdef __VERSION__
753 		__VERSION__,
754 #else
755 		"unknown",
756 #endif
757 		pversion);
758 
759 	if (len == 0)
760 		goto out;
761 
762 	error = uiomove_frombuf(bf, len, uio);
763 out:
764 	free(bf, M_TEMP);
765 	sysctl_unlock();
766 	return error;
767 }
768 
769 int
770 procfs_dosysvipc_msg(struct lwp *curl, struct proc *p,
771     struct pfsnode *pfs, struct uio *uio)
772 {
773 	char *bf;
774 	int offset = 0;
775 	int error = EFBIG;
776 
777 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
778 
779 	offset += snprintf(bf, LBFSZ,
780 	    "%10s %10s %4s  %10s %10s %5s %5s %5s %5s %5s %5s %10s %10s %10s\n",
781 	    "key", "msqid", "perms", "cbytes", "qnum", "lspid", "lrpid",
782 	    "uid", "gid", "cuid", "cgid", "stime", "rtime", "ctime");
783 	if (offset >= LBFSZ)
784 		goto out;
785 
786 #ifdef SYSVMSG
787 	for (int id = 0; id < msginfo.msgmni; id++)
788 		if (msqs[id].msq_u.msg_qbytes > 0) {
789 			offset += snprintf(&bf[offset], LBFSZ - offset,
790 			    "%10d %10d  %4o  %10zu %10lu %5u %5u %5u %5u %5u %5u %10lld %10lld %10lld\n",
791 			    (int) msqs[id].msq_u.msg_perm._key,
792 			    IXSEQ_TO_IPCID(id, msqs[id].msq_u.msg_perm),
793 			    msqs[id].msq_u.msg_perm.mode,
794 			    msqs[id].msq_u._msg_cbytes,
795 			    msqs[id].msq_u.msg_qnum,
796 			    msqs[id].msq_u.msg_lspid,
797 			    msqs[id].msq_u.msg_lrpid,
798 			    msqs[id].msq_u.msg_perm.uid,
799 			    msqs[id].msq_u.msg_perm.gid,
800 			    msqs[id].msq_u.msg_perm.cuid,
801 			    msqs[id].msq_u.msg_perm.cgid,
802 			    (long long)msqs[id].msq_u.msg_stime,
803 			    (long long)msqs[id].msq_u.msg_rtime,
804 			    (long long)msqs[id].msq_u.msg_ctime);
805 			if (offset >= LBFSZ)
806 				goto out;
807 		}
808 #endif
809 
810 	error = uiomove_frombuf(bf, offset, uio);
811 out:
812 	free(bf, M_TEMP);
813 	return error;
814 }
815 
816 int
817 procfs_dosysvipc_sem(struct lwp *curl, struct proc *p,
818     struct pfsnode *pfs, struct uio *uio)
819 {
820 	char *bf;
821 	int offset = 0;
822 	int error = EFBIG;
823 
824 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
825 
826 	offset += snprintf(bf, LBFSZ,
827 	    "%10s %10s %4s %10s %5s %5s %5s %5s %10s %10s\n",
828 	    "key", "semid", "perms", "nsems", "uid", "gid", "cuid", "cgid",
829 	    "otime", "ctime");
830 	if (offset >= LBFSZ)
831 		goto out;
832 
833 #ifdef SYSVSEM
834 	for (int id = 0; id < seminfo.semmni; id++)
835 		if ((sema[id].sem_perm.mode & SEM_ALLOC) != 0) {
836 			offset += snprintf(&bf[offset], LBFSZ - offset,
837 			    "%10d %10d  %4o %10u %5u %5u %5u %5u %10lld %10lld\n",
838 			    (int) sema[id].sem_perm._key,
839 			    IXSEQ_TO_IPCID(id, sema[id].sem_perm),
840 			    sema[id].sem_perm.mode,
841 			    sema[id].sem_nsems,
842 			    sema[id].sem_perm.uid,
843 			    sema[id].sem_perm.gid,
844 			    sema[id].sem_perm.cuid,
845 			    sema[id].sem_perm.cgid,
846 			    (long long)sema[id].sem_otime,
847 			    (long long)sema[id].sem_ctime);
848 			if (offset >= LBFSZ)
849 				goto out;
850 		}
851 #endif
852 
853 	error = uiomove_frombuf(bf, offset, uio);
854 out:
855 	free(bf, M_TEMP);
856 	return error;
857 }
858 
859 int
860 procfs_dosysvipc_shm(struct lwp *curl, struct proc *p,
861     struct pfsnode *pfs, struct uio *uio)
862 {
863 	char *bf;
864 	int offset = 0;
865 	int error = EFBIG;
866 
867 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
868 
869 	offset += snprintf(bf, LBFSZ,
870 	    "%10s %10s %s %21s %5s %5s %5s %5s %5s %5s %5s %10s %10s %10s %21s %21s\n",
871 	    "key", "shmid", "perms", "size", "cpid", "lpid", "nattch", "uid",
872 	    "gid", "cuid", "cgid", "atime", "dtime", "ctime", "rss", "swap");
873 	if (offset >= LBFSZ)
874 		goto out;
875 
876 #ifdef SYSVSHM
877 	for (unsigned int id = 0; id < shminfo.shmmni; id++)
878 		if ((shmsegs[id].shm_perm.mode & SHMSEG_ALLOCATED) != 0) {
879 			offset += snprintf(&bf[offset], LBFSZ - offset,
880 			    "%10d %10d  %4o %21zu %5u %5u  %5u %5u %5u %5u %5u %10lld %10lld %10lld %21d %21d\n",
881 			    (int) shmsegs[id].shm_perm._key,
882 			    IXSEQ_TO_IPCID(id, shmsegs[id].shm_perm),
883 			    shmsegs[id].shm_perm.mode,
884 			    shmsegs[id].shm_segsz,
885 			    shmsegs[id].shm_cpid,
886 			    shmsegs[id].shm_lpid,
887 			    shmsegs[id].shm_nattch,
888 			    shmsegs[id].shm_perm.uid,
889 			    shmsegs[id].shm_perm.gid,
890 			    shmsegs[id].shm_perm.cuid,
891 			    shmsegs[id].shm_perm.cgid,
892 			    (long long)shmsegs[id].shm_atime,
893 			    (long long)shmsegs[id].shm_dtime,
894 			    (long long)shmsegs[id].shm_ctime,
895 			    0, 0);	/* XXX rss & swp are not supported */
896 			if (offset >= LBFSZ)
897 				goto out;
898 		}
899 #endif
900 
901 	error = uiomove_frombuf(bf, offset, uio);
902 out:
903 	free(bf, M_TEMP);
904 	return error;
905 }
906 
907 static int
908 print_uint(unsigned int value, struct uio *uio)
909 {
910 	char *bf;
911 	int offset = 0;
912 	int error = EFBIG;
913 
914 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
915 	offset += snprintf(bf, LBFSZ, "%u\n", value);
916 	if (offset >= LBFSZ)
917 		goto out;
918 
919 	error = uiomove_frombuf(bf, offset, uio);
920 out:
921 	free(bf, M_TEMP);
922 	return error;
923 }
924 
925 int
926 procfs_domq_msg_def(struct lwp *curl, struct proc *p,
927     struct pfsnode *pfs, struct uio *uio)
928 {
929 	return print_uint(mq_def_maxmsg, uio);
930 }
931 
932 int
933 procfs_domq_msg_max(struct lwp *curl, struct proc *p,
934     struct pfsnode *pfs, struct uio *uio)
935 {
936 	return print_uint(mq_max_maxmsg, uio);
937 }
938 
939 int
940 procfs_domq_siz_def(struct lwp *curl, struct proc *p,
941     struct pfsnode *pfs, struct uio *uio)
942 {
943 	return print_uint(MQ_DEF_MSGSIZE, uio);
944 }
945 
946 int
947 procfs_domq_siz_max(struct lwp *curl, struct proc *p,
948     struct pfsnode *pfs, struct uio *uio)
949 {
950 	return print_uint(mq_max_msgsize, uio);
951 }
952 
953 int
954 procfs_domq_qmax(struct lwp *curl, struct proc *p,
955     struct pfsnode *pfs, struct uio *uio)
956 {
957 	return print_uint(mq_open_max, uio);
958 }
959