xref: /netbsd-src/sys/miscfs/procfs/procfs_linux.c (revision 78b530bb67e2d313c060ab72129301c684235015)
1 /*      $NetBSD: procfs_linux.c,v 1.90 2024/09/14 01:37:42 pgoyette Exp $      */
2 
3 /*
4  * Copyright (c) 2001 Wasabi Systems, Inc.
5  * All rights reserved.
6  *
7  * Written by Frank van der Linden for Wasabi Systems, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed for the NetBSD Project by
20  *      Wasabi Systems, Inc.
21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22  *    or promote products derived from this software without specific prior
23  *    written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: procfs_linux.c,v 1.90 2024/09/14 01:37:42 pgoyette Exp $");
40 
41 #if defined(_KERNEL_OPT)
42 #include "opt_sysv.h"
43 #include "opt_mqueue.h"
44 #endif
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/atomic.h>
49 #include <sys/time.h>
50 #include <sys/cpu.h>
51 #include <sys/kernel.h>
52 #include <sys/proc.h>
53 #include <sys/vnode.h>
54 #include <sys/exec.h>
55 #include <sys/resource.h>
56 #include <sys/resourcevar.h>
57 #include <sys/signal.h>
58 #include <sys/signalvar.h>
59 #include <sys/tty.h>
60 #include <sys/malloc.h>
61 #include <sys/mount.h>
62 #include <sys/conf.h>
63 #include <sys/sysctl.h>
64 #include <sys/kauth.h>
65 #include <sys/filedesc.h>
66 #ifdef SYSVMSG
67 #include <sys/msg.h>
68 #endif
69 #ifdef SYSVSEM
70 #include <sys/sem.h>
71 #endif
72 #ifdef SYSVSHM
73 #include <sys/shm.h>
74 #endif
75 #ifdef MQUEUE
76 #include <sys/mqueue.h>
77 #endif
78 
79 #include <miscfs/procfs/procfs.h>
80 
81 #include <compat/linux/common/linux_exec.h>
82 #include <compat/linux32/common/linux32_sysctl.h>
83 
84 #include <uvm/uvm.h>
85 #include <uvm/uvm_extern.h>
86 
/*
 * Device switch conversion table and the number of slots in it; both
 * are defined outside this file and walked by procfs_dodevices().
 */
extern struct devsw_conv *devsw_conv;
extern int max_devsw_convs;
#ifdef MQUEUE
/*
 * Message queue tunables, defined outside this file, that the
 * procfs_domq_*() handlers print.
 */
extern u_int mq_open_max;
extern u_int mq_max_msgsize;
extern u_int mq_def_maxmsg;
extern u_int mq_max_maxmsg;
#endif
95 
96 
/* Convert a page count to bytes. */
#define PGTOB(p)	((unsigned long)(p) << PAGE_SHIFT)
/* Convert a page count to kilobytes (the shift is reduced by 10, i.e. /1024). */
#define PGTOKB(p)	((unsigned long)(p) << (PAGE_SHIFT - 10))

/* Size in bytes of the scratch buffer the handlers format their output into. */
#define LBFSZ (8 * 1024)
101 
102 static void
103 get_proc_size_info(struct proc *p, struct vm_map *map, unsigned long *stext,
104     unsigned long *etext, unsigned long *sstack)
105 {
106 	struct vm_map_entry *entry;
107 
108 	*stext = 0;
109 	*etext = 0;
110 	*sstack = 0;
111 
112 	vm_map_lock_read(map);
113 
114 	for (entry = map->header.next; entry != &map->header;
115 	    entry = entry->next) {
116 		if (UVM_ET_ISSUBMAP(entry))
117 			continue;
118 		/* assume text is the first entry */
119 		if (*stext == *etext) {
120 			*stext = entry->start;
121 			*etext = entry->end;
122 			break;
123 		}
124 	}
125 #if defined(LINUX_USRSTACK32) && defined(USRSTACK32)
126 	if (strcmp(p->p_emul->e_name, "linux32") == 0 &&
127 	    LINUX_USRSTACK32 < USRSTACK32)
128 		*sstack = (unsigned long)LINUX_USRSTACK32;
129 	else
130 #endif
131 #ifdef LINUX_USRSTACK
132 	if (strcmp(p->p_emul->e_name, "linux") == 0 &&
133 	    LINUX_USRSTACK < USRSTACK)
134 		*sstack = (unsigned long)LINUX_USRSTACK;
135 	else
136 #endif
137 #ifdef	USRSTACK32
138 	if (strstr(p->p_emul->e_name, "32") != NULL)
139 		*sstack = (unsigned long)USRSTACK32;
140 	else
141 #endif
142 		*sstack = (unsigned long)USRSTACK;
143 
144 	/*
145 	 * jdk 1.6 compares low <= addr && addr < high
146 	 * if we put addr == high, then the test fails
147 	 * so eat one page.
148 	 */
149 	*sstack -= PAGE_SIZE;
150 
151 	vm_map_unlock_read(map);
152 }
153 
154 /*
155  * Linux compatible /proc/meminfo. Only active when the -o linux
156  * mountflag is used.
157  */
158 int
159 procfs_domeminfo(struct lwp *curl, struct proc *p,
160     struct pfsnode *pfs, struct uio *uio)
161 {
162 	char *bf;
163 	int len;
164 	int error = 0;
165 	long filepg, anonpg, execpg, freepg;
166 
167 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
168 
169 	/* uvm_availmem() will sync the counters if needed. */
170 	freepg = (long)uvm_availmem(true);
171 	filepg = (long)(cpu_count_get(CPU_COUNT_FILECLEAN) +
172 	    cpu_count_get(CPU_COUNT_FILEDIRTY) +
173 	    cpu_count_get(CPU_COUNT_FILEUNKNOWN) -
174 	    cpu_count_get(CPU_COUNT_EXECPAGES));
175 	anonpg = (long)(cpu_count_get(CPU_COUNT_ANONCLEAN) +
176 	    cpu_count_get(CPU_COUNT_ANONDIRTY) +
177 	    cpu_count_get(CPU_COUNT_ANONUNKNOWN));
178 	execpg = (long)cpu_count_get(CPU_COUNT_EXECPAGES);
179 
180 	len = snprintf(bf, LBFSZ,
181 		"        total:    used:    free:  shared: buffers: cached:\n"
182 		"Mem:  %8lu %8lu %8lu %8lu %8lu %8lu\n"
183 		"Swap: %8lu %8lu %8lu\n"
184 		"MemTotal:  %8lu kB\n"
185 		"MemFree:   %8lu kB\n"
186 		"MemShared: %8lu kB\n"
187 		"Buffers:   %8lu kB\n"
188 		"Cached:    %8lu kB\n"
189 		"SwapTotal: %8lu kB\n"
190 		"SwapFree:  %8lu kB\n",
191 		PGTOB(uvmexp.npages),
192 		PGTOB(uvmexp.npages - freepg),
193 		PGTOB(freepg),
194 		0L,
195 		PGTOB(filepg),
196 		PGTOB(anonpg + filepg + execpg),
197 		PGTOB(uvmexp.swpages),
198 		PGTOB(uvmexp.swpginuse),
199 		PGTOB(uvmexp.swpages - uvmexp.swpginuse),
200 		PGTOKB(uvmexp.npages),
201 		PGTOKB(freepg),
202 		0L,
203 		PGTOKB(freepg),
204 		PGTOKB(anonpg + filepg + execpg),
205 		PGTOKB(uvmexp.swpages),
206 		PGTOKB(uvmexp.swpages - uvmexp.swpginuse));
207 
208 	if (len == 0)
209 		goto out;
210 
211 	error = uiomove_frombuf(bf, len, uio);
212 out:
213 	free(bf, M_TEMP);
214 	return error;
215 }
216 
217 /*
218  * Linux compatible /proc/devices. Only active when the -o linux
219  * mountflag is used.
220  */
221 int
222 procfs_dodevices(struct lwp *curl, struct proc *p,
223     struct pfsnode *pfs, struct uio *uio)
224 {
225 	char *bf;
226 	int offset = 0;
227 	int i, error = ENAMETOOLONG;
228 
229 	/* XXX elad - may need filtering. */
230 
231 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
232 
233 	offset += snprintf(&bf[offset], LBFSZ - offset, "Character devices:\n");
234 	if (offset >= LBFSZ)
235 		goto out;
236 
237 	mutex_enter(&device_lock);
238 	for (i = 0; i < max_devsw_convs; i++) {
239 		if ((devsw_conv[i].d_name == NULL) ||
240 		    (devsw_conv[i].d_cmajor == -1))
241 			continue;
242 
243 		offset += snprintf(&bf[offset], LBFSZ - offset,
244 		    "%3d %s\n", devsw_conv[i].d_cmajor, devsw_conv[i].d_name);
245 		if (offset >= LBFSZ) {
246 			mutex_exit(&device_lock);
247 			goto out;
248 		}
249 	}
250 
251 	offset += snprintf(&bf[offset], LBFSZ - offset, "\nBlock devices:\n");
252 	if (offset >= LBFSZ) {
253 		mutex_exit(&device_lock);
254 		goto out;
255 	}
256 
257 	for (i = 0; i < max_devsw_convs; i++) {
258 		if ((devsw_conv[i].d_name == NULL) ||
259 		    (devsw_conv[i].d_bmajor == -1))
260 			continue;
261 
262 		offset += snprintf(&bf[offset], LBFSZ - offset,
263 		    "%3d %s\n", devsw_conv[i].d_bmajor, devsw_conv[i].d_name);
264 		if (offset >= LBFSZ) {
265 			mutex_exit(&device_lock);
266 			goto out;
267 		}
268 	}
269 	mutex_exit(&device_lock);
270 
271 	error = uiomove_frombuf(bf, offset, uio);
272 out:
273 	free(bf, M_TEMP);
274 	return error;
275 }
276 
277 /*
278  * Linux compatible /proc/stat. Only active when the -o linux
279  * mountflag is used.
280  */
281 int
282 procfs_docpustat(struct lwp *curl, struct proc *p,
283     struct pfsnode *pfs, struct uio *uio)
284 {
285 	char		*bf;
286 	int	 	 error;
287 	int	 	 len;
288 #if defined(MULTIPROCESSOR)
289         struct cpu_info *ci;
290         CPU_INFO_ITERATOR cii;
291 #endif
292 	int	 	 i;
293 
294 	error = ENAMETOOLONG;
295 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
296 
297 	len = snprintf(bf, LBFSZ,
298 		"cpu %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
299 		curcpu()->ci_schedstate.spc_cp_time[CP_USER],
300 		curcpu()->ci_schedstate.spc_cp_time[CP_NICE],
301 		curcpu()->ci_schedstate.spc_cp_time[CP_SYS] /*+ [CP_INTR]*/,
302 		curcpu()->ci_schedstate.spc_cp_time[CP_IDLE]);
303 	if (len == 0)
304 		goto out;
305 
306 #if defined(MULTIPROCESSOR)
307 #define ALLCPUS	CPU_INFO_FOREACH(cii, ci)
308 #define CPUNAME	ci
309 #else
310 #define ALLCPUS	; i < 1 ;
311 #define CPUNAME	curcpu()
312 #endif
313 
314 	i = 0;
315 	for (ALLCPUS) {
316 		len += snprintf(&bf[len], LBFSZ - len,
317 			"cpu%d %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64
318 			"\n", i,
319 			CPUNAME->ci_schedstate.spc_cp_time[CP_USER],
320 			CPUNAME->ci_schedstate.spc_cp_time[CP_NICE],
321 			CPUNAME->ci_schedstate.spc_cp_time[CP_SYS],
322 			CPUNAME->ci_schedstate.spc_cp_time[CP_IDLE]);
323 		if (len >= LBFSZ)
324 			goto out;
325 		i += 1;
326 	}
327 
328 	cpu_count_sync(true);
329 
330 	struct timeval btv;
331 	getmicroboottime(&btv);
332 
333 	len += snprintf(&bf[len], LBFSZ - len,
334 			"disk 0 0 0 0\n"
335 			"page %u %u\n"
336 			"swap %u %u\n"
337 			"intr %"PRId64"\n"
338 			"ctxt %"PRId64"\n"
339 			"btime %"PRId64"\n",
340 			uvmexp.pageins, uvmexp.pdpageouts,
341 			uvmexp.pgswapin, uvmexp.pgswapout,
342 			cpu_count_get(CPU_COUNT_NINTR),
343 			cpu_count_get(CPU_COUNT_NSWTCH),
344 			btv.tv_sec);
345 	if (len >= LBFSZ)
346 		goto out;
347 
348 	error = uiomove_frombuf(bf, len, uio);
349 out:
350 	free(bf, M_TEMP);
351 	return error;
352 }
353 
354 /*
355  * Linux compatible /proc/loadavg. Only active when the -o linux
356  * mountflag is used.
357  */
358 int
359 procfs_doloadavg(struct lwp *curl, struct proc *p,
360     struct pfsnode *pfs, struct uio *uio)
361 {
362 	char	*bf;
363 	int 	 error;
364 	int 	 len;
365 
366 	error = ENAMETOOLONG;
367 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
368 
369 	averunnable.fscale = FSCALE;
370 	len = snprintf(bf, LBFSZ,
371 	        "%d.%02d %d.%02d %d.%02d %d/%d %d\n",
372 		(int)(averunnable.ldavg[0] / averunnable.fscale),
373 		(int)(averunnable.ldavg[0] * 100 / averunnable.fscale % 100),
374 		(int)(averunnable.ldavg[1] / averunnable.fscale),
375 		(int)(averunnable.ldavg[1] * 100 / averunnable.fscale % 100),
376 		(int)(averunnable.ldavg[2] / averunnable.fscale),
377 		(int)(averunnable.ldavg[2] * 100 / averunnable.fscale % 100),
378 		1,		/* number of ONPROC processes */
379 		atomic_load_relaxed(&nprocs),
380 		30000);		/* last pid */
381 	if (len == 0)
382 		goto out;
383 
384 	error = uiomove_frombuf(bf, len, uio);
385 out:
386 	free(bf, M_TEMP);
387 	return error;
388 }
389 
390 /*
391  * Linux compatible /proc/<pid>/statm. Only active when the -o linux
392  * mountflag is used.
393  */
394 int
395 procfs_do_pid_statm(struct lwp *curl, struct lwp *l,
396     struct pfsnode *pfs, struct uio *uio)
397 {
398 	struct vmspace	*vm;
399 	struct proc	*p = l->l_proc;
400 	char		*bf;
401 	int	 	 error;
402 	int	 	 len;
403 	struct kinfo_proc2 ki;
404 
405 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
406 
407 	/* XXX - we use values from vmspace, since dsl says that ru figures
408 	   are always 0 except for zombies. See kvm_proc.c::kvm_getproc2() */
409 	if ((error = proc_vmspace_getref(p, &vm)) != 0) {
410 		goto out;
411 	}
412 
413 	mutex_enter(&proc_lock);
414 	mutex_enter(p->p_lock);
415 
416 	/* retrieve RSS size */
417 	memset(&ki, 0, sizeof(ki));
418 	fill_kproc2(p, &ki, false, false);
419 
420 	mutex_exit(p->p_lock);
421 	mutex_exit(&proc_lock);
422 
423 	uvmspace_free(vm);
424 
425 	len = snprintf(bf, LBFSZ,
426 	        "%lu %lu %lu %lu %lu %lu %lu\n",
427 		(unsigned long)(ki.p_vm_msize),	/* size */
428 		(unsigned long)(ki.p_vm_rssize),/* resident */
429 		(unsigned long)(ki.p_uru_ixrss),/* shared */
430 		(unsigned long)(ki.p_vm_tsize),	/* text */
431 		(unsigned long) 0,		/* library (unused) */
432 		(unsigned long)(ki.p_vm_dsize + ki.p_vm_ssize),	/* data+stack */
433 		(unsigned long) 0);		/* dirty */
434 
435 	if (len == 0)
436 		goto out;
437 
438 	error = uiomove_frombuf(bf, len, uio);
439 out:
440 	free(bf, M_TEMP);
441 	return error;
442 }
443 
/*
 * Convert a (seconds, microseconds) pair into units of 10000
 * microseconds, i.e. hundredths of a second.
 */
#define UTIME2TICKS(s,u)	(((uint64_t)(s) * 1000000 + (u)) / 10000)

/*
 * Linux compatible /proc/<pid>/stat. Only active when the -o linux
 * mountflag is used.
 *
 * Formats a single line of 39 space-separated fields describing the
 * process that owns l.  Most values come from a kinfo_proc2 snapshot
 * filled in by fill_kproc2(); the remainder come from p->p_stats->p_cru,
 * calcru(), the RSS resource limit and get_proc_size_info().
 *
 * Returns the error from proc_vmspace_getref(), or the result of
 * uiomove_frombuf().
 */
int
procfs_do_pid_stat(struct lwp *curl, struct lwp *l,
    struct pfsnode *pfs, struct uio *uio)
{
	char *bf;
	struct proc *p = l->l_proc;
	int len;
	struct rusage *cru = &p->p_stats->p_cru;
	unsigned long stext = 0, etext = 0, sstack = 0;
	struct timeval rt;
	struct vmspace	*vm;
	struct kinfo_proc2 ki;
	int error;

	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);

	if ((error = proc_vmspace_getref(p, &vm)) != 0) {
		goto out;
	}

	/* Text bounds and stack start, read from the process's map. */
	get_proc_size_info(p, &vm->vm_map, &stext, &etext, &sstack);

	/*
	 * The snapshot and the formatting below, which also reads through
	 * cru and p->p_rlimit, are done with both locks held.
	 */
	mutex_enter(&proc_lock);
	mutex_enter(p->p_lock);

	memset(&ki, 0, sizeof(ki));
	fill_kproc2(p, &ki, false, false);
	calcru(p, NULL, NULL, NULL, &rt);

	/* One format-string line per group of arguments below. */
	len = snprintf(bf, LBFSZ,
	    "%d (%s) %c %d %d %d %u %d "
	    "%u "
	    "%"PRIu64" %lu %"PRIu64" %lu %"PRIu64" %"PRIu64" %"PRIu64" %"PRIu64" "
	    "%d %d %"PRIu64" "
	    "%lld %"PRIu64" %"PRId64" %lu %"PRIu64" "
	    "%lu %lu %lu "
	    "%u %u "
	    "%u %u %u %u "
	    "%"PRIu64" %"PRIu64" %"PRIu64" %d %"PRIu64"\n",

	    ki.p_pid,						/* 1 pid */
	    ki.p_comm,						/* 2 tcomm */
	    /* state letter indexed by p_stat; values above 8 print '0' */
	    "0RRSTZXR8"[(ki.p_stat > 8) ? 0 : (int)ki.p_stat],	/* 3 state */
	    ki.p_ppid,						/* 4 ppid */
	    ki.p__pgid,						/* 5 pgrp */
	    ki.p_sid,						/* 6 sid */
	    (ki.p_tdev != (uint32_t)NODEV) ? ki.p_tdev : 0,	/* 7 tty_nr */
	    ki.p_tpgid,						/* 8 tty_pgrp */

	    ki.p_flag,						/* 9 flags */

	    ki.p_uru_minflt,					/* 10 min_flt */
	    cru->ru_minflt,					/* 11 ru_minflt from p_cru */
	    ki.p_uru_majflt,					/* 12 maj_flt */
	    cru->ru_majflt,					/* 13 ru_majflt from p_cru */
	    UTIME2TICKS(ki.p_uutime_sec, ki.p_uutime_usec),	/* 14 utime */
	    UTIME2TICKS(ki.p_ustime_sec, ki.p_ustime_usec),	/* 15 stime */
	    UTIME2TICKS(cru->ru_utime.tv_sec, cru->ru_utime.tv_usec), /* 16 cutime */
	    UTIME2TICKS(cru->ru_stime.tv_sec, cru->ru_stime.tv_usec), /* 17 cstime */

	    ki.p_priority,				/* XXX: 18 priority */
	    ki.p_nice - NZERO,				/* 19 nice */
	    ki.p_nlwps,					/* 20 num_threads */

	    (long long)rt.tv_sec,			/* 21 seconds part of rt from calcru() */
	    UTIME2TICKS(ki.p_ustart_sec, ki.p_ustart_usec), /* 22 start_time */
	    ki.p_vm_msize,				/* 23 vsize */
	    PGTOKB(ki.p_vm_rssize),			/* 24 rss */
	    p->p_rlimit[RLIMIT_RSS].rlim_cur,		/* 25 rsslim */

	    stext,					/* 26 start_code */
	    etext,					/* 27 end_code */
	    sstack,					/* 28 start_stack */

	    0,						/* XXX: 29 esp */
	    0,						/* XXX: 30 eip */

	    ki.p_siglist.__bits[0],			/* XXX: 31 pending */
	    0,						/* XXX: 32 blocked */
	    ki.p_sigignore.__bits[0],		/* 33 sigign */
	    ki.p_sigcatch.__bits[0],		/* 34 sigcatch */

	    ki.p_wchan,					/* 35 wchan */
	    ki.p_uru_nvcsw,				/* 36 p_uru_nvcsw */
	    ki.p_uru_nivcsw,				/* 37 p_uru_nivcsw */
	    ki.p_exitsig,				/* 38 exit_signal */
	    ki.p_cpuid);				/* 39 task_cpu */

	mutex_exit(p->p_lock);
	mutex_exit(&proc_lock);

	uvmspace_free(vm);

	/* Nothing formatted: return with error still 0 from the getref. */
	if (len == 0)
		goto out;

	error = uiomove_frombuf(bf, len, uio);
out:
	free(bf, M_TEMP);
	return error;
}
551 
552 int
553 procfs_docpuinfo(struct lwp *curl, struct proc *p,
554     struct pfsnode *pfs, struct uio *uio)
555 {
556 	size_t len = LBFSZ;
557 	char *bf = NULL;
558 	int error;
559 
560 	do {
561 		if (bf)
562 			free(bf, M_TEMP);
563 		bf = malloc(len, M_TEMP, M_WAITOK);
564 	} while (procfs_getcpuinfstr(bf, &len) < 0);
565 
566 	if (len == 0) {
567 		error = 0;
568 		goto done;
569 	}
570 
571 	error = uiomove_frombuf(bf, len, uio);
572 done:
573 	free(bf, M_TEMP);
574 	return error;
575 }
576 
577 int
578 procfs_douptime(struct lwp *curl, struct proc *p,
579     struct pfsnode *pfs, struct uio *uio)
580 {
581 	char *bf;
582 	int len;
583 	struct timeval runtime;
584 	u_int64_t idle;
585 	int error = 0;
586 
587 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
588 
589 	microuptime(&runtime);
590 	idle = curcpu()->ci_schedstate.spc_cp_time[CP_IDLE];
591 	len = snprintf(bf, LBFSZ,
592 	    "%lld.%02lu %" PRIu64 ".%02" PRIu64 "\n",
593 	    (long long)runtime.tv_sec, (long)runtime.tv_usec / 10000,
594 	    idle / hz, (((idle % hz) * 100) / hz) % 100);
595 
596 	if (len == 0)
597 		goto out;
598 
599 	error = uiomove_frombuf(bf, len, uio);
600 out:
601 	free(bf, M_TEMP);
602 	return error;
603 }
604 
605 static int
606 procfs_format_sfs(char **mtab, size_t *mlen, char *buf, size_t blen,
607     const struct statvfs *sfs, struct lwp *curl, int suser)
608 {
609 	const char *fsname;
610 
611 	/* Linux uses different names for some filesystems */
612 	fsname = sfs->f_fstypename;
613 	if (strcmp(fsname, "procfs") == 0)
614 		fsname = "proc";
615 	else if (strcmp(fsname, "ext2fs") == 0)
616 		fsname = "ext2";
617 
618 	blen = snprintf(buf, blen, "%s %s %s %s%s%s%s%s%s 0 0\n",
619 	    sfs->f_mntfromname, sfs->f_mntonname, fsname,
620 	    (sfs->f_flag & ST_RDONLY) ? "ro" : "rw",
621 	    (sfs->f_flag & ST_NOSUID) ? ",nosuid" : "",
622 	    (sfs->f_flag & ST_NOEXEC) ? ",noexec" : "",
623 	    (sfs->f_flag & ST_NODEV) ? ",nodev" : "",
624 	    (sfs->f_flag & ST_SYNCHRONOUS) ? ",sync" : "",
625 	    (sfs->f_flag & ST_NOATIME) ? ",noatime" : "");
626 
627 	*mtab = realloc(*mtab, *mlen + blen, M_TEMP, M_WAITOK);
628 	memcpy(*mtab + *mlen, buf, blen);
629 	*mlen += blen;
630 	return sfs->f_mntonname[0] == '/' && sfs->f_mntonname[1] == '\0';
631 }
632 
633 int
634 procfs_domounts(struct lwp *curl, struct proc *p,
635     struct pfsnode *pfs, struct uio *uio)
636 {
637 	char *bf, *mtab = NULL;
638 	size_t mtabsz = 0;
639 	mount_iterator_t *iter;
640 	struct mount *mp;
641 	int error = 0, root = 0;
642 	struct cwdinfo *cwdi = curl->l_proc->p_cwdi;
643 	struct statvfs *sfs;
644 
645 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
646 
647 	sfs = malloc(sizeof(*sfs), M_TEMP, M_WAITOK);
648 	mountlist_iterator_init(&iter);
649 	while ((mp = mountlist_iterator_next(iter)) != NULL) {
650 		if ((error = dostatvfs(mp, sfs, curl, MNT_WAIT, 0)) == 0)
651 			root |= procfs_format_sfs(&mtab, &mtabsz, bf, LBFSZ,
652 			    sfs, curl, 0);
653 	}
654 	mountlist_iterator_destroy(iter);
655 	free(sfs, M_TEMP);
656 
657 	/*
658 	 * If we are inside a chroot that is not itself a mount point,
659 	 * fake a root entry.
660 	 */
661 	if (!root && cwdi->cwdi_rdir)
662 		(void)procfs_format_sfs(&mtab, &mtabsz, bf, LBFSZ,
663 		    &cwdi->cwdi_rdir->v_mount->mnt_stat, curl, 1);
664 
665 	free(bf, M_TEMP);
666 
667 	if (mtabsz > 0) {
668 		error = uiomove_frombuf(mtab, mtabsz, uio);
669 		free(mtab, M_TEMP);
670 	}
671 
672 	return error;
673 }
674 
675 /*
676  * Linux compatible /proc/version. Only active when the -o linux
677  * mountflag is used.
678  */
679 int
680 procfs_doversion(struct lwp *curl, struct proc *p,
681     struct pfsnode *pfs, struct uio *uio)
682 {
683 	char *bf;
684 	char lostype[20], losrelease[20], lversion[80];
685 	const char *postype, *posrelease, *pversion;
686 	const char *emulname = curlwp->l_proc->p_emul->e_name;
687 	int len;
688 	int error = 0;
689 	int nm[4];
690 	size_t buflen;
691 
692 	CTASSERT(EMUL_LINUX_KERN_OSTYPE == EMUL_LINUX32_KERN_OSTYPE);
693 	CTASSERT(EMUL_LINUX_KERN_OSRELEASE == EMUL_LINUX32_KERN_OSRELEASE);
694 	CTASSERT(EMUL_LINUX_KERN_VERSION == EMUL_LINUX32_KERN_VERSION);
695 
696 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
697 
698 	sysctl_lock(false);
699 
700 	if (strncmp(emulname, "linux", 5) == 0) {
701 		/*
702 		 * Lookup the emulation ostype, osrelease, and version.
703 		 * Since compat_linux and compat_linux32 can be built as
704 		 * modules, we use sysctl to obtain the values instead of
705 		 * using the symbols directly.
706 		 */
707 
708 		if (strcmp(emulname, "linux32") == 0) {
709 			nm[0] = CTL_EMUL;
710 			nm[1] = EMUL_LINUX32;
711 			nm[2] = EMUL_LINUX32_KERN;
712 		} else {
713 			nm[0] = CTL_EMUL;
714 			nm[1] = EMUL_LINUX;
715 			nm[2] = EMUL_LINUX_KERN;
716 		}
717 
718 		nm[3] = EMUL_LINUX_KERN_OSTYPE;
719 		buflen = sizeof(lostype);
720 		error = sysctl_dispatch(nm, __arraycount(nm),
721 		    lostype, &buflen,
722 		    NULL, 0, NULL, NULL, NULL);
723 		if (error)
724 			goto out;
725 
726 		nm[3] = EMUL_LINUX_KERN_OSRELEASE;
727 		buflen = sizeof(losrelease);
728 		error = sysctl_dispatch(nm, __arraycount(nm),
729 		    losrelease, &buflen,
730 		    NULL, 0, NULL, NULL, NULL);
731 		if (error)
732 			goto out;
733 
734 		nm[3] = EMUL_LINUX_KERN_VERSION;
735 		buflen = sizeof(lversion);
736 		error = sysctl_dispatch(nm, __arraycount(nm),
737 		    lversion, &buflen,
738 		    NULL, 0, NULL, NULL, NULL);
739 		if (error)
740 			goto out;
741 
742 		postype = lostype;
743 		posrelease = losrelease;
744 		pversion = lversion;
745 	} else {
746 		postype = ostype;
747 		posrelease = osrelease;
748 		strlcpy(lversion, version, sizeof(lversion));
749 		if (strchr(lversion, '\n'))
750 			*strchr(lversion, '\n') = '\0';
751 		pversion = lversion;
752 	}
753 
754 	len = snprintf(bf, LBFSZ,
755 		"%s version %s (%s@localhost) (gcc version %s) %s\n",
756 		postype, posrelease, emulname,
757 #ifdef __VERSION__
758 		__VERSION__,
759 #else
760 		"unknown",
761 #endif
762 		pversion);
763 
764 	if (len == 0)
765 		goto out;
766 
767 	error = uiomove_frombuf(bf, len, uio);
768 out:
769 	free(bf, M_TEMP);
770 	sysctl_unlock();
771 	return error;
772 }
773 
774 int
775 procfs_dosysvipc_msg(struct lwp *curl, struct proc *p,
776     struct pfsnode *pfs, struct uio *uio)
777 {
778 	char *bf;
779 	int offset = 0;
780 	int error = EFBIG;
781 
782 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
783 
784 	offset += snprintf(bf, LBFSZ,
785 	    "%10s %10s %4s  %10s %10s %5s %5s %5s %5s %5s %5s %10s %10s %10s\n",
786 	    "key", "msqid", "perms", "cbytes", "qnum", "lspid", "lrpid",
787 	    "uid", "gid", "cuid", "cgid", "stime", "rtime", "ctime");
788 	if (offset >= LBFSZ)
789 		goto out;
790 
791 #ifdef SYSVMSG
792 	for (int id = 0; id < msginfo.msgmni; id++)
793 		if (msqs[id].msq_u.msg_qbytes > 0) {
794 			offset += snprintf(&bf[offset], LBFSZ - offset,
795 			    "%10d %10d  %4o  %10zu %10lu %5u %5u %5u %5u %5u %5u %10lld %10lld %10lld\n",
796 			    (int) msqs[id].msq_u.msg_perm._key,
797 			    IXSEQ_TO_IPCID(id, msqs[id].msq_u.msg_perm),
798 			    msqs[id].msq_u.msg_perm.mode,
799 			    msqs[id].msq_u._msg_cbytes,
800 			    msqs[id].msq_u.msg_qnum,
801 			    msqs[id].msq_u.msg_lspid,
802 			    msqs[id].msq_u.msg_lrpid,
803 			    msqs[id].msq_u.msg_perm.uid,
804 			    msqs[id].msq_u.msg_perm.gid,
805 			    msqs[id].msq_u.msg_perm.cuid,
806 			    msqs[id].msq_u.msg_perm.cgid,
807 			    (long long)msqs[id].msq_u.msg_stime,
808 			    (long long)msqs[id].msq_u.msg_rtime,
809 			    (long long)msqs[id].msq_u.msg_ctime);
810 			if (offset >= LBFSZ)
811 				goto out;
812 		}
813 #endif
814 
815 	error = uiomove_frombuf(bf, offset, uio);
816 out:
817 	free(bf, M_TEMP);
818 	return error;
819 }
820 
821 int
822 procfs_dosysvipc_sem(struct lwp *curl, struct proc *p,
823     struct pfsnode *pfs, struct uio *uio)
824 {
825 	char *bf;
826 	int offset = 0;
827 	int error = EFBIG;
828 
829 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
830 
831 	offset += snprintf(bf, LBFSZ,
832 	    "%10s %10s %4s %10s %5s %5s %5s %5s %10s %10s\n",
833 	    "key", "semid", "perms", "nsems", "uid", "gid", "cuid", "cgid",
834 	    "otime", "ctime");
835 	if (offset >= LBFSZ)
836 		goto out;
837 
838 #ifdef SYSVSEM
839 	for (int id = 0; id < seminfo.semmni; id++)
840 		if ((sema[id].sem_perm.mode & SEM_ALLOC) != 0) {
841 			offset += snprintf(&bf[offset], LBFSZ - offset,
842 			    "%10d %10d  %4o %10u %5u %5u %5u %5u %10lld %10lld\n",
843 			    (int) sema[id].sem_perm._key,
844 			    IXSEQ_TO_IPCID(id, sema[id].sem_perm),
845 			    sema[id].sem_perm.mode,
846 			    sema[id].sem_nsems,
847 			    sema[id].sem_perm.uid,
848 			    sema[id].sem_perm.gid,
849 			    sema[id].sem_perm.cuid,
850 			    sema[id].sem_perm.cgid,
851 			    (long long)sema[id].sem_otime,
852 			    (long long)sema[id].sem_ctime);
853 			if (offset >= LBFSZ)
854 				goto out;
855 		}
856 #endif
857 
858 	error = uiomove_frombuf(bf, offset, uio);
859 out:
860 	free(bf, M_TEMP);
861 	return error;
862 }
863 
864 int
865 procfs_dosysvipc_shm(struct lwp *curl, struct proc *p,
866     struct pfsnode *pfs, struct uio *uio)
867 {
868 	char *bf;
869 	int offset = 0;
870 	int error = EFBIG;
871 
872 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
873 
874 	offset += snprintf(bf, LBFSZ,
875 	    "%10s %10s %s %21s %5s %5s %5s %5s %5s %5s %5s %10s %10s %10s %21s %21s\n",
876 	    "key", "shmid", "perms", "size", "cpid", "lpid", "nattch", "uid",
877 	    "gid", "cuid", "cgid", "atime", "dtime", "ctime", "rss", "swap");
878 	if (offset >= LBFSZ)
879 		goto out;
880 
881 #ifdef SYSVSHM
882 	for (unsigned int id = 0; id < shminfo.shmmni; id++)
883 		if ((shmsegs[id].shm_perm.mode & SHMSEG_ALLOCATED) != 0) {
884 			offset += snprintf(&bf[offset], LBFSZ - offset,
885 			    "%10d %10d  %4o %21zu %5u %5u  %5u %5u %5u %5u %5u %10lld %10lld %10lld %21d %21d\n",
886 			    (int) shmsegs[id].shm_perm._key,
887 			    IXSEQ_TO_IPCID(id, shmsegs[id].shm_perm),
888 			    shmsegs[id].shm_perm.mode,
889 			    shmsegs[id].shm_segsz,
890 			    shmsegs[id].shm_cpid,
891 			    shmsegs[id].shm_lpid,
892 			    shmsegs[id].shm_nattch,
893 			    shmsegs[id].shm_perm.uid,
894 			    shmsegs[id].shm_perm.gid,
895 			    shmsegs[id].shm_perm.cuid,
896 			    shmsegs[id].shm_perm.cgid,
897 			    (long long)shmsegs[id].shm_atime,
898 			    (long long)shmsegs[id].shm_dtime,
899 			    (long long)shmsegs[id].shm_ctime,
900 			    0, 0);	/* XXX rss & swp are not supported */
901 			if (offset >= LBFSZ)
902 				goto out;
903 		}
904 #endif
905 
906 	error = uiomove_frombuf(bf, offset, uio);
907 out:
908 	free(bf, M_TEMP);
909 	return error;
910 }
911 
#ifdef MQUEUE
/*
 * print_uint() is used as an expression ("return print_uint(...);"),
 * so its expansion must not end in a semicolon.
 */
#define print_uint(value, uio) PFS_print_uint(value, uio)

/*
 * Write value to uio as a decimal number followed by a newline.
 *
 * Returns EFBIG if the text does not fit in the local buffer,
 * otherwise the result of uiomove_frombuf().
 */
static int
PFS_print_uint(unsigned int value, struct uio *uio)
{
	/* 3 decimal digits per byte is an upper bound; +2 for "\n" and NUL. */
	char bf[3 * sizeof(value) + 2];
	int len;

	len = snprintf(bf, sizeof(bf), "%u\n", value);
	if (len < 0 || (size_t)len >= sizeof(bf))
		return EFBIG;

	return uiomove_frombuf(bf, len, uio);
}
#else

/* Without MQUEUE there is nothing to report; the arguments are ignored. */
#define print_uint(value, uio) EINVAL

#endif
937 
/*
 * Report mq_def_maxmsg through print_uint().  In a kernel built without
 * MQUEUE, print_uint() expands to EINVAL and nothing is written.
 */
int
procfs_domq_msg_def(struct lwp *curl, struct proc *p,
    struct pfsnode *pfs, struct uio *uio)
{
	return print_uint(mq_def_maxmsg, uio);
}
944 
/*
 * Report mq_max_maxmsg through print_uint().  In a kernel built without
 * MQUEUE, print_uint() expands to EINVAL and nothing is written.
 */
int
procfs_domq_msg_max(struct lwp *curl, struct proc *p,
    struct pfsnode *pfs, struct uio *uio)
{
	return print_uint(mq_max_maxmsg, uio);
}
951 
/*
 * Report the MQ_DEF_MSGSIZE constant through print_uint().  In a kernel
 * built without MQUEUE, print_uint() expands to EINVAL and nothing is
 * written.
 */
int
procfs_domq_siz_def(struct lwp *curl, struct proc *p,
    struct pfsnode *pfs, struct uio *uio)
{
	return print_uint(MQ_DEF_MSGSIZE, uio);
}
958 
/*
 * Report mq_max_msgsize through print_uint().  In a kernel built without
 * MQUEUE, print_uint() expands to EINVAL and nothing is written.
 */
int
procfs_domq_siz_max(struct lwp *curl, struct proc *p,
    struct pfsnode *pfs, struct uio *uio)
{
	return print_uint(mq_max_msgsize, uio);
}
965 
/*
 * Report mq_open_max through print_uint().  In a kernel built without
 * MQUEUE, print_uint() expands to EINVAL and nothing is written.
 */
int
procfs_domq_qmax(struct lwp *curl, struct proc *p,
    struct pfsnode *pfs, struct uio *uio)
{
	return print_uint(mq_open_max, uio);
}
972