xref: /netbsd-src/sys/miscfs/procfs/procfs_linux.c (revision 8f9e04edea810a3d00fc21cffe16a72a9d1deae6)
1 /*      $NetBSD: procfs_linux.c,v 1.57 2009/01/11 03:16:33 christos Exp $      */
2 
3 /*
4  * Copyright (c) 2001 Wasabi Systems, Inc.
5  * All rights reserved.
6  *
7  * Written by Frank van der Linden for Wasabi Systems, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed for the NetBSD Project by
20  *      Wasabi Systems, Inc.
21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22  *    or promote products derived from this software without specific prior
23  *    written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: procfs_linux.c,v 1.57 2009/01/11 03:16:33 christos Exp $");
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/time.h>
44 #include <sys/kernel.h>
45 #include <sys/proc.h>
46 #include <sys/vnode.h>
47 #include <sys/exec.h>
48 #include <sys/resource.h>
49 #include <sys/resourcevar.h>
50 #include <sys/signal.h>
51 #include <sys/signalvar.h>
52 #include <sys/tty.h>
53 #include <sys/malloc.h>
54 #include <sys/mount.h>
55 #include <sys/conf.h>
56 
57 #include <miscfs/procfs/procfs.h>
58 
59 #include <compat/linux/common/linux_exec.h>
60 
61 #include <uvm/uvm_extern.h>
62 #include <uvm/uvm.h>
63 
/*
 * Device switch conversion table and its entry count.  Both are defined
 * outside this file; procfs_dodevices() walks the table to list device
 * names and major numbers.
 */
extern int max_devsw_convs;
extern struct devsw_conv *devsw_conv;

/* Size in bytes of the scratch buffer each handler formats its text into. */
#define LBFSZ		(8 * 1024)

/* Page count -> bytes, and page count -> kilobytes (1 kB == 1 << 10 bytes). */
#define PGTOB(p)	(((unsigned long)(p)) << PAGE_SHIFT)
#define PGTOKB(p)	(((unsigned long)(p)) << (PAGE_SHIFT - 10))
71 
72 static void
73 get_proc_size_info(struct lwp *l, unsigned long *stext, unsigned long *etext, unsigned long *sstack)
74 {
75 	struct proc *p = l->l_proc;
76 	struct vmspace *vm;
77 	struct vm_map *map;
78 	struct vm_map_entry *entry;
79 
80 	*stext = 0;
81 	*etext = 0;
82 	*sstack = 0;
83 
84 	proc_vmspace_getref(p, &vm);
85 	map = &vm->vm_map;
86 	vm_map_lock_read(map);
87 
88 	for (entry = map->header.next; entry != &map->header;
89 	    entry = entry->next) {
90 		if (UVM_ET_ISSUBMAP(entry))
91 			continue;
92 		/* assume text is the first entry */
93 		if (*stext == *etext) {
94 			*stext = entry->start;
95 			*etext = entry->end;
96 			break;
97 		}
98 	}
99 #ifdef LINUX_USRSTACK32
100 	if (strcmp(p->p_emul->e_name, "linux32") == 0 &&
101 	    LINUX_USRSTACK32 < USRSTACK32)
102 		*sstack = (unsigned long)LINUX_USRSTACK32;
103 	else
104 #endif
105 #ifdef LINUX_USRSTACK
106 	if (strcmp(p->p_emul->e_name, "linux") == 0 &&
107 	    LINUX_USRSTACK < USRSTACK)
108 		*sstack = (unsigned long)LINUX_USRSTACK;
109 	else
110 #endif
111 #ifdef	USRSTACK32
112 	if (strstr(p->p_emul->e_name, "32") != NULL)
113 		*sstack = (unsigned long)USRSTACK32;
114 	else
115 #endif
116 		*sstack = (unsigned long)USRSTACK;
117 
118 	/*
119 	 * jdk 1.6 compares low <= addr && addr < high
120 	 * if we put addr == high, then the test fails
121 	 * so eat one page.
122 	 */
123 	*sstack -= PAGE_SIZE;
124 
125 	vm_map_unlock_read(map);
126 	uvmspace_free(vm);
127 }
128 
129 /*
130  * Linux compatible /proc/meminfo. Only active when the -o linux
131  * mountflag is used.
132  */
133 int
134 procfs_domeminfo(struct lwp *curl, struct proc *p,
135     struct pfsnode *pfs, struct uio *uio)
136 {
137 	char *bf;
138 	int len;
139 	int error = 0;
140 
141 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
142 
143 	len = snprintf(bf, LBFSZ,
144 		"        total:    used:    free:  shared: buffers: cached:\n"
145 		"Mem:  %8lu %8lu %8lu %8lu %8lu %8lu\n"
146 		"Swap: %8lu %8lu %8lu\n"
147 		"MemTotal:  %8lu kB\n"
148 		"MemFree:   %8lu kB\n"
149 		"MemShared: %8lu kB\n"
150 		"Buffers:   %8lu kB\n"
151 		"Cached:    %8lu kB\n"
152 		"SwapTotal: %8lu kB\n"
153 		"SwapFree:  %8lu kB\n",
154 		PGTOB(uvmexp.npages),
155 		PGTOB(uvmexp.npages - uvmexp.free),
156 		PGTOB(uvmexp.free),
157 		0L,
158 		PGTOB(uvmexp.filepages),
159 		PGTOB(uvmexp.anonpages + uvmexp.filepages + uvmexp.execpages),
160 		PGTOB(uvmexp.swpages),
161 		PGTOB(uvmexp.swpginuse),
162 		PGTOB(uvmexp.swpages - uvmexp.swpginuse),
163 		PGTOKB(uvmexp.npages),
164 		PGTOKB(uvmexp.free),
165 		0L,
166 		PGTOKB(uvmexp.filepages),
167 		PGTOKB(uvmexp.anonpages + uvmexp.filepages + uvmexp.execpages),
168 		PGTOKB(uvmexp.swpages),
169 		PGTOKB(uvmexp.swpages - uvmexp.swpginuse));
170 
171 	if (len == 0)
172 		goto out;
173 
174 	error = uiomove_frombuf(bf, len, uio);
175 out:
176 	free(bf, M_TEMP);
177 	return error;
178 }
179 
180 /*
181  * Linux compatible /proc/devices. Only active when the -o linux
182  * mountflag is used.
183  */
184 int
185 procfs_dodevices(struct lwp *curl, struct proc *p,
186     struct pfsnode *pfs, struct uio *uio)
187 {
188 	char *bf;
189 	int offset = 0;
190 	int i, error = ENAMETOOLONG;
191 
192 	/* XXX elad - may need filtering. */
193 
194 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
195 
196 	offset += snprintf(&bf[offset], LBFSZ - offset, "Character devices:\n");
197 	if (offset >= LBFSZ)
198 		goto out;
199 
200 	mutex_enter(&device_lock);
201 	for (i = 0; i < max_devsw_convs; i++) {
202 		if ((devsw_conv[i].d_name == NULL) ||
203 		    (devsw_conv[i].d_cmajor == -1))
204 			continue;
205 
206 		offset += snprintf(&bf[offset], LBFSZ - offset,
207 		    "%3d %s\n", devsw_conv[i].d_cmajor, devsw_conv[i].d_name);
208 		if (offset >= LBFSZ) {
209 			mutex_exit(&device_lock);
210 			goto out;
211 		}
212 	}
213 
214 	offset += snprintf(&bf[offset], LBFSZ - offset, "\nBlock devices:\n");
215 	if (offset >= LBFSZ) {
216 		mutex_exit(&device_lock);
217 		goto out;
218 	}
219 
220 	for (i = 0; i < max_devsw_convs; i++) {
221 		if ((devsw_conv[i].d_name == NULL) ||
222 		    (devsw_conv[i].d_bmajor == -1))
223 			continue;
224 
225 		offset += snprintf(&bf[offset], LBFSZ - offset,
226 		    "%3d %s\n", devsw_conv[i].d_bmajor, devsw_conv[i].d_name);
227 		if (offset >= LBFSZ) {
228 			mutex_exit(&device_lock);
229 			goto out;
230 		}
231 	}
232 	mutex_exit(&device_lock);
233 
234 	error = uiomove_frombuf(bf, offset, uio);
235 out:
236 	free(bf, M_TEMP);
237 	return error;
238 }
239 
240 /*
241  * Linux compatible /proc/stat. Only active when the -o linux
242  * mountflag is used.
243  */
244 int
245 procfs_docpustat(struct lwp *curl, struct proc *p,
246     struct pfsnode *pfs, struct uio *uio)
247 {
248 	char		*bf;
249 	int	 	 error;
250 	int	 	 len;
251 #if defined(MULTIPROCESSOR)
252         struct cpu_info *ci;
253         CPU_INFO_ITERATOR cii;
254 #endif
255 	int	 	 i;
256 
257 	error = ENAMETOOLONG;
258 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
259 
260 	len = snprintf(bf, LBFSZ,
261 		"cpu %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
262 		curcpu()->ci_schedstate.spc_cp_time[CP_USER],
263 		curcpu()->ci_schedstate.spc_cp_time[CP_NICE],
264 		curcpu()->ci_schedstate.spc_cp_time[CP_SYS] /*+ [CP_INTR]*/,
265 		curcpu()->ci_schedstate.spc_cp_time[CP_IDLE]);
266 	if (len == 0)
267 		goto out;
268 
269 #if defined(MULTIPROCESSOR)
270 #define ALLCPUS	CPU_INFO_FOREACH(cii, ci)
271 #define CPUNAME	ci
272 #else
273 #define ALLCPUS	; i < 1 ;
274 #define CPUNAME	curcpu()
275 #endif
276 
277 	i = 0;
278 	for (ALLCPUS) {
279 		len += snprintf(&bf[len], LBFSZ - len,
280 			"cpu%d %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64
281 			"\n", i,
282 			CPUNAME->ci_schedstate.spc_cp_time[CP_USER],
283 			CPUNAME->ci_schedstate.spc_cp_time[CP_NICE],
284 			CPUNAME->ci_schedstate.spc_cp_time[CP_SYS],
285 			CPUNAME->ci_schedstate.spc_cp_time[CP_IDLE]);
286 		if (len >= LBFSZ)
287 			goto out;
288 		i += 1;
289 	}
290 
291 	len += snprintf(&bf[len], LBFSZ - len,
292 			"disk 0 0 0 0\n"
293 			"page %u %u\n"
294 			"swap %u %u\n"
295 			"intr %u\n"
296 			"ctxt %u\n"
297 			"btime %lld\n",
298 			uvmexp.pageins, uvmexp.pdpageouts,
299 			uvmexp.pgswapin, uvmexp.pgswapout,
300 			uvmexp.intrs,
301 			uvmexp.swtch,
302 			(long long)boottime.tv_sec);
303 	if (len >= LBFSZ)
304 		goto out;
305 
306 	error = uiomove_frombuf(bf, len, uio);
307 out:
308 	free(bf, M_TEMP);
309 	return error;
310 }
311 
312 /*
313  * Linux compatible /proc/loadavg. Only active when the -o linux
314  * mountflag is used.
315  */
316 int
317 procfs_doloadavg(struct lwp *curl, struct proc *p,
318     struct pfsnode *pfs, struct uio *uio)
319 {
320 	char	*bf;
321 	int 	 error;
322 	int 	 len;
323 
324 	error = ENAMETOOLONG;
325 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
326 
327 	averunnable.fscale = FSCALE;
328 	len = snprintf(bf, LBFSZ,
329 	        "%d.%02d %d.%02d %d.%02d %d/%d %d\n",
330 		(int)(averunnable.ldavg[0] / averunnable.fscale),
331 		(int)(averunnable.ldavg[0] * 100 / averunnable.fscale % 100),
332 		(int)(averunnable.ldavg[1] / averunnable.fscale),
333 		(int)(averunnable.ldavg[1] * 100 / averunnable.fscale % 100),
334 		(int)(averunnable.ldavg[2] / averunnable.fscale),
335 		(int)(averunnable.ldavg[2] * 100 / averunnable.fscale % 100),
336 		1,		/* number of ONPROC processes */
337 		nprocs,
338 		30000);		/* last pid */
339 	if (len == 0)
340 		goto out;
341 
342 	error = uiomove_frombuf(bf, len, uio);
343 out:
344 	free(bf, M_TEMP);
345 	return error;
346 }
347 
348 /*
349  * Linux compatible /proc/<pid>/statm. Only active when the -o linux
350  * mountflag is used.
351  */
352 int
353 procfs_do_pid_statm(struct lwp *curl, struct lwp *l,
354     struct pfsnode *pfs, struct uio *uio)
355 {
356 	struct vmspace	*vm;
357 	struct proc	*p = l->l_proc;
358 	struct rusage	*ru = &p->p_stats->p_ru;
359 	char		*bf;
360 	int	 	 error;
361 	int	 	 len;
362 
363 	error = ENAMETOOLONG;
364 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
365 
366 	/* XXX - we use values from vmspace, since dsl says that ru figures
367 	   are always 0 except for zombies. See kvm_proc.c::kvm_getproc2() */
368 	if ((error = proc_vmspace_getref(p, &vm)) != 0) {
369 		goto out;
370 	}
371 
372 	len = snprintf(bf, LBFSZ,
373 	        "%lu %lu %lu %lu %lu %lu %lu\n",
374 		(unsigned long)(vm->vm_tsize + vm->vm_dsize + vm->vm_ssize), /* size */
375 		(unsigned long)(vm->vm_rssize),	/* resident */
376 		(unsigned long)(ru->ru_ixrss),	/* shared */
377 		(unsigned long)(vm->vm_tsize),	/* text size in pages */
378 		(unsigned long)(vm->vm_dsize),	/* data size in pages */
379 		(unsigned long)(vm->vm_ssize),	/* stack size in pages */
380 		(unsigned long) 0);
381 
382 	if (len == 0)
383 		goto out;
384 
385 	error = uiomove_frombuf(bf, len, uio);
386 out:
387 	free(bf, M_TEMP);
388 	return error;
389 }
390 
391 #define USEC_2_TICKS(x)		((x) / 10000)
392 
393 /*
394  * Linux compatible /proc/<pid>/stat. Only active when the -o linux
395  * mountflag is used.
396  */
397 int
398 procfs_do_pid_stat(struct lwp *curl, struct lwp *l,
399     struct pfsnode *pfs, struct uio *uio)
400 {
401 	char *bf;
402 	struct proc *p = l->l_proc;
403 	int len;
404 	struct tty *tty = p->p_session->s_ttyp;
405 	struct rusage *ru = &p->p_stats->p_ru;
406 	struct rusage *cru = &p->p_stats->p_cru;
407 	unsigned long stext = 0, etext = 0, sstack = 0;
408 	struct timeval rt;
409 	struct vmspace	*vm;
410 	int error = 0;
411 
412 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
413 
414 	if ((error = proc_vmspace_getref(p, &vm)) != 0) {
415 		goto out;
416 	}
417 
418 	get_proc_size_info(l, &stext, &etext, &sstack);
419 
420 	mutex_enter(proc_lock);
421 	mutex_enter(p->p_lock);
422 
423 	calcru(p, NULL, NULL, NULL, &rt);
424 
425 	len = snprintf(bf, LBFSZ,
426 	    "%d (%s) %c %d %d %d %lld %d "
427 	    "%u "
428 	    "%lu %lu %lu %lu %lu %lu %lu %lu "
429 	    "%d %d %d "
430 	    "%lld %lld %lu %lu %" PRIu64 " "
431 	    "%lu %lu %lu "
432 	    "%u %u "
433 	    "%u %u %u %u "
434 	    "%lu %lu %lu %d %d\n",
435 
436 	    p->p_pid,
437 	    p->p_comm,
438 	    "0IR3SZD"[(p->p_stat > 6) ? 0 : (int)p->p_stat],
439 	    (p->p_pptr != NULL) ? p->p_pptr->p_pid : 0,
440 
441 	    p->p_pgid,
442 	    p->p_session->s_sid,
443 	    (unsigned long long)(tty ? tty->t_dev : 0),
444 	    (tty && tty->t_pgrp) ? tty->t_pgrp->pg_id : 0,
445 
446 	    p->p_flag,
447 
448 	    ru->ru_minflt,
449 	    cru->ru_minflt,
450 	    ru->ru_majflt,
451 	    cru->ru_majflt,
452 	    (long)USEC_2_TICKS(ru->ru_utime.tv_usec),
453 	    (long)USEC_2_TICKS(ru->ru_stime.tv_usec),
454 	    (long)USEC_2_TICKS(cru->ru_utime.tv_usec),
455 	    (long)USEC_2_TICKS(cru->ru_stime.tv_usec),
456 
457 	    l->l_priority,				/* XXX: priority */
458 	    p->p_nice - 20,
459 	    0,
460 
461 	    (long long)rt.tv_sec,
462 	    (long long)p->p_stats->p_start.tv_sec,
463 	    (unsigned long)(vm->vm_tsize + vm->vm_dsize + vm->vm_ssize), /* size */
464 	    (unsigned long)(vm->vm_rssize),	/* resident */
465 	    p->p_rlimit[RLIMIT_RSS].rlim_cur,
466 
467 	    stext,					/* start code */
468 	    etext,					/* end code */
469 	    sstack,					/* mm start stack */
470 	    0,						/* XXX: pc */
471 	    0,						/* XXX: sp */
472 	    p->p_sigpend.sp_set.__bits[0],		/* XXX: pending */
473 	    0,						/* XXX: held */
474 	    p->p_sigctx.ps_sigignore.__bits[0],		/* ignored */
475 	    p->p_sigctx.ps_sigcatch.__bits[0],		/* caught */
476 
477 	    (unsigned long)(intptr_t)l->l_wchan,
478 	    ru->ru_nvcsw,
479 	    ru->ru_nivcsw,
480 	    p->p_exitsig,
481 	    0);						/* XXX: processor */
482 
483 	mutex_exit(p->p_lock);
484 	mutex_exit(proc_lock);
485 
486 	if (len == 0)
487 		goto out;
488 
489 	error = uiomove_frombuf(bf, len, uio);
490 out:
491 	free(bf, M_TEMP);
492 	return error;
493 }
494 
495 int
496 procfs_docpuinfo(struct lwp *curl, struct proc *p,
497     struct pfsnode *pfs, struct uio *uio)
498 {
499 	int len = LBFSZ;
500 	char *bf = malloc(len, M_TEMP, M_WAITOK);
501 	int error;
502 
503 	if (procfs_getcpuinfstr(bf, &len) < 0) {
504 		error = ENOSPC;
505 		goto done;
506 	}
507 
508 	if (len == 0) {
509 		error = 0;
510 		goto done;
511 	}
512 
513 	error = uiomove_frombuf(bf, len, uio);
514 done:
515 	free(bf, M_TEMP);
516 	return error;
517 }
518 
519 int
520 procfs_douptime(struct lwp *curl, struct proc *p,
521     struct pfsnode *pfs, struct uio *uio)
522 {
523 	char *bf;
524 	int len;
525 	struct timeval runtime;
526 	u_int64_t idle;
527 	int error = 0;
528 
529 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
530 
531 	microuptime(&runtime);
532 	idle = curcpu()->ci_schedstate.spc_cp_time[CP_IDLE];
533 	len = snprintf(bf, LBFSZ,
534 	    "%lld.%02lu %" PRIu64 ".%02" PRIu64 "\n",
535 	    (long long)runtime.tv_sec, (long)runtime.tv_usec / 10000,
536 	    idle / hz, (((idle % hz) * 100) / hz) % 100);
537 
538 	if (len == 0)
539 		goto out;
540 
541 	error = uiomove_frombuf(bf, len, uio);
542 out:
543 	free(bf, M_TEMP);
544 	return error;
545 }
546 
547 int
548 procfs_domounts(struct lwp *curl, struct proc *p,
549     struct pfsnode *pfs, struct uio *uio)
550 {
551 	char *bf, *mtab = NULL;
552 	const char *fsname;
553 	size_t len, mtabsz = 0;
554 	struct mount *mp, *nmp;
555 	struct statvfs *sfs;
556 	int error = 0;
557 
558 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
559 	mutex_enter(&mountlist_lock);
560 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
561 	     mp = nmp) {
562 		if (vfs_busy(mp, &nmp)) {
563 			continue;
564 		}
565 
566 		sfs = &mp->mnt_stat;
567 
568 		/* Linux uses different names for some filesystems */
569 		fsname = sfs->f_fstypename;
570 		if (strcmp(fsname, "procfs") == 0)
571 			fsname = "proc";
572 		else if (strcmp(fsname, "ext2fs") == 0)
573 			fsname = "ext2";
574 
575 		len = snprintf(bf, LBFSZ, "%s %s %s %s%s%s%s%s%s 0 0\n",
576 			sfs->f_mntfromname,
577 			sfs->f_mntonname,
578 			fsname,
579 			(mp->mnt_flag & MNT_RDONLY) ? "ro" : "rw",
580 			(mp->mnt_flag & MNT_NOSUID) ? ",nosuid" : "",
581 			(mp->mnt_flag & MNT_NOEXEC) ? ",noexec" : "",
582 			(mp->mnt_flag & MNT_NODEV) ? ",nodev" : "",
583 			(mp->mnt_flag & MNT_SYNCHRONOUS) ? ",sync" : "",
584 			(mp->mnt_flag & MNT_NOATIME) ? ",noatime" : ""
585 			);
586 
587 		mtab = realloc(mtab, mtabsz + len, M_TEMP, M_WAITOK);
588 		memcpy(mtab + mtabsz, bf, len);
589 		mtabsz += len;
590 
591 		vfs_unbusy(mp, false, &nmp);
592 	}
593 	mutex_exit(&mountlist_lock);
594 	free(bf, M_TEMP);
595 
596 	if (mtabsz > 0) {
597 		error = uiomove_frombuf(mtab, mtabsz, uio);
598 		free(mtab, M_TEMP);
599 	}
600 
601 	return error;
602 }
603