xref: /netbsd-src/sys/miscfs/procfs/procfs_linux.c (revision d18c6ca4de085417c15b29b84dad2075960cf9e0)
1 /*      $NetBSD: procfs_linux.c,v 1.43 2007/11/07 00:23:37 ad Exp $      */
2 
3 /*
4  * Copyright (c) 2001 Wasabi Systems, Inc.
5  * All rights reserved.
6  *
7  * Written by Frank van der Linden for Wasabi Systems, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed for the NetBSD Project by
20  *      Wasabi Systems, Inc.
21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22  *    or promote products derived from this software without specific prior
23  *    written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: procfs_linux.c,v 1.43 2007/11/07 00:23:37 ad Exp $");
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/time.h>
44 #include <sys/kernel.h>
45 #include <sys/proc.h>
46 #include <sys/vnode.h>
47 #include <sys/exec.h>
48 #include <sys/resource.h>
49 #include <sys/resourcevar.h>
50 #include <sys/signal.h>
51 #include <sys/signalvar.h>
52 #include <sys/tty.h>
53 #include <sys/malloc.h>
54 #include <sys/mount.h>
55 #include <sys/conf.h>
56 
57 #include <miscfs/procfs/procfs.h>
58 #include <compat/linux/common/linux_exec.h>
59 
60 #include <uvm/uvm_extern.h>
61 #include <uvm/uvm.h>
62 
63 extern struct devsw_conv *devsw_conv;
64 extern int max_devsw_convs;
65 
/* Convert a page count into a byte count. */
#define PGTOB(p)	(((unsigned long)(p)) << PAGE_SHIFT)
/* Convert a page count into kilobytes (shift is PAGE_SHIFT minus 10). */
#define PGTOKB(p)	(((unsigned long)(p)) << (PAGE_SHIFT - 10))

/* Size, in bytes, of the scratch buffer the handlers below format into. */
#define LBFSZ	(8 * 1024)
70 
71 static void
72 get_proc_size_info(struct lwp *l, unsigned long *stext, unsigned long *etext, unsigned long *sstack)
73 {
74 	struct proc *p = l->l_proc;
75 	struct vmspace *vm;
76 	struct vm_map *map;
77 	struct vm_map_entry *entry;
78 
79 	*stext = 0;
80 	*etext = 0;
81 	*sstack = 0;
82 
83 	proc_vmspace_getref(p, &vm);
84 	map = &vm->vm_map;
85 	vm_map_lock_read(map);
86 
87 	for (entry = map->header.next; entry != &map->header;
88 	    entry = entry->next) {
89 		if (UVM_ET_ISSUBMAP(entry))
90 			continue;
91 		/* assume text is the first entry */
92 		if (*stext == *etext) {
93 			*stext = entry->start;
94 			*etext = entry->end;
95 			break;
96 		}
97 	}
98 #ifdef LINUX_USRSTACK
99 	if (strcmp(p->p_emul->e_name, "linux") == 0 &&
100 	    LINUX_USRSTACK < USRSTACK)
101 		*sstack = (unsigned long) LINUX_USRSTACK;
102 	else
103 #endif
104 		*sstack = (unsigned long) USRSTACK;
105 
106 	/*
107 	 * jdk 1.6 compares low <= addr && addr < high
108 	 * if we put addr == high, then the test fails
109 	 * so eat one page.
110 	 */
111 	*sstack -= PAGE_SIZE;
112 
113 	vm_map_unlock_read(map);
114 	uvmspace_free(vm);
115 }
116 
117 /*
118  * Linux compatible /proc/meminfo. Only active when the -o linux
119  * mountflag is used.
120  */
121 int
122 procfs_domeminfo(struct lwp *curl, struct proc *p,
123     struct pfsnode *pfs, struct uio *uio)
124 {
125 	char *bf;
126 	int len;
127 	int error = 0;
128 
129 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
130 
131 	len = snprintf(bf, LBFSZ,
132 		"        total:    used:    free:  shared: buffers: cached:\n"
133 		"Mem:  %8lu %8lu %8lu %8lu %8lu %8lu\n"
134 		"Swap: %8lu %8lu %8lu\n"
135 		"MemTotal:  %8lu kB\n"
136 		"MemFree:   %8lu kB\n"
137 		"MemShared: %8lu kB\n"
138 		"Buffers:   %8lu kB\n"
139 		"Cached:    %8lu kB\n"
140 		"SwapTotal: %8lu kB\n"
141 		"SwapFree:  %8lu kB\n",
142 		PGTOB(uvmexp.npages),
143 		PGTOB(uvmexp.npages - uvmexp.free),
144 		PGTOB(uvmexp.free),
145 		0L,
146 		PGTOB(uvmexp.filepages),
147 		PGTOB(uvmexp.anonpages + uvmexp.filepages + uvmexp.execpages),
148 		PGTOB(uvmexp.swpages),
149 		PGTOB(uvmexp.swpginuse),
150 		PGTOB(uvmexp.swpages - uvmexp.swpginuse),
151 		PGTOKB(uvmexp.npages),
152 		PGTOKB(uvmexp.free),
153 		0L,
154 		PGTOKB(uvmexp.filepages),
155 		PGTOKB(uvmexp.anonpages + uvmexp.filepages + uvmexp.execpages),
156 		PGTOKB(uvmexp.swpages),
157 		PGTOKB(uvmexp.swpages - uvmexp.swpginuse));
158 
159 	if (len == 0)
160 		goto out;
161 
162 	error = uiomove_frombuf(bf, len, uio);
163 out:
164 	free(bf, M_TEMP);
165 	return error;
166 }
167 
168 /*
169  * Linux compatible /proc/devices. Only active when the -o linux
170  * mountflag is used.
171  */
172 int
173 procfs_dodevices(struct lwp *curl, struct proc *p,
174     struct pfsnode *pfs, struct uio *uio)
175 {
176 	char *bf;
177 	int offset = 0;
178 	int i, error = ENAMETOOLONG;
179 	extern kmutex_t devsw_lock;
180 
181 	/* XXX elad - may need filtering. */
182 
183 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
184 
185 	offset += snprintf(&bf[offset], LBFSZ - offset, "Character devices:\n");
186 	if (offset >= LBFSZ)
187 		goto out;
188 
189 	mutex_enter(&devsw_lock);
190 	for (i = 0; i < max_devsw_convs; i++) {
191 		if ((devsw_conv[i].d_name == NULL) ||
192 		    (devsw_conv[i].d_cmajor == -1))
193 			continue;
194 
195 		offset += snprintf(&bf[offset], LBFSZ - offset,
196 		    "%3d %s\n", devsw_conv[i].d_cmajor, devsw_conv[i].d_name);
197 		if (offset >= LBFSZ) {
198 			mutex_exit(&devsw_lock);
199 			goto out;
200 		}
201 	}
202 
203 	offset += snprintf(&bf[offset], LBFSZ - offset, "\nBlock devices:\n");
204 	if (offset >= LBFSZ) {
205 		mutex_exit(&devsw_lock);
206 		goto out;
207 	}
208 
209 	for (i = 0; i < max_devsw_convs; i++) {
210 		if ((devsw_conv[i].d_name == NULL) ||
211 		    (devsw_conv[i].d_bmajor == -1))
212 			continue;
213 
214 		offset += snprintf(&bf[offset], LBFSZ - offset,
215 		    "%3d %s\n", devsw_conv[i].d_bmajor, devsw_conv[i].d_name);
216 		if (offset >= LBFSZ) {
217 			mutex_exit(&devsw_lock);
218 			goto out;
219 		}
220 	}
221 	mutex_exit(&devsw_lock);
222 
223 	error = uiomove_frombuf(bf, offset, uio);
224 out:
225 	free(bf, M_TEMP);
226 	return error;
227 }
228 
229 /*
230  * Linux compatible /proc/stat. Only active when the -o linux
231  * mountflag is used.
232  */
233 int
234 procfs_docpustat(struct lwp *curl, struct proc *p,
235     struct pfsnode *pfs, struct uio *uio)
236 {
237 	struct timeval	 runtime;
238 	char		*bf;
239 	int	 	 error;
240 	int	 	 len;
241 #if defined(MULTIPROCESSOR)
242         struct cpu_info *ci;
243         CPU_INFO_ITERATOR cii;
244 #endif
245 	int	 	 i;
246 
247 	error = ENAMETOOLONG;
248 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
249 
250 	len = snprintf(bf, LBFSZ,
251 		"cpu %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
252 		curcpu()->ci_schedstate.spc_cp_time[CP_USER],
253 		curcpu()->ci_schedstate.spc_cp_time[CP_NICE],
254 		curcpu()->ci_schedstate.spc_cp_time[CP_SYS] /*+ [CP_INTR]*/,
255 		curcpu()->ci_schedstate.spc_cp_time[CP_IDLE]);
256 	if (len == 0)
257 		goto out;
258 
259 #if defined(MULTIPROCESSOR)
260 #define ALLCPUS	CPU_INFO_FOREACH(cii, ci)
261 #define CPUNAME	ci
262 #else
263 #define ALLCPUS	; i < 1 ;
264 #define CPUNAME	curcpu()
265 #endif
266 
267 	i = 0;
268 	for (ALLCPUS) {
269 		len += snprintf(&bf[len], LBFSZ - len,
270 			"cpu%d %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64
271 			"\n", i,
272 			CPUNAME->ci_schedstate.spc_cp_time[CP_USER],
273 			CPUNAME->ci_schedstate.spc_cp_time[CP_NICE],
274 			CPUNAME->ci_schedstate.spc_cp_time[CP_SYS],
275 			CPUNAME->ci_schedstate.spc_cp_time[CP_IDLE]);
276 		if (len >= LBFSZ)
277 			goto out;
278 		i += 1;
279 	}
280 
281 	timersub(&curlwp->l_stime, &boottime, &runtime);
282 	len += snprintf(&bf[len], LBFSZ - len,
283 			"disk 0 0 0 0\n"
284 			"page %u %u\n"
285 			"swap %u %u\n"
286 			"intr %u\n"
287 			"ctxt %u\n"
288 			"btime %lld\n",
289 			uvmexp.pageins, uvmexp.pdpageouts,
290 			uvmexp.pgswapin, uvmexp.pgswapout,
291 			uvmexp.intrs,
292 			uvmexp.swtch,
293 			(long long)boottime.tv_sec);
294 	if (len >= LBFSZ)
295 		goto out;
296 
297 	error = uiomove_frombuf(bf, len, uio);
298 out:
299 	free(bf, M_TEMP);
300 	return error;
301 }
302 
303 /*
304  * Linux compatible /proc/loadavg. Only active when the -o linux
305  * mountflag is used.
306  */
307 int
308 procfs_doloadavg(struct lwp *curl, struct proc *p,
309     struct pfsnode *pfs, struct uio *uio)
310 {
311 	char	*bf;
312 	int 	 error;
313 	int 	 len;
314 
315 	error = ENAMETOOLONG;
316 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
317 
318 	averunnable.fscale = FSCALE;
319 	len = snprintf(bf, LBFSZ,
320 	        "%d.%02d %d.%02d %d.%02d %d/%d %d\n",
321 		(int)(averunnable.ldavg[0] / averunnable.fscale),
322 		(int)(averunnable.ldavg[0] * 100 / averunnable.fscale % 100),
323 		(int)(averunnable.ldavg[1] / averunnable.fscale),
324 		(int)(averunnable.ldavg[1] * 100 / averunnable.fscale % 100),
325 		(int)(averunnable.ldavg[2] / averunnable.fscale),
326 		(int)(averunnable.ldavg[2] * 100 / averunnable.fscale % 100),
327 		1,		/* number of ONPROC processes */
328 		nprocs,
329 		30000);		/* last pid */
330 	if (len == 0)
331 		goto out;
332 
333 	error = uiomove_frombuf(bf, len, uio);
334 out:
335 	free(bf, M_TEMP);
336 	return error;
337 }
338 
339 /*
340  * Linux compatible /proc/<pid>/statm. Only active when the -o linux
341  * mountflag is used.
342  */
343 int
344 procfs_do_pid_statm(struct lwp *curl, struct lwp *l,
345     struct pfsnode *pfs, struct uio *uio)
346 {
347 	struct vmspace	*vm;
348 	struct proc	*p = l->l_proc;
349 	struct rusage	*ru = &p->p_stats->p_ru;
350 	char		*bf;
351 	int	 	 error;
352 	int	 	 len;
353 
354 	error = ENAMETOOLONG;
355 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
356 
357 	/* XXX - we use values from vmspace, since dsl says that ru figures
358 	   are always 0 except for zombies. See kvm_proc.c::kvm_getproc2() */
359 	if ((error = proc_vmspace_getref(p, &vm)) != 0) {
360 		goto out;
361 	}
362 
363 	len = snprintf(bf, LBFSZ,
364 	        "%lu %lu %lu %lu %lu %lu %lu\n",
365 		(unsigned long)(vm->vm_tsize + vm->vm_dsize + vm->vm_ssize), /* size */
366 		(unsigned long)(vm->vm_rssize),	/* resident */
367 		(unsigned long)(ru->ru_ixrss),	/* shared */
368 		(unsigned long)(vm->vm_tsize),	/* text size in pages */
369 		(unsigned long)(vm->vm_dsize),	/* data size in pages */
370 		(unsigned long)(vm->vm_ssize),	/* stack size in pages */
371 		(unsigned long) 0);
372 
373 	if (len == 0)
374 		goto out;
375 
376 	error = uiomove_frombuf(bf, len, uio);
377 out:
378 	free(bf, M_TEMP);
379 	return error;
380 }
381 
382 #define USEC_2_TICKS(x)		((x) / 10000)
383 
384 /*
385  * Linux compatible /proc/<pid>/stat. Only active when the -o linux
386  * mountflag is used.
387  */
388 int
389 procfs_do_pid_stat(struct lwp *curl, struct lwp *l,
390     struct pfsnode *pfs, struct uio *uio)
391 {
392 	char *bf;
393 	struct proc *p = l->l_proc;
394 	int len;
395 	struct tty *tty = p->p_session->s_ttyp;
396 	struct rusage *ru = &p->p_stats->p_ru;
397 	struct rusage *cru = &p->p_stats->p_cru;
398 	unsigned long stext = 0, etext = 0, sstack = 0;
399 	struct timeval rt;
400 	struct vmspace	*vm;
401 	int error = 0;
402 
403 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
404 
405 	if ((error = proc_vmspace_getref(p, &vm)) != 0) {
406 		goto out;
407 	}
408 
409 	get_proc_size_info(l, &stext, &etext, &sstack);
410 
411 	mutex_enter(&proclist_lock);
412 	mutex_enter(&p->p_mutex);
413 	mutex_enter(&p->p_smutex);
414 
415 	calcru(p, NULL, NULL, NULL, &rt);
416 
417 	len = snprintf(bf, LBFSZ,
418 	    "%d (%s) %c %d %d %d %d %d "
419 	    "%u "
420 	    "%lu %lu %lu %lu %lu %lu %lu %lu "
421 	    "%d %d %d "
422 	    "%lu %lu %lu %lu %" PRIu64 " "
423 	    "%lu %lu %lu "
424 	    "%u %u "
425 	    "%u %u %u %u "
426 	    "%lu %lu %lu %d %d\n",
427 
428 	    p->p_pid,
429 	    p->p_comm,
430 	    "0IR3SZD"[(p->p_stat > 6) ? 0 : (int)p->p_stat],
431 	    (p->p_pptr != NULL) ? p->p_pptr->p_pid : 0,
432 
433 	    p->p_pgid,
434 	    p->p_session->s_sid,
435 	    tty ? tty->t_dev : 0,
436 	    (tty && tty->t_pgrp) ? tty->t_pgrp->pg_id : 0,
437 
438 	    p->p_flag,
439 
440 	    ru->ru_minflt,
441 	    cru->ru_minflt,
442 	    ru->ru_majflt,
443 	    cru->ru_majflt,
444 	    USEC_2_TICKS(ru->ru_utime.tv_usec),
445 	    USEC_2_TICKS(ru->ru_stime.tv_usec),
446 	    USEC_2_TICKS(cru->ru_utime.tv_usec),
447 	    USEC_2_TICKS(cru->ru_stime.tv_usec),
448 
449 	    l->l_priority,				/* XXX: priority */
450 	    p->p_nice - 20,
451 	    0,
452 
453 	    rt.tv_sec,
454 	    p->p_stats->p_start.tv_sec,
455 	    (unsigned long)(vm->vm_tsize + vm->vm_dsize + vm->vm_ssize), /* size */
456 	    (unsigned long)(vm->vm_rssize),	/* resident */
457 	    p->p_rlimit[RLIMIT_RSS].rlim_cur,
458 
459 	    stext,					/* start code */
460 	    etext,					/* end code */
461 	    sstack,					/* mm start stack */
462 	    0,						/* XXX: pc */
463 	    0,						/* XXX: sp */
464 	    p->p_sigpend.sp_set.__bits[0],		/* XXX: pending */
465 	    0,						/* XXX: held */
466 	    p->p_sigctx.ps_sigignore.__bits[0],		/* ignored */
467 	    p->p_sigctx.ps_sigcatch.__bits[0],		/* caught */
468 
469 	    (unsigned long)(intptr_t)l->l_wchan,
470 	    ru->ru_nvcsw,
471 	    ru->ru_nivcsw,
472 	    p->p_exitsig,
473 	    0);						/* XXX: processor */
474 
475 	mutex_exit(&p->p_smutex);
476 	mutex_exit(&p->p_mutex);
477 	mutex_exit(&proclist_lock);
478 
479 	if (len == 0)
480 		goto out;
481 
482 	error = uiomove_frombuf(bf, len, uio);
483 out:
484 	free(bf, M_TEMP);
485 	return error;
486 }
487 
488 int
489 procfs_docpuinfo(struct lwp *curl, struct proc *p,
490     struct pfsnode *pfs, struct uio *uio)
491 {
492 	int len = LBFSZ;
493 	char *bf = malloc(len, M_TEMP, M_WAITOK);
494 	int error;
495 
496 	if (procfs_getcpuinfstr(bf, &len) < 0) {
497 		error = ENOSPC;
498 		goto done;
499 	}
500 
501 	if (len == 0) {
502 		error = 0;
503 		goto done;
504 	}
505 
506 	error = uiomove_frombuf(bf, len, uio);
507 done:
508 	free(bf, M_TEMP);
509 	return error;
510 }
511 
512 int
513 procfs_douptime(struct lwp *curl, struct proc *p,
514     struct pfsnode *pfs, struct uio *uio)
515 {
516 	char *bf;
517 	int len;
518 	struct timeval runtime;
519 	u_int64_t idle;
520 	int error = 0;
521 
522 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
523 
524 	timersub(&curlwp->l_stime, &boottime, &runtime);
525 	idle = curcpu()->ci_schedstate.spc_cp_time[CP_IDLE];
526 	len = snprintf(bf, LBFSZ,
527 	    "%lu.%02lu %" PRIu64 ".%02" PRIu64 "\n",
528 	    runtime.tv_sec, runtime.tv_usec / 10000,
529 	    idle / hz, (((idle % hz) * 100) / hz) % 100);
530 
531 	if (len == 0)
532 		goto out;
533 
534 	error = uiomove_frombuf(bf, len, uio);
535 out:
536 	free(bf, M_TEMP);
537 	return error;
538 }
539 
540 int
541 procfs_domounts(struct lwp *curl, struct proc *p,
542     struct pfsnode *pfs, struct uio *uio)
543 {
544 	char *bf, *mtab = NULL;
545 	const char *fsname;
546 	size_t len, mtabsz = 0;
547 	struct mount *mp, *nmp;
548 	struct statvfs *sfs;
549 	int error = 0;
550 
551 	/* XXX elad - may need filtering. */
552 
553 	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);
554 	mutex_enter(&mountlist_lock);
555 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
556 	     mp = nmp) {
557 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_lock)) {
558 			nmp = CIRCLEQ_NEXT(mp, mnt_list);
559 			continue;
560 		}
561 
562 		sfs = &mp->mnt_stat;
563 
564 		/* Linux uses different names for some filesystems */
565 		fsname = sfs->f_fstypename;
566 		if (strcmp(fsname, "procfs") == 0)
567 			fsname = "proc";
568 		else if (strcmp(fsname, "ext2fs") == 0)
569 			fsname = "ext2";
570 
571 		len = snprintf(bf, LBFSZ, "%s %s %s %s%s%s%s%s%s 0 0\n",
572 			sfs->f_mntfromname,
573 			sfs->f_mntonname,
574 			fsname,
575 			(mp->mnt_flag & MNT_RDONLY) ? "ro" : "rw",
576 			(mp->mnt_flag & MNT_NOSUID) ? ",nosuid" : "",
577 			(mp->mnt_flag & MNT_NOEXEC) ? ",noexec" : "",
578 			(mp->mnt_flag & MNT_NODEV) ? ",nodev" : "",
579 			(mp->mnt_flag & MNT_SYNCHRONOUS) ? ",sync" : "",
580 			(mp->mnt_flag & MNT_NOATIME) ? ",noatime" : ""
581 			);
582 
583 		mtab = realloc(mtab, mtabsz + len, M_TEMP, M_WAITOK);
584 		memcpy(mtab + mtabsz, bf, len);
585 		mtabsz += len;
586 
587 		mutex_exit(&mountlist_lock);
588 		nmp = CIRCLEQ_NEXT(mp, mnt_list);
589 		vfs_unbusy(mp);
590 	}
591 	mutex_exit(&mountlist_lock);
592 	free(bf, M_TEMP);
593 
594 	if (mtabsz > 0) {
595 		error = uiomove_frombuf(mtab, mtabsz, uio);
596 		free(mtab, M_TEMP);
597 	}
598 
599 	return error;
600 }
601