/* MIB service - proc.c - functionality based on service process tables */
/* Eventually, the CTL_PROC subtree might end up here as well. */

#include "mib.h"

#include <sys/exec.h>
#include <minix/sysinfo.h>

#include <machine/archtypes.h>
#include "kernel/proc.h"
#include "servers/pm/mproc.h"
#include "servers/vfs/const.h"
#include "servers/vfs/fproc.h"

typedef struct proc ixfer_proc_t;
typedef struct mproc ixfer_mproc_t;
typedef struct fproc ixfer_fproc_t;

static ixfer_proc_t proc_tab[NR_TASKS + NR_PROCS];
static ixfer_mproc_t mproc_tab[NR_PROCS];
static ixfer_fproc_t fproc_tab[NR_PROCS];

/*
 * The number of processes added to the current number of processes when doing
 * a size estimation, so that the actual data retrieval does not end up with
 * too little space if new processes have forked between the two calls.  We do
 * a process table update only once per clock tick, which means that typically
 * no update will take place between the user process's size estimation request
 * and its subsequent data retrieval request.  On the other hand, if we do
 * update process tables in between, quite a bit might have changed.
 */
#define EXTRA_PROCS	8
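
/*
 * For illustration only (a hypothetical userland sketch, not code from this
 * service): consumers typically issue the two-step sysctl(2) sequence that
 * EXTRA_PROCS is meant to protect:
 *
 *	size_t len;
 *	sysctl(mib, miblen, NULL, &len, NULL, 0);	(size estimation)
 *	buf = malloc(len);				(len includes the slack)
 *	sysctl(mib, miblen, buf, &len, NULL, 0);	(data retrieval)
 *
 * The extra slots keep the second call from coming up short when a few
 * processes have been forked in between.
 */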

#define HASH_SLOTS 	(NR_PROCS / 4)	/* expected nr. of processes in use */
#define NO_SLOT		(-1)
static int hash_tab[HASH_SLOTS];	/* hash table mapping from PID.. */
static int hnext_tab[NR_PROCS];		/* ..to PM process slot */
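
/*
 * Example of the chaining scheme (values purely illustrative): with
 * HASH_SLOTS at 64, PIDs 70 and 134 both map to hash slot 6.  If the process
 * with PID 70 occupies PM slot 3 and the one with PID 134 occupies PM slot
 * 12, then slot 12 is inserted last and becomes the chain head: hash_tab[6]
 * is 12, hnext_tab[12] is 3, and hnext_tab[3] is NO_SLOT.  Lookups in
 * get_mslot() walk this chain until the PID matches.
 */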

static clock_t tabs_updated = 0;	/* when the tables were last updated */
static int tabs_valid = TRUE;		/* FALSE if obtaining tables failed */

/*
 * Update the process tables by pulling in new copies from the kernel, PM, and
 * VFS, but only every so often and only if it has not failed before.  Return
 * TRUE iff the tables are now valid.
 */
static int
update_tables(void)
{
	clock_t now;
	pid_t pid;
	int r, kslot, mslot, hslot;

	/*
	 * If retrieving the tables failed at some point, do not keep trying
	 * all the time.  Such a failure is very unlikely to be transient.
	 */
	if (tabs_valid == FALSE)
		return FALSE;

	/*
	 * Update the tables once per clock tick at most.  The update operation
	 * is rather heavy, transferring several hundreds of kilobytes between
	 * servers.  Userland should be able to live with information that is
	 * outdated by at most one clock tick.
	 */
	now = getticks();

	if (tabs_updated != 0 && tabs_updated == now)
		return TRUE;

	/* Perform an actual update now. */
	tabs_valid = FALSE;

	/* Retrieve and check the kernel process table. */
	if ((r = sys_getproctab(proc_tab)) != OK) {
		printf("MIB: unable to obtain kernel process table (%d)\n", r);

		return FALSE;
	}

	for (kslot = 0; kslot < NR_TASKS + NR_PROCS; kslot++) {
		if (proc_tab[kslot].p_magic != PMAGIC) {
			printf("MIB: kernel process table mismatch\n");

			return FALSE;
		}
	}

	/* Retrieve and check the PM process table. */
	r = getsysinfo(PM_PROC_NR, SI_PROC_TAB, mproc_tab, sizeof(mproc_tab));
	if (r != OK) {
		printf("MIB: unable to obtain PM process table (%d)\n", r);

		return FALSE;
	}

	for (mslot = 0; mslot < NR_PROCS; mslot++) {
		if (mproc_tab[mslot].mp_magic != MP_MAGIC) {
			printf("MIB: PM process table mismatch\n");

			return FALSE;
		}
	}

	/* Retrieve the VFS process table, which has no magic number. */
	r = getsysinfo(VFS_PROC_NR, SI_PROC_TAB, fproc_tab, sizeof(fproc_tab));
	if (r != OK) {
		printf("MIB: unable to obtain VFS process table (%d)\n", r);

		return FALSE;
	}

	tabs_valid = TRUE;
	tabs_updated = now;

	/*
	 * Build a hash table mapping from process IDs to slot numbers, for
	 * fast access.  TODO: decide if this is better done on demand only.
	 */
	for (hslot = 0; hslot < HASH_SLOTS; hslot++)
		hash_tab[hslot] = NO_SLOT;

	for (mslot = 0; mslot < NR_PROCS; mslot++) {
		if (mproc_tab[mslot].mp_flags & IN_USE) {
			if ((pid = mproc_tab[mslot].mp_pid) <= 0)
				continue;

			hslot = mproc_tab[mslot].mp_pid % HASH_SLOTS;

			hnext_tab[mslot] = hash_tab[hslot];
			hash_tab[hslot] = mslot;
		}
	}

	return TRUE;
}

/*
 * Return the PM slot number for the given PID, or NO_SLOT if the PID is not in
 * use by a process.
 */
static int
get_mslot(pid_t pid)
{
	int mslot;

	/* PID 0 identifies the kernel; checking this is up to the caller. */
	if (pid <= 0)
		return NO_SLOT;

	for (mslot = hash_tab[pid % HASH_SLOTS]; mslot != NO_SLOT;
	    mslot = hnext_tab[mslot])
		if (mproc_tab[mslot].mp_pid == pid)
			break;

	return mslot;
}
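
/*
 * Typical use (see mib_kern_lwp() below): a caller translates a PID with
 * get_mslot() and, since a zombie still occupies its PM slot, additionally
 * checks the returned slot for TRACE_ZOMBIE/ZOMBIE before reporting ESRCH.
 */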

/*
 * Store the given number of clock ticks as a timeval structure.
 */
static void
ticks_to_timeval(struct timeval * tv, clock_t ticks)
{
	clock_t hz;

	hz = sys_hz();

	tv->tv_sec = ticks / hz;
	tv->tv_usec = (long)((ticks % hz) * 1000000ULL / hz);
}
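
/*
 * Worked example: with a system clock of 100Hz, a value of 150 ticks is
 * stored as tv_sec = 150 / 100 = 1 and tv_usec = (150 % 100) * 1000000 /
 * 100 = 500000, i.e. one and a half seconds.
 */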

/*
 * Generate a wchan message text for the cases that the process is blocked on
 * IPC with another process, of which the endpoint is given as 'endpt' here.
 * The name of the other process is to be stored in 'wmesg', which is a buffer
 * of size 'wmsz'.  The result should be null terminated.  If 'ipc' is set, the
 * process is blocked on a direct IPC call, in which case the name of the other
 * process is enclosed in parentheses.  If 'ipc' is not set, the call is made
 * indirectly through VFS, and the name of the other process should not be
 * enclosed in parentheses.  If no name can be obtained, we use the endpoint of
 * the other process instead.
 */
static void
fill_wmesg(char * wmesg, size_t wmsz, endpoint_t endpt, int ipc)
{
	const char *name;
	int mslot;

	switch (endpt) {
	case ANY:
		name = "any";
		break;
	case SELF:
		name = "self";
		break;
	case NONE:
		name = "none";
		break;
	default:
		mslot = _ENDPOINT_P(endpt);
		if (mslot >= -NR_TASKS && mslot < NR_PROCS &&
		    (mslot < 0 || (mproc_tab[mslot].mp_flags & IN_USE)))
			name = proc_tab[NR_TASKS + mslot].p_name;
		else
			name = NULL;
	}

	if (name != NULL)
		snprintf(wmesg, wmsz, "%s%s%s",
		    ipc ? "(" : "", name, ipc ? ")" : "");
	else
		snprintf(wmesg, wmsz, "%s%d%s",
		    ipc ? "(" : "", endpt, ipc ? ")" : "");
}
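
/*
 * Examples of the resulting text: a process blocked directly on IPC with the
 * VFS service yields "(vfs)", a process blocked in VFS on a character driver
 * named "tty" yields "tty" (no parentheses), and when the other party's name
 * cannot be resolved the decimal endpoint number is used instead.
 */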

/*
 * Return the LWP status of a process, along with additional information in
 * case the process is sleeping (LSSLEEP): a wchan value and text to indicate
 * what the process is sleeping on, and possibly a flag field modification to
 * indicate that the sleep is interruptible.
 */
static int
get_lwp_stat(int mslot, uint64_t * wcptr, char * wmptr, size_t wmsz,
	int32_t * flag)
{
	struct mproc *mp;
	struct fproc *fp;
	struct proc *kp;
	const char *wmesg;
	uint64_t wchan;
	endpoint_t endpt;

	mp = &mproc_tab[mslot];
	fp = &fproc_tab[mslot];
	kp = &proc_tab[NR_TASKS + mslot];

	/*
	 * First cover all the cases that the process is not sleeping.  In
	 * those cases, we need not return additional sleep information either.
	 */
	if (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE))
		return LSZOMB;

	if (mp->mp_flags & EXITING)
		return LSDEAD;

	if ((mp->mp_flags & TRACE_STOPPED) || RTS_ISSET(kp, RTS_P_STOP))
		return LSSTOP;

	if (proc_is_runnable(kp))
		return LSRUN;

	/*
	 * The process is sleeping.  In that case, we must also figure out why,
	 * and return an appropriate wchan value and human-readable wmesg text.
	 *
	 * The process can be blocked on either a known sleep state in PM or
	 * VFS, or otherwise on IPC communication with another process, or
	 * otherwise on a kernel RTS flag.  In each case, decide what to use as
	 * wchan value and wmesg text, and whether the sleep is interruptible.
	 *
	 * The wchan value should be unique for the sleep reason.  We use its
	 * lower eight bits to indicate a class:
	 *   0x00 = kernel task
	 *   0x01 = kernel RTS block
	 *   0x02 = PM call
	 *   0x03 = VFS call
	 *   0x04 = MIB call
	 *   0xff = blocked on process
	 * The upper bits are used for class-specific information.  The actual
	 * value does not really matter, as long as it is nonzero and there is
	 * no overlap between the different values.
	 */
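	/*
	 * For example, a process sleeping in PM's waitpid() is reported with
	 * wchan 0x102: class 0x02 (PM call) in the low byte and sub-reason
	 * 0x01 in the byte above it; a process blocked sending to endpoint 9
	 * would get (9 << 8) | 0xff.
	 */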
	wchan = 0;
	wmesg = NULL;

	/*
	 * First see if the process is marked as blocked in the tables of PM or
	 * VFS.  Such a block reason is always an interruptible sleep.  Note
	 * that we do not use the kernel table at all in this case: each of the
	 * three tables is consistent within itself, but not necessarily
	 * consistent with any of the other tables, so we avoid internal
	 * mismatches if we can.
	 */
	if (mp->mp_flags & WAITING) {
		wchan = 0x102;
		wmesg = "wait";
	} else if (mp->mp_flags & SIGSUSPENDED) {
		wchan = 0x202;
		wmesg = "pause";
	} else if (fp->fp_blocked_on != FP_BLOCKED_ON_NONE) {
		wchan = (fp->fp_blocked_on << 8) | 0x03;
		switch (fp->fp_blocked_on) {
		case FP_BLOCKED_ON_PIPE:
			wmesg = "pipe";
			break;
		case FP_BLOCKED_ON_LOCK:
			wmesg = "lock";
			break;
		case FP_BLOCKED_ON_POPEN:
			wmesg = "popen";
			break;
		case FP_BLOCKED_ON_SELECT:
			wmesg = "select";
			break;
		case FP_BLOCKED_ON_OTHER:
			/*
			 * Add the task (= character driver) endpoint to the
			 * wchan value, and use the driver's process name,
			 * without parentheses, as wmesg text.
			 */
			wchan |= (uint64_t)fp->fp_task << 16;
			fill_wmesg(wmptr, wmsz, fp->fp_task, FALSE /*ipc*/);
			break;
		default:
			/* A newly added flag we don't yet know about? */
			wmesg = "???";
			break;
		}
	}
	if (wchan != 0) {
		*wcptr = wchan;
		if (wmesg != NULL) /* NULL means "already set" here */
			strlcpy(wmptr, wmesg, wmsz);
		*flag |= L_SINTR;
	}

	/*
	 * See if the process is blocked on sending or receiving.  If not, then
	 * use one of the kernel RTS flags as reason.
	 */
	endpt = P_BLOCKEDON(kp);

	switch (endpt) {
	case MIB_PROC_NR:
		/* This is really just aesthetics. */
		wchan = 0x04;
		wmesg = "sysctl";
		break;
	case NONE:
		/*
		 * The process is not running, but also not blocked on IPC with
		 * another process.  This means it must be stopped on a kernel
		 * RTS flag.
		 */
		wchan = ((uint64_t)kp->p_rts_flags << 8) | 0x01;
		if (RTS_ISSET(kp, RTS_PROC_STOP))
			wmesg = "kstop";
		else if (RTS_ISSET(kp, RTS_SIGNALED) ||
		    RTS_ISSET(kp, RTS_SIG_PENDING))
			wmesg = "ksignal";
		else if (RTS_ISSET(kp, RTS_NO_PRIV))
			wmesg = "knopriv";
		else if (RTS_ISSET(kp, RTS_PAGEFAULT) ||
		    RTS_ISSET(kp, RTS_VMREQTARGET))
			wmesg = "fault";
		else if (RTS_ISSET(kp, RTS_NO_QUANTUM))
			wmesg = "sched";
		else
			wmesg = "kflag";
		break;
	case ANY:
		/*
		 * If the process is blocked receiving from ANY, mark it as
		 * being in an interruptible sleep.  This looks nicer, even
		 * though "interruptible" is not applicable to services at all.
		 */
		*flag |= L_SINTR;
		break;
	}

	/*
	 * If at this point wchan is still zero, the process is blocked sending
	 * or receiving.  Use a wchan value based on the target endpoint, and
	 * use "(procname)" as wmesg text.
	 */
	if (wchan == 0) {
		*wcptr = ((uint64_t)endpt << 8) | 0xff;
		fill_wmesg(wmptr, wmsz, endpt, TRUE /*ipc*/);
	} else {
		*wcptr = wchan;
		if (wmesg != NULL) /* NULL means "already set" here */
			strlcpy(wmptr, wmesg, wmsz);
	}

	return LSSLEEP;
}


/*
 * Fill the part of a LWP structure that is common between kernel tasks and
 * user processes.  Also return a CPU estimate in 'estcpu', because we generate
 * the value as a side effect here, and the LWP structure has no estcpu field.
 */
static void
fill_lwp_common(struct kinfo_lwp * l, int kslot, uint32_t * estcpu)
{
	struct proc *kp;
	struct timeval tv;
	clock_t uptime;
	uint32_t hz;

	kp = &proc_tab[kslot];

	uptime = getticks();
	hz = sys_hz();

	/*
	 * We use the process endpoint as the LWP ID.  Not only does this allow
	 * users to obtain process endpoints with "ps -s" (thus replacing the
	 * MINIX3 ps(1)'s "ps -E"), but if we ever do implement kernel threads,
	 * this is probably still going to be accurate.
	 */
	l->l_lid = kp->p_endpoint;

	/*
	 * The time during which the process has not been swapped in or out is
	 * not applicable for us, and thus, we set it to the time the process
	 * has been running (in seconds).  This value is relevant mostly for
	 * ps(1)'s CPU usage correction for processes that have just started.
	 */
	if (kslot >= NR_TASKS)
		l->l_swtime = uptime - mproc_tab[kslot - NR_TASKS].mp_started;
	else
		l->l_swtime = uptime;
	l->l_swtime /= hz;

	/*
	 * Sleep (dequeue) times are not maintained for kernel tasks, so
	 * pretend they are never asleep (which is pretty accurate).
	 */
	if (kslot < NR_TASKS)
		l->l_slptime = 0;
	else
		l->l_slptime = (uptime - kp->p_dequeued) / hz;

	l->l_priority = kp->p_priority;
	l->l_usrpri = kp->p_priority;
	l->l_cpuid = kp->p_cpu;
	ticks_to_timeval(&tv, kp->p_user_time + kp->p_sys_time);
	l->l_rtime_sec = tv.tv_sec;
	l->l_rtime_usec = tv.tv_usec;

	/*
	 * Obtain CPU usage percentages and estimates through library code
	 * shared between the kernel and this service; see its source for
	 * details.  We note that the produced estcpu value is rather different
	 * from the one produced by NetBSD, but this should not be a problem.
	 */
	l->l_pctcpu = cpuavg_getstats(&kp->p_cpuavg, &l->l_cpticks, estcpu,
	    uptime, hz);
}

/*
 * Fill a LWP structure for a kernel task.  Each kernel task has its own LWP,
 * and all of them have negative PIDs.
 */
static void
fill_lwp_kern(struct kinfo_lwp * l, int kslot)
{
	uint32_t estcpu;

	memset(l, 0, sizeof(*l));

	l->l_flag = L_INMEM | L_SINTR | L_SYSTEM;
	l->l_stat = LSSLEEP;
	l->l_pid = kslot - NR_TASKS;

	/*
	 * When showing LWP entries, ps(1) uses the process name rather than
	 * the LWP name.  All kernel tasks are therefore shown as "[kernel]"
	 * anyway.  We use the wmesg field to show the actual kernel task name.
	 */
	l->l_wchan = ((uint64_t)(l->l_pid) << 8) | 0x00;
	strlcpy(l->l_wmesg, proc_tab[kslot].p_name, sizeof(l->l_wmesg));
	strlcpy(l->l_name, "kernel", sizeof(l->l_name));

	fill_lwp_common(l, kslot, &estcpu);
}

/*
 * Fill a LWP structure for a user process.
 */
static void
fill_lwp_user(struct kinfo_lwp * l, int mslot)
{
	struct mproc *mp;
	uint32_t estcpu;

	memset(l, 0, sizeof(*l));

	mp = &mproc_tab[mslot];

	l->l_flag = L_INMEM;
	l->l_stat = get_lwp_stat(mslot, &l->l_wchan, l->l_wmesg,
	    sizeof(l->l_wmesg), &l->l_flag);
	l->l_pid = mp->mp_pid;
	strlcpy(l->l_name, mp->mp_name, sizeof(l->l_name));

	fill_lwp_common(l, NR_TASKS + mslot, &estcpu);
}

/*
 * Implementation of CTL_KERN KERN_LWP.
 */
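/*
 * The three name components handled below are the tail of the NetBSD sysctl
 * name { CTL_KERN, KERN_LWP, pid, elem_size, elem_count }.  A hypothetical
 * userland request for all LWPs would thus be set up roughly as:
 *
 *	int mib[5] = { CTL_KERN, KERN_LWP, -1, sizeof(struct kinfo_lwp), n };
 *	sysctl(mib, 5, buf, &len, NULL, 0);
 *
 * where 'n' bounds the number of entries copied out, and a pid of 0 selects
 * only the kernel's own LWPs (the kernel tasks).
 */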
ssize_t
mib_kern_lwp(struct mib_call * call, struct mib_node * node __unused,
	struct mib_oldp * oldp, struct mib_newp * newp __unused)
{
	struct kinfo_lwp lwp;
	struct mproc *mp;
	size_t copysz;
	ssize_t off;
	pid_t pid;
	int r, elsz, elmax, kslot, mslot, last_mslot;

	if (call->call_namelen != 3)
		return EINVAL;

	pid = (pid_t)call->call_name[0];
	elsz = call->call_name[1];
	elmax = call->call_name[2]; /* redundant with the given oldlen.. */

	if (pid < -1 || elsz <= 0 || elmax < 0)
		return EINVAL;

	if (!update_tables())
		return EINVAL;

	off = 0;
	copysz = MIN((size_t)elsz, sizeof(lwp));

	/*
	 * We model kernel tasks as LWP threads of the kernel (with PID 0).
	 * Modeling the kernel tasks as processes with negative PIDs, like
	 * ProcFS does, conflicts with the KERN_LWP API here: a PID of -1
	 * indicates that the caller wants a full listing of LWPs.
	 */
	if (pid <= 0) {
		for (kslot = 0; kslot < NR_TASKS; kslot++) {
			if (mib_inrange(oldp, off) && elmax > 0) {
				fill_lwp_kern(&lwp, kslot);
				if ((r = mib_copyout(oldp, off, &lwp,
				    copysz)) < 0)
					return r;
				elmax--;
			}
			off += elsz;
		}

		/* No need to add extra space here: NR_TASKS is static. */
		if (pid == 0)
			return off;
	}

	/*
	 * With PID 0 out of the way: the user requested the LWP for either a
	 * specific user process (pid > 0), or for all processes (pid < 0).
	 */
	if (pid > 0) {
		if ((mslot = get_mslot(pid)) == NO_SLOT ||
		    (mproc_tab[mslot].mp_flags & (TRACE_ZOMBIE | ZOMBIE)))
			return ESRCH;
		last_mslot = mslot;
	} else {
		mslot = 0;
		last_mslot = NR_PROCS - 1;
	}

	for (; mslot <= last_mslot; mslot++) {
		mp = &mproc_tab[mslot];

		if ((mp->mp_flags & (IN_USE | TRACE_ZOMBIE | ZOMBIE)) !=
		    IN_USE)
			continue;

		if (mib_inrange(oldp, off) && elmax > 0) {
			fill_lwp_user(&lwp, mslot);
			if ((r = mib_copyout(oldp, off, &lwp, copysz)) < 0)
				return r;
			elmax--;
		}
		off += elsz;
	}

	if (oldp == NULL && pid < 0)
		off += EXTRA_PROCS * elsz;

	return off;
}


/*
 * Fill the part of a process structure that is common between kernel tasks and
 * user processes.
 */
static void
fill_proc2_common(struct kinfo_proc2 * p, int kslot)
{
	struct vm_usage_info vui;
	struct timeval tv;
	struct proc *kp;
	struct kinfo_lwp l;

	kp = &proc_tab[kslot];

	/*
	 * Much of the information in the LWP structure also ends up in the
	 * process structure.  In order to avoid duplication of some important
	 * code, first generate LWP values and then copy them into the
	 * process structure.
	 */
	memset(&l, 0, sizeof(l));
	fill_lwp_common(&l, kslot, &p->p_estcpu);

	/* Obtain memory usage information from VM.  Ignore failures. */
	memset(&vui, 0, sizeof(vui));
	(void)vm_info_usage(kp->p_endpoint, &vui);

	ticks_to_timeval(&tv, kp->p_user_time + kp->p_sys_time);
	p->p_rtime_sec = l.l_rtime_sec;
	p->p_rtime_usec = l.l_rtime_usec;
	p->p_cpticks = l.l_cpticks;
	p->p_pctcpu = l.l_pctcpu;
	p->p_swtime = l.l_swtime;
	p->p_slptime = l.l_slptime;
	p->p_uticks = kp->p_user_time;
	p->p_sticks = kp->p_sys_time;
	/* TODO: p->p_iticks */
	ticks_to_timeval(&tv, kp->p_user_time);
	p->p_uutime_sec = tv.tv_sec;
	p->p_uutime_usec = tv.tv_usec;
	ticks_to_timeval(&tv, kp->p_sys_time);
	p->p_ustime_sec = tv.tv_sec;
	p->p_ustime_usec = tv.tv_usec;

	p->p_priority = l.l_priority;
	p->p_usrpri = l.l_usrpri;

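	/*
	 * The usage figures reported by VM are byte counts; the kinfo_proc2
	 * fields below are expressed in pages, hence the rounding-up
	 * conversion with howmany().
	 */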
	p->p_vm_rssize = howmany(vui.vui_total, PAGE_SIZE);
	p->p_vm_vsize = howmany(vui.vui_virtual, PAGE_SIZE);
	p->p_vm_msize = howmany(vui.vui_mvirtual, PAGE_SIZE);

	p->p_uru_maxrss = vui.vui_maxrss;
	p->p_uru_minflt = vui.vui_minflt;
	p->p_uru_majflt = vui.vui_majflt;

	p->p_cpuid = l.l_cpuid;
}

/*
 * Fill a process structure for the kernel pseudo-process (with PID 0).
 */
static void
fill_proc2_kern(struct kinfo_proc2 * p)
{

	memset(p, 0, sizeof(*p));

	p->p_flag = L_INMEM | L_SYSTEM | L_SINTR;
	p->p_pid = 0;
	p->p_stat = LSSLEEP;
	p->p_nice = NZERO;

	/* Use the KERNEL task wchan, for consistency between ps and top. */
	p->p_wchan = ((uint64_t)KERNEL << 8) | 0x00;
	strlcpy(p->p_wmesg, "kernel", sizeof(p->p_wmesg));

	strlcpy(p->p_comm, "kernel", sizeof(p->p_comm));
	p->p_realflag = P_INMEM | P_SYSTEM | P_SINTR;
	p->p_realstat = SACTIVE;
	p->p_nlwps = NR_TASKS;

	/*
	 * By using the KERNEL slot here, the kernel process will get a proper
	 * CPU usage average.
	 */
	fill_proc2_common(p, KERNEL + NR_TASKS);
}

/*
 * Fill a process structure for a user process.
 */
static void
fill_proc2_user(struct kinfo_proc2 * p, int mslot)
{
	struct mproc *mp;
	struct fproc *fp;
	time_t boottime;
	dev_t tty;
	struct timeval tv;
	int i, r, kslot, zombie;

	memset(p, 0, sizeof(*p));

	if ((r = getuptime(NULL, NULL, &boottime)) != OK)
		panic("getuptime failed: %d", r);

	kslot = NR_TASKS + mslot;
	mp = &mproc_tab[mslot];
	fp = &fproc_tab[mslot];

	zombie = (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE));
	tty = (!zombie) ? fp->fp_tty : NO_DEV;

	p->p_eflag = 0;
	if (tty != NO_DEV)
		p->p_eflag |= EPROC_CTTY;
	if (mp->mp_pid == mp->mp_procgrp) /* TODO: job control support */
		p->p_eflag |= EPROC_SLEADER;

	p->p_exitsig = SIGCHLD; /* TODO */

	p->p_flag = P_INMEM;
	if (mp->mp_flags & TAINTED)
		p->p_flag |= P_SUGID;
	if (mp->mp_tracer != NO_TRACER)
		p->p_flag |= P_TRACED;
	if (tty != NO_DEV)
		p->p_flag |= P_CONTROLT;
	p->p_pid = mp->mp_pid;
	if (mp->mp_parent >= 0 && mp->mp_parent < NR_PROCS)
		p->p_ppid = mproc_tab[mp->mp_parent].mp_pid;
	p->p_sid = mp->mp_procgrp; /* TODO: job control support */
	p->p__pgid = mp->mp_procgrp;
	p->p_tpgid = (tty != NO_DEV) ? mp->mp_procgrp : 0;
	p->p_uid = mp->mp_effuid;
	p->p_ruid = mp->mp_realuid;
	p->p_gid = mp->mp_effgid;
	p->p_rgid = mp->mp_realgid;
	p->p_ngroups = MIN(mp->mp_ngroups, KI_NGROUPS);
	for (i = 0; i < p->p_ngroups; i++)
		p->p_groups[i] = mp->mp_sgroups[i];
	p->p_tdev = tty;
	memcpy(&p->p_siglist, &mp->mp_sigpending, sizeof(p->p_siglist));
	memcpy(&p->p_sigmask, &mp->mp_sigmask, sizeof(p->p_sigmask));
	memcpy(&p->p_sigcatch, &mp->mp_catch, sizeof(p->p_sigcatch));
	memcpy(&p->p_sigignore, &mp->mp_ignore, sizeof(p->p_sigignore));
	p->p_nice = mp->mp_nice + NZERO;
	strlcpy(p->p_comm, mp->mp_name, sizeof(p->p_comm));
	p->p_uvalid = 1;
	ticks_to_timeval(&tv, mp->mp_started);
	p->p_ustart_sec = boottime + tv.tv_sec;
	p->p_ustart_usec = tv.tv_usec;
	/* TODO: other rusage fields */
	ticks_to_timeval(&tv, mp->mp_child_utime + mp->mp_child_stime);
	p->p_uctime_sec = tv.tv_sec;
	p->p_uctime_usec = tv.tv_usec;
	p->p_realflag = p->p_flag;
	p->p_nlwps = (zombie) ? 0 : 1;

	p->p_stat = get_lwp_stat(mslot, &p->p_wchan, p->p_wmesg,
	    sizeof(p->p_wmesg), &p->p_flag);

	switch (p->p_stat) {
	case LSRUN:
		p->p_realstat = SACTIVE;
		p->p_nrlwps = 1;
		break;
	case LSSLEEP:
		p->p_realstat = SACTIVE;
		if (p->p_flag & L_SINTR)
			p->p_realflag |= P_SINTR;
		break;
	case LSSTOP:
		p->p_realstat = SSTOP;
		break;
	case LSZOMB:
		p->p_realstat = SZOMB;
		break;
	case LSDEAD:
		p->p_stat = LSZOMB; /* ps(1) STAT does not know LSDEAD */
		p->p_realstat = SDEAD;
		break;
	default:
		assert(0);
	}

	if (!zombie)
		fill_proc2_common(p, kslot);
}

/*
 * Implementation of CTL_KERN KERN_PROC2.
 */
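/*
 * The four name components handled below are the tail of the NetBSD sysctl
 * name { CTL_KERN, KERN_PROC2, op, arg, elem_size, elem_count }.  A purely
 * illustrative userland request for all processes owned by a given user
 * would be set up roughly as:
 *
 *	int mib[6] = { CTL_KERN, KERN_PROC2, KERN_PROC_UID, uid,
 *	    sizeof(struct kinfo_proc2), n };
 *	sysctl(mib, 6, buf, &len, NULL, 0);
 *
 * with 'uid' and the entry limit 'n' supplied by the caller.
 */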
ssize_t
mib_kern_proc2(struct mib_call * call, struct mib_node * node __unused,
	struct mib_oldp * oldp, struct mib_newp * newp __unused)
{
	struct kinfo_proc2 proc2;
	struct mproc *mp;
	size_t copysz;
	ssize_t off;
	dev_t tty;
	int r, req, arg, elsz, elmax, kmatch, zombie, mslot;

	if (call->call_namelen != 4)
		return EINVAL;

	req = call->call_name[0];
	arg = call->call_name[1];
	elsz = call->call_name[2];
	elmax = call->call_name[3]; /* redundant with the given oldlen.. */

	/*
	 * The kernel is special, in that it does not have a slot in the PM or
	 * VFS tables.  As such, it is dealt with separately.  While checking
	 * arguments, we might as well check whether the kernel is matched.
	 */
	switch (req) {
	case KERN_PROC_ALL:
		kmatch = TRUE;
		break;
	case KERN_PROC_PID:
	case KERN_PROC_SESSION:
	case KERN_PROC_PGRP:
	case KERN_PROC_UID:
	case KERN_PROC_RUID:
	case KERN_PROC_GID:
	case KERN_PROC_RGID:
		kmatch = (arg == 0);
		break;
	case KERN_PROC_TTY:
		kmatch = ((dev_t)arg == KERN_PROC_TTY_NODEV);
		break;
	default:
		return EINVAL;
	}

	if (elsz <= 0 || elmax < 0)
		return EINVAL;

	if (!update_tables())
		return EINVAL;

	off = 0;
	copysz = MIN((size_t)elsz, sizeof(proc2));

	if (kmatch) {
		if (mib_inrange(oldp, off) && elmax > 0) {
			fill_proc2_kern(&proc2);
			if ((r = mib_copyout(oldp, off, &proc2, copysz)) < 0)
				return r;
			elmax--;
		}
		off += elsz;
	}

	for (mslot = 0; mslot < NR_PROCS; mslot++) {
		mp = &mproc_tab[mslot];

		if (!(mp->mp_flags & IN_USE))
			continue;

		switch (req) {
		case KERN_PROC_PID:
			if ((pid_t)arg != mp->mp_pid)
				continue;
			break;
		case KERN_PROC_SESSION: /* TODO: job control support */
		case KERN_PROC_PGRP:
			if ((pid_t)arg != mp->mp_procgrp)
				continue;
			break;
		case KERN_PROC_TTY:
			if ((dev_t)arg == KERN_PROC_TTY_REVOKE)
				continue; /* TODO: revoke(2) support */
			/* Do not access the fproc_tab slot of zombies. */
			zombie = (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE));
			tty = (!zombie) ? fproc_tab[mslot].fp_tty : NO_DEV;
			if ((dev_t)arg == KERN_PROC_TTY_NODEV) {
				if (tty != NO_DEV)
					continue;
			} else if ((dev_t)arg == NO_DEV || (dev_t)arg != tty)
				continue;
			break;
		case KERN_PROC_UID:
			if ((uid_t)arg != mp->mp_effuid)
				continue;
			break;
		case KERN_PROC_RUID:
			if ((uid_t)arg != mp->mp_realuid)
				continue;
			break;
		case KERN_PROC_GID:
			if ((gid_t)arg != mp->mp_effgid)
				continue;
			break;
		case KERN_PROC_RGID:
			if ((gid_t)arg != mp->mp_realgid)
				continue;
			break;
		}

		if (mib_inrange(oldp, off) && elmax > 0) {
			fill_proc2_user(&proc2, mslot);
			if ((r = mib_copyout(oldp, off, &proc2, copysz)) < 0)
				return r;
			elmax--;
		}
		off += elsz;
	}

	if (oldp == NULL && req != KERN_PROC_PID)
		off += EXTRA_PROCS * elsz;

	return off;
}

/*
 * Implementation of CTL_KERN KERN_PROC_ARGS.
 */
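/*
 * The two name components handled below are the tail of the NetBSD sysctl
 * name { CTL_KERN, KERN_PROC_ARGS, pid, op }, where 'op' selects the argument
 * vector, the environment, or just the count of either.  As a purely
 * illustrative userland sketch:
 *
 *	int mib[4] = { CTL_KERN, KERN_PROC_ARGS, pid, KERN_PROC_ARGV };
 *	sysctl(mib, 4, buf, &len, NULL, 0);
 *
 * returns the process's argument strings, each null terminated, in 'buf'.
 */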
915*25d39513SDavid van Moolenbroek ssize_t
916*25d39513SDavid van Moolenbroek mib_kern_proc_args(struct mib_call * call, struct mib_node * node __unused,
917*25d39513SDavid van Moolenbroek 	struct mib_oldp * oldp, struct mib_newp * newp __unused)
918*25d39513SDavid van Moolenbroek {
919*25d39513SDavid van Moolenbroek 	char vbuf[PAGE_SIZE], sbuf[PAGE_SIZE], obuf[PAGE_SIZE];
920*25d39513SDavid van Moolenbroek 	struct ps_strings pss;
921*25d39513SDavid van Moolenbroek 	struct mproc *mp;
922*25d39513SDavid van Moolenbroek 	char *buf, *p, *q, *pptr;
923*25d39513SDavid van Moolenbroek 	vir_bytes vaddr, vpage, spage, paddr, ppage;
924*25d39513SDavid van Moolenbroek 	size_t max, off, olen, oleft, oldlen, bytes, pleft;
925*25d39513SDavid van Moolenbroek 	unsigned int copybudget;
926*25d39513SDavid van Moolenbroek 	pid_t pid;
927*25d39513SDavid van Moolenbroek 	int req, mslot, count, aborted, ended;
928*25d39513SDavid van Moolenbroek 	ssize_t r;
929*25d39513SDavid van Moolenbroek 
930*25d39513SDavid van Moolenbroek 	if (call->call_namelen != 2)
931*25d39513SDavid van Moolenbroek 		return EINVAL;
932*25d39513SDavid van Moolenbroek 
933*25d39513SDavid van Moolenbroek 	pid = call->call_name[0];
934*25d39513SDavid van Moolenbroek 	req = call->call_name[1];
935*25d39513SDavid van Moolenbroek 
936*25d39513SDavid van Moolenbroek 	switch (req) {
937*25d39513SDavid van Moolenbroek 	case KERN_PROC_ARGV:
938*25d39513SDavid van Moolenbroek 	case KERN_PROC_ENV:
939*25d39513SDavid van Moolenbroek 	case KERN_PROC_NARGV:
940*25d39513SDavid van Moolenbroek 	case KERN_PROC_NENV:
941*25d39513SDavid van Moolenbroek 		break;
942*25d39513SDavid van Moolenbroek 	default:
943*25d39513SDavid van Moolenbroek 		return EOPNOTSUPP;
944*25d39513SDavid van Moolenbroek 	}
945*25d39513SDavid van Moolenbroek 
946*25d39513SDavid van Moolenbroek 	if (!update_tables())
947*25d39513SDavid van Moolenbroek 		return EINVAL;
948*25d39513SDavid van Moolenbroek 
949*25d39513SDavid van Moolenbroek 	if ((mslot = get_mslot(pid)) == NO_SLOT)
950*25d39513SDavid van Moolenbroek 		return ESRCH;
951*25d39513SDavid van Moolenbroek 	mp = &mproc_tab[mslot];
952*25d39513SDavid van Moolenbroek 	if (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE))
953*25d39513SDavid van Moolenbroek 		return ESRCH;
954*25d39513SDavid van Moolenbroek 
955*25d39513SDavid van Moolenbroek 	/* We can return the count field size without copying in any data. */
956*25d39513SDavid van Moolenbroek 	if (oldp == NULL && (req == KERN_PROC_NARGV || req == KERN_PROC_NENV))
957*25d39513SDavid van Moolenbroek 		return sizeof(count);
958*25d39513SDavid van Moolenbroek 
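	/* Fetch the ps_strings structure from the top of the exec frame. */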
959*25d39513SDavid van Moolenbroek 	if (sys_datacopy(mp->mp_endpoint,
960*25d39513SDavid van Moolenbroek 	    mp->mp_frame_addr + mp->mp_frame_len - sizeof(pss),
961*25d39513SDavid van Moolenbroek 	    SELF, (vir_bytes)&pss, sizeof(pss)) != OK)
962*25d39513SDavid van Moolenbroek 		return EINVAL;
963*25d39513SDavid van Moolenbroek 
964*25d39513SDavid van Moolenbroek 	/*
965*25d39513SDavid van Moolenbroek 	 * Determine the upper size limit of the requested data.  Not only may
966*25d39513SDavid van Moolenbroek 	 * the size never exceed ARG_MAX, it may also not exceed the frame
967*25d39513SDavid van Moolenbroek 	 * length as given in its original exec call.  In fact, the frame
968*25d39513SDavid van Moolenbroek 	 * length should be substantially larger: all strings for both the
969*25d39513SDavid van Moolenbroek 	 * arguments and the environment are in there, along with other stuff,
970*25d39513SDavid van Moolenbroek 	 * and there must be no overlap between strings.  It is possible that
971*25d39513SDavid van Moolenbroek 	 * the application called setproctitle(3), in which case the ps_strings
972*25d39513SDavid van Moolenbroek 	 * pointers refer to data outside the frame altogether.  However, this
973*25d39513SDavid van Moolenbroek 	 * data should not exceed 2048 bytes, and we cover this by rounding up
974*25d39513SDavid van Moolenbroek 	 * the frame length to a multiple of the page size.  Anyhow, NetBSD
975*25d39513SDavid van Moolenbroek 	 * blindly returns ARG_MAX when asked for a size estimate, so with this
976*25d39513SDavid van Moolenbroek 	 * maximum we are already quite a bit more accurate.
977*25d39513SDavid van Moolenbroek 	 */
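	/*
	 * For example, with 4 KB pages and a 5000-byte frame, the limit
	 * computed below is roundup(MIN(5000, ARG_MAX), 4096) = 8192 bytes.
	 */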
978*25d39513SDavid van Moolenbroek 	max = roundup(MIN(mp->mp_frame_len, ARG_MAX), PAGE_SIZE);
979*25d39513SDavid van Moolenbroek 
980*25d39513SDavid van Moolenbroek 	switch (req) {
981*25d39513SDavid van Moolenbroek 	case KERN_PROC_NARGV:
982*25d39513SDavid van Moolenbroek 		count = pss.ps_nargvstr;
983*25d39513SDavid van Moolenbroek 		return mib_copyout(oldp, 0, &count, sizeof(count));
984*25d39513SDavid van Moolenbroek 	case KERN_PROC_NENV:
985*25d39513SDavid van Moolenbroek 		count = pss.ps_nenvstr;
986*25d39513SDavid van Moolenbroek 		return mib_copyout(oldp, 0, &count, sizeof(count));
987*25d39513SDavid van Moolenbroek 	case KERN_PROC_ARGV:
988*25d39513SDavid van Moolenbroek 		if (oldp == NULL)
989*25d39513SDavid van Moolenbroek 			return max;
990*25d39513SDavid van Moolenbroek 		vaddr = (vir_bytes)pss.ps_argvstr;
991*25d39513SDavid van Moolenbroek 		count = pss.ps_nargvstr;
992*25d39513SDavid van Moolenbroek 		break;
993*25d39513SDavid van Moolenbroek 	case KERN_PROC_ENV:
994*25d39513SDavid van Moolenbroek 		if (oldp == NULL)
995*25d39513SDavid van Moolenbroek 			return max;
996*25d39513SDavid van Moolenbroek 		vaddr = (vir_bytes)pss.ps_envstr;
997*25d39513SDavid van Moolenbroek 		count = pss.ps_nenvstr;
998*25d39513SDavid van Moolenbroek 		break;
999*25d39513SDavid van Moolenbroek 	}
1000*25d39513SDavid van Moolenbroek 
1001*25d39513SDavid van Moolenbroek 	/*
1002*25d39513SDavid van Moolenbroek 	 * Go through the strings.  Copy in entire, machine-aligned pages at
1003*25d39513SDavid van Moolenbroek 	 * once, in the hope that all data is stored consecutively, which it
1004*25d39513SDavid van Moolenbroek 	 * should be: we expect that the vector is followed by the strings, and
1005*25d39513SDavid van Moolenbroek 	 * that the strings are stored in order of vector reference.  We keep
1006*25d39513SDavid van Moolenbroek 	 * up to two pages with copied-in data: one for the vector, and
1007*25d39513SDavid van Moolenbroek 	 * optionally one for string data.  In addition, we keep one page with
1008*25d39513SDavid van Moolenbroek 	 * data to be copied out, so that we do not cause a lot of copy
1009*25d39513SDavid van Moolenbroek 	 * overhead for short strings.
1010*25d39513SDavid van Moolenbroek 	 *
1011*25d39513SDavid van Moolenbroek 	 * We stop whenever any of the following conditions are met:
1012*25d39513SDavid van Moolenbroek 	 * - copying in data from the target process fails for any reason;
1013*25d39513SDavid van Moolenbroek 	 * - we have processed the last index ('count') into the vector;
1014*25d39513SDavid van Moolenbroek 	 * - the current vector element is a NULL pointer;
1015*25d39513SDavid van Moolenbroek 	 * - the requested number of output bytes ('oldlen') has been reached;
1016*25d39513SDavid van Moolenbroek 	 * - the maximum number of output bytes ('max') has been reached;
1017*25d39513SDavid van Moolenbroek 	 * - the number of page copy-ins exceeds an estimated threshold;
1018*25d39513SDavid van Moolenbroek 	 * - copying out data fails for any reason (we then return the error).
1019*25d39513SDavid van Moolenbroek 	 *
1020*25d39513SDavid van Moolenbroek 	 * We limit the number of page copy-ins because otherwise a rogue
1021*25d39513SDavid van Moolenbroek 	 * process could create an argument vector consisting of only two-byte
1022*25d39513SDavid van Moolenbroek 	 * strings that all span two pages, causing us to copy up to 1GB of
1023*25d39513SDavid van Moolenbroek 	 * data with the current ARG_MAX value of 256K.  No reasonable vector
1024*25d39513SDavid van Moolenbroek 	 * should cause more than (ARG_MAX / PAGE_SIZE) page copies for
1025*25d39513SDavid van Moolenbroek 	 * strings; we are nice enough to allow twice that.  Vector copies do
1026*25d39513SDavid van Moolenbroek 	 * not count, as they are linear anyway.
1027*25d39513SDavid van Moolenbroek 	 *
1028*25d39513SDavid van Moolenbroek 	 * Unlike every other sysctl(2) call, we are supposed to truncate the
1029*25d39513SDavid van Moolenbroek 	 * resulting size (the returned 'oldlen') to the requested size (the
1030*25d39513SDavid van Moolenbroek 	 * given 'oldlen') *and* return the resulting size, rather than ENOMEM
1031*25d39513SDavid van Moolenbroek 	 * and the real size.  Unfortunately, libkvm actually relies on this.
1032*25d39513SDavid van Moolenbroek 	 *
1033*25d39513SDavid van Moolenbroek 	 * Generally speaking, upon failure we just return a truncated result.
1034*25d39513SDavid van Moolenbroek 	 * In case of truncation, the data we copy out need not be null
1035*25d39513SDavid van Moolenbroek 	 * terminated.  It is up to userland to process the data correctly.
1036*25d39513SDavid van Moolenbroek 	 */
1037*25d39513SDavid van Moolenbroek 	if (trunc_page(vaddr) == 0 || vaddr % sizeof(char *) != 0)
1038*25d39513SDavid van Moolenbroek 		return 0;
1039*25d39513SDavid van Moolenbroek 
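	/*
	 * 'off' counts the bytes already copied out to userland, and 'olen'
	 * the bytes gathered in the output buffer but not yet copied out.
	 */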
1040*25d39513SDavid van Moolenbroek 	off = 0;
1041*25d39513SDavid van Moolenbroek 	olen = 0;
1042*25d39513SDavid van Moolenbroek 	aborted = FALSE;
1043*25d39513SDavid van Moolenbroek 
1044*25d39513SDavid van Moolenbroek 	oldlen = mib_getoldlen(oldp);
1045*25d39513SDavid van Moolenbroek 	if (oldlen > max)
1046*25d39513SDavid van Moolenbroek 		oldlen = max;
1047*25d39513SDavid van Moolenbroek 
1048*25d39513SDavid van Moolenbroek 	copybudget = (ARG_MAX / PAGE_SIZE) * 2;
1049*25d39513SDavid van Moolenbroek 
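	/*
	 * Page address zero doubles as "no page fetched yet": the checks
	 * above and below guarantee that no valid vector or string page has
	 * address zero.
	 */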
1050*25d39513SDavid van Moolenbroek 	vpage = 0;
1051*25d39513SDavid van Moolenbroek 	spage = 0;
1052*25d39513SDavid van Moolenbroek 
1053*25d39513SDavid van Moolenbroek 	while (count > 0 && off + olen < oldlen && !aborted) {
1054*25d39513SDavid van Moolenbroek 		/*
1055*25d39513SDavid van Moolenbroek 		 * Start by fetching the page containing the current vector
1056*25d39513SDavid van Moolenbroek 		 * element, if needed.  We could limit the fetch to the vector
1057*25d39513SDavid van Moolenbroek 		 * size, but our hope is that for the simple cases, the strings
1058*25d39513SDavid van Moolenbroek 		 * are on the remainder of the same page, so we save a copy
1059*25d39513SDavid van Moolenbroek 		 * call.  TODO: since the strings should follow the vector, we
1060*25d39513SDavid van Moolenbroek 		 * could start the copy at the base of the vector.
1061*25d39513SDavid van Moolenbroek 		 */
1062*25d39513SDavid van Moolenbroek 		if (trunc_page(vaddr) != vpage) {
1063*25d39513SDavid van Moolenbroek 			vpage = trunc_page(vaddr);
1064*25d39513SDavid van Moolenbroek 			if (sys_datacopy(mp->mp_endpoint, vpage, SELF,
1065*25d39513SDavid van Moolenbroek 			    (vir_bytes)vbuf, PAGE_SIZE) != OK)
1066*25d39513SDavid van Moolenbroek 				break;
1067*25d39513SDavid van Moolenbroek 		}
1068*25d39513SDavid van Moolenbroek 
1069*25d39513SDavid van Moolenbroek 		/* Get the current vector element, pointing to a string. */
1070*25d39513SDavid van Moolenbroek 		memcpy(&pptr, &vbuf[vaddr - vpage], sizeof(pptr));
1071*25d39513SDavid van Moolenbroek 		paddr = (vir_bytes)pptr;
1072*25d39513SDavid van Moolenbroek 		ppage = trunc_page(paddr);
1073*25d39513SDavid van Moolenbroek 		if (ppage == 0)
1074*25d39513SDavid van Moolenbroek 			break;
1075*25d39513SDavid van Moolenbroek 
1076*25d39513SDavid van Moolenbroek 		/* Fetch the string itself, one page at a time at most. */
1077*25d39513SDavid van Moolenbroek 		do {
1078*25d39513SDavid van Moolenbroek 			/*
1079*25d39513SDavid van Moolenbroek 			 * See if the string pointer falls inside either the
1080*25d39513SDavid van Moolenbroek 			 * vector page or the previously fetched string page
1081*25d39513SDavid van Moolenbroek 			 * (if any).  If not, fetch a string page.
1082*25d39513SDavid van Moolenbroek 			 */
1083*25d39513SDavid van Moolenbroek 			if (ppage == vpage) {
1084*25d39513SDavid van Moolenbroek 				buf = vbuf;
1085*25d39513SDavid van Moolenbroek 			} else if (ppage == spage) {
1086*25d39513SDavid van Moolenbroek 				buf = sbuf;
1087*25d39513SDavid van Moolenbroek 			} else {
1088*25d39513SDavid van Moolenbroek 				if (--copybudget == 0) {
1089*25d39513SDavid van Moolenbroek 					aborted = TRUE;
1090*25d39513SDavid van Moolenbroek 					break;
1091*25d39513SDavid van Moolenbroek 				}
1092*25d39513SDavid van Moolenbroek 				spage = ppage;
1093*25d39513SDavid van Moolenbroek 				if (sys_datacopy(mp->mp_endpoint, spage, SELF,
1094*25d39513SDavid van Moolenbroek 				    (vir_bytes)sbuf, PAGE_SIZE) != OK) {
1095*25d39513SDavid van Moolenbroek 					aborted = TRUE;
1096*25d39513SDavid van Moolenbroek 					break;
1097*25d39513SDavid van Moolenbroek 				}
1098*25d39513SDavid van Moolenbroek 				buf = sbuf;
1099*25d39513SDavid van Moolenbroek 			}
1100*25d39513SDavid van Moolenbroek 
1101*25d39513SDavid van Moolenbroek 			/*
1102*25d39513SDavid van Moolenbroek 			 * We now have a string fragment in a buffer.  See if
1103*25d39513SDavid van Moolenbroek 			 * the string is null terminated.  If not, all the data
1104*25d39513SDavid van Moolenbroek 			 * up to the buffer end is part of the string, and the
1105*25d39513SDavid van Moolenbroek 			 * string continues on the next page.
1106*25d39513SDavid van Moolenbroek 			 */
1107*25d39513SDavid van Moolenbroek 			p = &buf[paddr - ppage];
1108*25d39513SDavid van Moolenbroek 			pleft = PAGE_SIZE - (paddr - ppage);
1109*25d39513SDavid van Moolenbroek 			assert(pleft > 0);
1110*25d39513SDavid van Moolenbroek 
1111*25d39513SDavid van Moolenbroek 			if ((q = memchr(p, '\0', pleft)) != NULL) {
1112*25d39513SDavid van Moolenbroek 				bytes = (size_t)(q - p + 1);
1113*25d39513SDavid van Moolenbroek 				assert(bytes <= pleft);
1114*25d39513SDavid van Moolenbroek 				ended = TRUE;
1115*25d39513SDavid van Moolenbroek 			} else {
1116*25d39513SDavid van Moolenbroek 				bytes = pleft;
1117*25d39513SDavid van Moolenbroek 				ended = FALSE;
1118*25d39513SDavid van Moolenbroek 			}
1119*25d39513SDavid van Moolenbroek 
1120*25d39513SDavid van Moolenbroek 			/* Limit the result to the requested length. */
1121*25d39513SDavid van Moolenbroek 			if (off + olen + bytes > oldlen)
1122*25d39513SDavid van Moolenbroek 				bytes = oldlen - off - olen;
1123*25d39513SDavid van Moolenbroek 
1124*25d39513SDavid van Moolenbroek 			/*
1125*25d39513SDavid van Moolenbroek 			 * Add 'bytes' bytes from string pointer 'p' to the
1126*25d39513SDavid van Moolenbroek 			 * output buffer, copying out its contents to userland
1127*25d39513SDavid van Moolenbroek 			 * if it has filled up.
1128*25d39513SDavid van Moolenbroek 			 */
1129*25d39513SDavid van Moolenbroek 			if (olen + bytes > sizeof(obuf)) {
1130*25d39513SDavid van Moolenbroek 				oleft = sizeof(obuf) - olen;
1131*25d39513SDavid van Moolenbroek 				memcpy(&obuf[olen], p, oleft);
1132*25d39513SDavid van Moolenbroek 
1133*25d39513SDavid van Moolenbroek 				if ((r = mib_copyout(oldp, off, obuf,
1134*25d39513SDavid van Moolenbroek 				    sizeof(obuf))) < 0)
1135*25d39513SDavid van Moolenbroek 					return r;
1136*25d39513SDavid van Moolenbroek 				off += sizeof(obuf);
1137*25d39513SDavid van Moolenbroek 				olen = 0;
1138*25d39513SDavid van Moolenbroek 
1139*25d39513SDavid van Moolenbroek 				p += oleft;
1140*25d39513SDavid van Moolenbroek 				bytes -= oleft;
1141*25d39513SDavid van Moolenbroek 			}
1142*25d39513SDavid van Moolenbroek 			if (bytes > 0) {
1143*25d39513SDavid van Moolenbroek 				memcpy(&obuf[olen], p, bytes);
1144*25d39513SDavid van Moolenbroek 				olen += bytes;
1145*25d39513SDavid van Moolenbroek 			}
1146*25d39513SDavid van Moolenbroek 
1147*25d39513SDavid van Moolenbroek 			/*
1148*25d39513SDavid van Moolenbroek 			 * Continue as long as we have not yet found the string
1149*25d39513SDavid van Moolenbroek 			 * end, and we have not yet filled the output buffer.
1150*25d39513SDavid van Moolenbroek 			 */
1151*25d39513SDavid van Moolenbroek 			paddr += pleft;
1152*25d39513SDavid van Moolenbroek 			assert(trunc_page(paddr) == paddr);
1153*25d39513SDavid van Moolenbroek 			ppage = paddr;
1154*25d39513SDavid van Moolenbroek 		} while (!ended && off + olen < oldlen);
1155*25d39513SDavid van Moolenbroek 
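		/* Move on to the next vector element. */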
1156*25d39513SDavid van Moolenbroek 		vaddr += sizeof(char *);
1157*25d39513SDavid van Moolenbroek 		count--;
1158*25d39513SDavid van Moolenbroek 	}
1159*25d39513SDavid van Moolenbroek 
1160*25d39513SDavid van Moolenbroek 	/* Copy out any remainder of the output buffer. */
1161*25d39513SDavid van Moolenbroek 	if (olen > 0) {
1162*25d39513SDavid van Moolenbroek 		if ((r = mib_copyout(oldp, off, obuf, olen)) < 0)
1163*25d39513SDavid van Moolenbroek 			return r;
1164*25d39513SDavid van Moolenbroek 		off += olen;
1165*25d39513SDavid van Moolenbroek 	}
1166*25d39513SDavid van Moolenbroek 
1167*25d39513SDavid van Moolenbroek 	assert(off <= oldlen);
1168*25d39513SDavid van Moolenbroek 	return off;
1169*25d39513SDavid van Moolenbroek }
1170
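/*
 * For reference, below is a minimal userland sketch (not part of this
 * service, purely illustrative and not compiled here) of how the
 * KERN_PROC_ARGS node above is typically consumed through sysctl(2): one
 * call with a NULL old pointer to obtain a size estimate, and a second call
 * to fetch the NUL-separated argument strings.  The helper name and buffer
 * handling are hypothetical.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int
print_args(pid_t pid)
{
	int mib[4] = { CTL_KERN, KERN_PROC_ARGS, pid, KERN_PROC_ARGV };
	size_t len;
	char *buf, *p, *q;

	/* First call: a NULL old pointer yields only an upper size bound. */
	if (sysctl(mib, 4, NULL, &len, NULL, 0) == -1)
		return -1;
	if ((buf = malloc(len)) == NULL)
		return -1;
	/* Second call: fetch the strings, possibly truncated to 'len'. */
	if (sysctl(mib, 4, buf, &len, NULL, 0) == -1) {
		free(buf);
		return -1;
	}
	/* Print each argument; stop early if the last string was cut off. */
	for (p = buf; p < buf + len; p = q + 1) {
		if ((q = memchr(p, '\0', (size_t)(buf + len - p))) == NULL)
			break;
		printf("%s\n", p);
	}
	free(buf);
	return 0;
}
#endif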