xref: /openbsd-src/sys/kern/kern_sysctl.c (revision 43003dfe3ad45d1698bed8a37f2b0f5b14f20d4f)
1 /*	$OpenBSD: kern_sysctl.c,v 1.179 2009/08/09 10:40:17 blambert Exp $	*/
2 /*	$NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $	*/
3 
4 /*-
5  * Copyright (c) 1982, 1986, 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Mike Karels at Berkeley Software Design, Inc.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)kern_sysctl.c	8.4 (Berkeley) 4/14/94
36  */
37 
38 /*
39  * sysctl system call.
40  */
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/malloc.h>
46 #include <sys/proc.h>
47 #include <sys/resourcevar.h>
48 #include <sys/file.h>
49 #include <sys/filedesc.h>
50 #include <sys/vnode.h>
51 #include <sys/unistd.h>
52 #include <sys/buf.h>
53 #include <sys/ioctl.h>
54 #include <sys/tty.h>
55 #include <sys/disklabel.h>
56 #include <sys/disk.h>
57 #include <uvm/uvm_extern.h>
58 #include <sys/sysctl.h>
59 #include <sys/msgbuf.h>
60 #include <sys/dkstat.h>
61 #include <sys/vmmeter.h>
62 #include <sys/namei.h>
63 #include <sys/exec.h>
64 #include <sys/mbuf.h>
65 #include <sys/sensors.h>
66 #include <sys/pipe.h>
67 #include <sys/eventvar.h>
68 #include <sys/socketvar.h>
69 #include <sys/domain.h>
70 #include <sys/protosw.h>
71 #ifdef __HAVE_TIMECOUNTER
72 #include <sys/timetc.h>
73 #endif
74 #include <sys/evcount.h>
75 #include <sys/unpcb.h>
76 
77 #include <sys/mount.h>
78 #include <sys/syscallargs.h>
79 #include <dev/rndvar.h>
80 #include <dev/systrace.h>
81 
82 #include <net/route.h>
83 #include <netinet/in.h>
84 #include <netinet/in_systm.h>
85 #include <netinet/ip.h>
86 #include <netinet/in_pcb.h>
87 #include <netinet/ip6.h>
88 #include <netinet6/ip6_var.h>
89 
90 #ifdef DDB
91 #include <ddb/db_var.h>
92 #endif
93 
94 #ifdef SYSVMSG
95 #include <sys/msg.h>
96 #endif
97 #ifdef SYSVSEM
98 #include <sys/sem.h>
99 #endif
100 #ifdef SYSVSHM
101 #include <sys/shm.h>
102 #endif
103 
/*
 * Widen a kernel pointer to a 64-bit integer by way of u_long; used when
 * exporting kernel addresses in fixed-width structures (see fill_file2()).
 */
#define	PTRTOINT64(_x)	((u_int64_t)(u_long)(_x))

/* Statistics and counters defined elsewhere and exported read-only below. */
extern struct forkstat forkstat;
extern struct nchstats nchstats;
extern int nselcoll, fscale;
extern struct disklist_head disklist;
extern fixpt_t ccpu;
extern  long numvnodes;

/* Called by kern_sysctl() after a KERN_MAXCLUSTERS request succeeds. */
extern void nmbclust_update(void);

/* Sub-handlers whose prototypes are local to this file. */
int sysctl_diskinit(int, struct proc *);
int sysctl_proc_args(int *, u_int, void *, size_t *, struct proc *);
int sysctl_intrcnt(int *, u_int, void *, size_t *);
int sysctl_sensors(int *, u_int, void *, size_t *, void *, size_t);
int sysctl_emul(int *, u_int, void *, size_t *, void *, size_t);
int sysctl_cptime2(int *, u_int, void *, size_t *, void *, size_t);

/*
 * Optional hooks used by hw_sysctl(): when a hook is NULL the matching
 * HW_CPUSPEED / HW_SETPERF request fails with EOPNOTSUPP.
 * perflevel is the value exposed through HW_SETPERF; hw_sysctl() clamps it
 * to the range 0..100.
 */
int (*cpu_cpuspeed)(int *);
void (*cpu_setperf)(int);
int perflevel = 100;
125 
/*
 * sysctl_lock: lock to avoid too many processes vslocking a large amount
 * of memory at the same time.  sys___sysctl() takes it in write mode for
 * every request that supplies an old-value buffer.
 *
 * sysctl_disklock: not referenced in this part of the file.
 * NOTE(review): presumably serializes rebuilding of the disk name and
 * statistics buffers in sysctl_diskinit() -- confirm against that function.
 */
struct rwlock sysctl_lock = RWLOCK_INITIALIZER("sysctllk");
struct rwlock sysctl_disklock = RWLOCK_INITIALIZER("sysctldlk");
132 
133 int
134 sys___sysctl(struct proc *p, void *v, register_t *retval)
135 {
136 	struct sys___sysctl_args /* {
137 		syscallarg(int *) name;
138 		syscallarg(u_int) namelen;
139 		syscallarg(void *) old;
140 		syscallarg(size_t *) oldlenp;
141 		syscallarg(void *) new;
142 		syscallarg(size_t) newlen;
143 	} */ *uap = v;
144 	int error, dolock = 1;
145 	size_t savelen = 0, oldlen = 0;
146 	sysctlfn *fn;
147 	int name[CTL_MAXNAME];
148 
149 	if (SCARG(uap, new) != NULL &&
150 	    (error = suser(p, 0)))
151 		return (error);
152 	/*
153 	 * all top-level sysctl names are non-terminal
154 	 */
155 	if (SCARG(uap, namelen) > CTL_MAXNAME || SCARG(uap, namelen) < 2)
156 		return (EINVAL);
157 	error = copyin(SCARG(uap, name), name,
158 		       SCARG(uap, namelen) * sizeof(int));
159 	if (error)
160 		return (error);
161 
162 	switch (name[0]) {
163 	case CTL_KERN:
164 		fn = kern_sysctl;
165 		if (name[1] == KERN_VNODE)	/* XXX */
166 			dolock = 0;
167 		break;
168 	case CTL_HW:
169 		fn = hw_sysctl;
170 		break;
171 	case CTL_VM:
172 		fn = uvm_sysctl;
173 		break;
174 	case CTL_NET:
175 		fn = net_sysctl;
176 		break;
177 	case CTL_FS:
178 		fn = fs_sysctl;
179 		break;
180 	case CTL_VFS:
181 		fn = vfs_sysctl;
182 		break;
183 	case CTL_MACHDEP:
184 		fn = cpu_sysctl;
185 		break;
186 #ifdef DEBUG
187 	case CTL_DEBUG:
188 		fn = debug_sysctl;
189 		break;
190 #endif
191 #ifdef DDB
192 	case CTL_DDB:
193 		fn = ddb_sysctl;
194 		break;
195 #endif
196 	default:
197 		return (EOPNOTSUPP);
198 	}
199 
200 	if (SCARG(uap, oldlenp) &&
201 	    (error = copyin(SCARG(uap, oldlenp), &oldlen, sizeof(oldlen))))
202 		return (error);
203 	if (SCARG(uap, old) != NULL) {
204 		if ((error = rw_enter(&sysctl_lock, RW_WRITE|RW_INTR)) != 0)
205 			return (error);
206 		if (dolock) {
207 			if (atop(oldlen) > uvmexp.wiredmax - uvmexp.wired) {
208 				rw_exit_write(&sysctl_lock);
209 				return (ENOMEM);
210 			}
211 			error = uvm_vslock(p, SCARG(uap, old), oldlen,
212 			    VM_PROT_READ|VM_PROT_WRITE);
213 			if (error) {
214 				rw_exit_write(&sysctl_lock);
215 				return (error);
216 			}
217 		}
218 		savelen = oldlen;
219 	}
220 	error = (*fn)(&name[1], SCARG(uap, namelen) - 1, SCARG(uap, old),
221 	    &oldlen, SCARG(uap, new), SCARG(uap, newlen), p);
222 	if (SCARG(uap, old) != NULL) {
223 		if (dolock)
224 			uvm_vsunlock(p, SCARG(uap, old), savelen);
225 		rw_exit_write(&sysctl_lock);
226 	}
227 	if (error)
228 		return (error);
229 	if (SCARG(uap, oldlenp))
230 		error = copyout(&oldlen, SCARG(uap, oldlenp), sizeof(oldlen));
231 	return (error);
232 }
233 
/*
 * Attributes stored in the kernel.
 */
/* Host name and its length; kern_sysctl() KERN_HOSTNAME updates both. */
char hostname[MAXHOSTNAMELEN];
int hostnamelen;
/* Domain name and its length; kern_sysctl() KERN_DOMAINNAME updates both. */
char domainname[MAXHOSTNAMELEN];
int domainnamelen;
/* Host identifier, read and written as an int through KERN_HOSTID. */
long hostid;
/*
 * Buffers reported by hw_sysctl() HW_DISKNAMES / HW_DISKSTATS after a call
 * to sysctl_diskinit(); NULL until something allocates them.
 */
char *disknames = NULL;
struct diskstats *diskstats = NULL;
/*
 * Security level managed through KERN_SECURELVL: starts at -1 when the
 * kernel is built with INSECURE, otherwise at 0 (zero-initialized).
 */
#ifdef INSECURE
int securelevel = -1;
#else
int securelevel;
#endif
249 
250 /*
251  * kernel related system variables.
252  */
253 int
254 kern_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
255     size_t newlen, struct proc *p)
256 {
257 	int error, level, inthostid, stackgap;
258 	extern int somaxconn, sominconn;
259 	extern int usermount, nosuidcoredump;
260 	extern long cp_time[CPUSTATES];
261 	extern int stackgap_random;
262 #ifdef CRYPTO
263 	extern int usercrypto;
264 	extern int userasymcrypto;
265 	extern int cryptodevallowsoft;
266 #endif
267 	extern int maxlocksperuid;
268 
269 	/* all sysctl names at this level are terminal except a ton of them */
270 	if (namelen != 1) {
271 		switch (name[0]) {
272 		case KERN_PROC:
273 		case KERN_PROC2:
274 		case KERN_PROF:
275 		case KERN_MALLOCSTATS:
276 		case KERN_TTY:
277 		case KERN_POOL:
278 		case KERN_PROC_ARGS:
279 		case KERN_SYSVIPC_INFO:
280 		case KERN_SEMINFO:
281 		case KERN_SHMINFO:
282 		case KERN_INTRCNT:
283 		case KERN_WATCHDOG:
284 		case KERN_EMUL:
285 		case KERN_EVCOUNT:
286 #ifdef __HAVE_TIMECOUNTER
287 		case KERN_TIMECOUNTER:
288 #endif
289 		case KERN_CPTIME2:
290 		case KERN_FILE2:
291 			break;
292 		default:
293 			return (ENOTDIR);	/* overloaded */
294 		}
295 	}
296 
297 	switch (name[0]) {
298 	case KERN_OSTYPE:
299 		return (sysctl_rdstring(oldp, oldlenp, newp, ostype));
300 	case KERN_OSRELEASE:
301 		return (sysctl_rdstring(oldp, oldlenp, newp, osrelease));
302 	case KERN_OSREV:
303 		return (sysctl_rdint(oldp, oldlenp, newp, OpenBSD));
304 	case KERN_OSVERSION:
305 		return (sysctl_rdstring(oldp, oldlenp, newp, osversion));
306 	case KERN_VERSION:
307 		return (sysctl_rdstring(oldp, oldlenp, newp, version));
308 	case KERN_MAXVNODES:
309 		return(sysctl_int(oldp, oldlenp, newp, newlen, &maxvnodes));
310 	case KERN_MAXPROC:
311 		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxproc));
312 	case KERN_MAXFILES:
313 		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxfiles));
314 	case KERN_NFILES:
315 		return (sysctl_rdint(oldp, oldlenp, newp, nfiles));
316 	case KERN_TTYCOUNT:
317 		return (sysctl_rdint(oldp, oldlenp, newp, tty_count));
318 	case KERN_NUMVNODES:
319 		return (sysctl_rdint(oldp, oldlenp, newp, numvnodes));
320 	case KERN_ARGMAX:
321 		return (sysctl_rdint(oldp, oldlenp, newp, ARG_MAX));
322 	case KERN_NSELCOLL:
323 		return (sysctl_rdint(oldp, oldlenp, newp, nselcoll));
324 	case KERN_SECURELVL:
325 		level = securelevel;
326 		if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &level)) ||
327 		    newp == NULL)
328 			return (error);
329 		if ((securelevel > 0 || level < -1) &&
330 		    level < securelevel && p->p_pid != 1)
331 			return (EPERM);
332 		securelevel = level;
333 		return (0);
334 	case KERN_HOSTNAME:
335 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
336 		    hostname, sizeof(hostname));
337 		if (newp && !error)
338 			hostnamelen = newlen;
339 		return (error);
340 	case KERN_DOMAINNAME:
341 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
342 		    domainname, sizeof(domainname));
343 		if (newp && !error)
344 			domainnamelen = newlen;
345 		return (error);
346 	case KERN_HOSTID:
347 		inthostid = hostid;  /* XXX assumes sizeof long <= sizeof int */
348 		error =  sysctl_int(oldp, oldlenp, newp, newlen, &inthostid);
349 		hostid = inthostid;
350 		return (error);
351 	case KERN_CLOCKRATE:
352 		return (sysctl_clockrate(oldp, oldlenp, newp));
353 	case KERN_BOOTTIME:
354 		return (sysctl_rdstruct(oldp, oldlenp, newp, &boottime,
355 		    sizeof(struct timeval)));
356 	case KERN_VNODE:
357 		return (sysctl_vnode(oldp, oldlenp, p));
358 #ifndef SMALL_KERNEL
359 	case KERN_PROC:
360 	case KERN_PROC2:
361 		return (sysctl_doproc(name, namelen, oldp, oldlenp));
362 	case KERN_PROC_ARGS:
363 		return (sysctl_proc_args(name + 1, namelen - 1, oldp, oldlenp,
364 		     p));
365 	case KERN_FILE2:
366 		return (sysctl_file2(name + 1, namelen - 1, oldp, oldlenp, p));
367 #endif
368 	case KERN_FILE:
369 		return (sysctl_file(oldp, oldlenp, p));
370 	case KERN_MBSTAT:
371 		return (sysctl_rdstruct(oldp, oldlenp, newp, &mbstat,
372 		    sizeof(mbstat)));
373 #ifdef GPROF
374 	case KERN_PROF:
375 		return (sysctl_doprof(name + 1, namelen - 1, oldp, oldlenp,
376 		    newp, newlen));
377 #endif
378 	case KERN_POSIX1:
379 		return (sysctl_rdint(oldp, oldlenp, newp, _POSIX_VERSION));
380 	case KERN_NGROUPS:
381 		return (sysctl_rdint(oldp, oldlenp, newp, NGROUPS_MAX));
382 	case KERN_JOB_CONTROL:
383 		return (sysctl_rdint(oldp, oldlenp, newp, 1));
384 	case KERN_SAVED_IDS:
385 #ifdef _POSIX_SAVED_IDS
386 		return (sysctl_rdint(oldp, oldlenp, newp, 1));
387 #else
388 		return (sysctl_rdint(oldp, oldlenp, newp, 0));
389 #endif
390 	case KERN_MAXPARTITIONS:
391 		return (sysctl_rdint(oldp, oldlenp, newp, MAXPARTITIONS));
392 	case KERN_RAWPARTITION:
393 		return (sysctl_rdint(oldp, oldlenp, newp, RAW_PART));
394 	case KERN_SOMAXCONN:
395 		return (sysctl_int(oldp, oldlenp, newp, newlen, &somaxconn));
396 	case KERN_SOMINCONN:
397 		return (sysctl_int(oldp, oldlenp, newp, newlen, &sominconn));
398 	case KERN_USERMOUNT:
399 		return (sysctl_int(oldp, oldlenp, newp, newlen, &usermount));
400 	case KERN_RND:
401 		return (sysctl_rdstruct(oldp, oldlenp, newp, &rndstats,
402 		    sizeof(rndstats)));
403 	case KERN_ARND: {
404 		char buf[256];
405 
406 		if (*oldlenp > sizeof(buf))
407 			*oldlenp = sizeof(buf);
408 		if (oldp) {
409 			arc4random_buf(buf, *oldlenp);
410 			if ((error = copyout(buf, oldp, *oldlenp)))
411 				return (error);
412 		}
413 		return (0);
414 	}
415 	case KERN_NOSUIDCOREDUMP:
416 		return (sysctl_int(oldp, oldlenp, newp, newlen, &nosuidcoredump));
417 	case KERN_FSYNC:
418 		return (sysctl_rdint(oldp, oldlenp, newp, 1));
419 	case KERN_SYSVMSG:
420 #ifdef SYSVMSG
421 		return (sysctl_rdint(oldp, oldlenp, newp, 1));
422 #else
423 		return (sysctl_rdint(oldp, oldlenp, newp, 0));
424 #endif
425 	case KERN_SYSVSEM:
426 #ifdef SYSVSEM
427 		return (sysctl_rdint(oldp, oldlenp, newp, 1));
428 #else
429 		return (sysctl_rdint(oldp, oldlenp, newp, 0));
430 #endif
431 	case KERN_SYSVSHM:
432 #ifdef SYSVSHM
433 		return (sysctl_rdint(oldp, oldlenp, newp, 1));
434 #else
435 		return (sysctl_rdint(oldp, oldlenp, newp, 0));
436 #endif
437 	case KERN_MSGBUFSIZE:
438 		/*
439 		 * deal with cases where the message buffer has
440 		 * become corrupted.
441 		 */
442 		if (!msgbufp || msgbufp->msg_magic != MSG_MAGIC)
443 			return (ENXIO);
444 		return (sysctl_rdint(oldp, oldlenp, newp, msgbufp->msg_bufs));
445 	case KERN_MSGBUF:
446 		/* see note above */
447 		if (!msgbufp || msgbufp->msg_magic != MSG_MAGIC)
448 			return (ENXIO);
449 		return (sysctl_rdstruct(oldp, oldlenp, newp, msgbufp,
450 		    msgbufp->msg_bufs + offsetof(struct msgbuf, msg_bufc)));
451 	case KERN_MALLOCSTATS:
452 		return (sysctl_malloc(name + 1, namelen - 1, oldp, oldlenp,
453 		    newp, newlen, p));
454 	case KERN_CPTIME:
455 	{
456 		CPU_INFO_ITERATOR cii;
457 		struct cpu_info *ci;
458 		int i;
459 
460 		bzero(cp_time, sizeof(cp_time));
461 
462 		CPU_INFO_FOREACH(cii, ci) {
463 			for (i = 0; i < CPUSTATES; i++)
464 				cp_time[i] += ci->ci_schedstate.spc_cp_time[i];
465 		}
466 
467 		return (sysctl_rdstruct(oldp, oldlenp, newp, &cp_time,
468 		    sizeof(cp_time)));
469 	}
470 	case KERN_NCHSTATS:
471 		return (sysctl_rdstruct(oldp, oldlenp, newp, &nchstats,
472 		    sizeof(struct nchstats)));
473 	case KERN_FORKSTAT:
474 		return (sysctl_rdstruct(oldp, oldlenp, newp, &forkstat,
475 		    sizeof(struct forkstat)));
476 	case KERN_TTY:
477 		return (sysctl_tty(name + 1, namelen - 1, oldp, oldlenp,
478 		    newp, newlen));
479 	case KERN_FSCALE:
480 		return (sysctl_rdint(oldp, oldlenp, newp, fscale));
481 	case KERN_CCPU:
482 		return (sysctl_rdint(oldp, oldlenp, newp, ccpu));
483 	case KERN_NPROCS:
484 		return (sysctl_rdint(oldp, oldlenp, newp, nprocs));
485 	case KERN_POOL:
486 		return (sysctl_dopool(name + 1, namelen - 1, oldp, oldlenp));
487 	case KERN_STACKGAPRANDOM:
488 		stackgap = stackgap_random;
489 		error = sysctl_int(oldp, oldlenp, newp, newlen, &stackgap);
490 		if (error)
491 			return (error);
492 		/*
493 		 * Safety harness.
494 		 */
495 		if ((stackgap < ALIGNBYTES && stackgap != 0) ||
496 		    !powerof2(stackgap) || stackgap >= MAXSSIZ)
497 			return (EINVAL);
498 		stackgap_random = stackgap;
499 		return (0);
500 #if defined(SYSVMSG) || defined(SYSVSEM) || defined(SYSVSHM)
501 	case KERN_SYSVIPC_INFO:
502 		return (sysctl_sysvipc(name + 1, namelen - 1, oldp, oldlenp));
503 #endif
504 #ifdef CRYPTO
505 	case KERN_USERCRYPTO:
506 		return (sysctl_int(oldp, oldlenp, newp, newlen, &usercrypto));
507 	case KERN_USERASYMCRYPTO:
508 		return (sysctl_int(oldp, oldlenp, newp, newlen,
509 			    &userasymcrypto));
510 	case KERN_CRYPTODEVALLOWSOFT:
511 		return (sysctl_int(oldp, oldlenp, newp, newlen,
512 			    &cryptodevallowsoft));
513 #endif
514 	case KERN_SPLASSERT:
515 		return (sysctl_int(oldp, oldlenp, newp, newlen,
516 		    &splassert_ctl));
517 #ifdef SYSVSEM
518 	case KERN_SEMINFO:
519 		return (sysctl_sysvsem(name + 1, namelen - 1, oldp, oldlenp,
520 		    newp, newlen));
521 #endif
522 #ifdef SYSVSHM
523 	case KERN_SHMINFO:
524 		return (sysctl_sysvshm(name + 1, namelen - 1, oldp, oldlenp,
525 		    newp, newlen));
526 #endif
527 #ifndef SMALL_KERNEL
528 	case KERN_INTRCNT:
529 		return (sysctl_intrcnt(name + 1, namelen - 1, oldp, oldlenp));
530 	case KERN_WATCHDOG:
531 		return (sysctl_wdog(name + 1, namelen - 1, oldp, oldlenp,
532 		    newp, newlen));
533 	case KERN_EMUL:
534 		return (sysctl_emul(name + 1, namelen - 1, oldp, oldlenp,
535 		    newp, newlen));
536 #endif
537 	case KERN_MAXCLUSTERS:
538 		error = sysctl_int(oldp, oldlenp, newp, newlen, &nmbclust);
539 		if (!error)
540 			nmbclust_update();
541 		return (error);
542 #ifndef SMALL_KERNEL
543 	case KERN_EVCOUNT:
544 		return (evcount_sysctl(name + 1, namelen - 1, oldp, oldlenp,
545 		    newp, newlen));
546 #endif
547 #ifdef __HAVE_TIMECOUNTER
548 	case KERN_TIMECOUNTER:
549 		return (sysctl_tc(name + 1, namelen - 1, oldp, oldlenp,
550 		    newp, newlen));
551 #endif
552 	case KERN_MAXLOCKSPERUID:
553 		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxlocksperuid));
554 	case KERN_CPTIME2:
555 		return (sysctl_cptime2(name + 1, namelen -1, oldp, oldlenp,
556 		    newp, newlen));
557 	case KERN_CACHEPCT: {
558 		int opct, pgs;
559 		opct = bufcachepercent;
560 		error = sysctl_int(oldp, oldlenp, newp, newlen,
561 		    &bufcachepercent);
562 		if (error)
563 			return(error);
564 		if (bufcachepercent > 90 || bufcachepercent < 5) {
565 			bufcachepercent = opct;
566 			return (EINVAL);
567 		}
568 		if (bufcachepercent != opct) {
569 			pgs = bufcachepercent * physmem / 100;
570 			bufadjust(pgs); /* adjust bufpages */
571 			bufhighpages = bufpages; /* set high water mark */
572 		}
573 		return(0);
574 	}
575 	default:
576 		return (EOPNOTSUPP);
577 	}
578 	/* NOTREACHED */
579 }
580 
581 /*
582  * hardware related system variables.
583  */
584 char *hw_vendor, *hw_prod, *hw_uuid, *hw_serial, *hw_ver;
585 
586 int
587 hw_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
588     size_t newlen, struct proc *p)
589 {
590 	extern char machine[], cpu_model[];
591 	int err, cpuspeed;
592 
593 	/* all sysctl names at this level except sensors are terminal */
594 	if (name[0] != HW_SENSORS && namelen != 1)
595 		return (ENOTDIR);		/* overloaded */
596 
597 	switch (name[0]) {
598 	case HW_MACHINE:
599 		return (sysctl_rdstring(oldp, oldlenp, newp, machine));
600 	case HW_MODEL:
601 		return (sysctl_rdstring(oldp, oldlenp, newp, cpu_model));
602 	case HW_NCPU:
603 		return (sysctl_rdint(oldp, oldlenp, newp, ncpus));
604 	case HW_NCPUFOUND:
605 		return (sysctl_rdint(oldp, oldlenp, newp, ncpusfound));
606 	case HW_BYTEORDER:
607 		return (sysctl_rdint(oldp, oldlenp, newp, BYTE_ORDER));
608 	case HW_PHYSMEM:
609 		return (sysctl_rdint(oldp, oldlenp, newp, ptoa(physmem)));
610 	case HW_USERMEM:
611 		return (sysctl_rdint(oldp, oldlenp, newp,
612 		    ptoa(physmem - uvmexp.wired)));
613 	case HW_PAGESIZE:
614 		return (sysctl_rdint(oldp, oldlenp, newp, PAGE_SIZE));
615 	case HW_DISKNAMES:
616 		err = sysctl_diskinit(0, p);
617 		if (err)
618 			return err;
619 		if (disknames)
620 			return (sysctl_rdstring(oldp, oldlenp, newp,
621 			    disknames));
622 		else
623 			return (sysctl_rdstring(oldp, oldlenp, newp, ""));
624 	case HW_DISKSTATS:
625 		err = sysctl_diskinit(1, p);
626 		if (err)
627 			return err;
628 		return (sysctl_rdstruct(oldp, oldlenp, newp, diskstats,
629 		    disk_count * sizeof(struct diskstats)));
630 	case HW_DISKCOUNT:
631 		return (sysctl_rdint(oldp, oldlenp, newp, disk_count));
632 #ifndef	SMALL_KERNEL
633 	case HW_SENSORS:
634 		return (sysctl_sensors(name + 1, namelen - 1, oldp, oldlenp,
635 		    newp, newlen));
636 #endif
637 	case HW_CPUSPEED:
638 		if (!cpu_cpuspeed)
639 			return (EOPNOTSUPP);
640 		err = cpu_cpuspeed(&cpuspeed);
641 		if (err)
642 			return err;
643 		return (sysctl_rdint(oldp, oldlenp, newp, cpuspeed));
644 	case HW_SETPERF:
645 		if (!cpu_setperf)
646 			return (EOPNOTSUPP);
647 		err = sysctl_int(oldp, oldlenp, newp, newlen, &perflevel);
648 		if (err)
649 			return err;
650 		if (perflevel > 100)
651 			perflevel = 100;
652 		if (perflevel < 0)
653 			perflevel = 0;
654 		if (newp)
655 			cpu_setperf(perflevel);
656 		return (0);
657 	case HW_VENDOR:
658 		if (hw_vendor)
659 			return (sysctl_rdstring(oldp, oldlenp, newp,
660 			    hw_vendor));
661 		else
662 			return (EOPNOTSUPP);
663 	case HW_PRODUCT:
664 		if (hw_prod)
665 			return (sysctl_rdstring(oldp, oldlenp, newp, hw_prod));
666 		else
667 			return (EOPNOTSUPP);
668 	case HW_VERSION:
669 		if (hw_ver)
670 			return (sysctl_rdstring(oldp, oldlenp, newp, hw_ver));
671 		else
672 			return (EOPNOTSUPP);
673 	case HW_SERIALNO:
674 		if (hw_serial)
675 			return (sysctl_rdstring(oldp, oldlenp, newp,
676 			    hw_serial));
677 		else
678 			return (EOPNOTSUPP);
679 	case HW_UUID:
680 		if (hw_uuid)
681 			return (sysctl_rdstring(oldp, oldlenp, newp, hw_uuid));
682 		else
683 			return (EOPNOTSUPP);
684 	case HW_PHYSMEM64:
685 		return (sysctl_rdquad(oldp, oldlenp, newp,
686 		    ptoa((psize_t)physmem)));
687 	case HW_USERMEM64:
688 		return (sysctl_rdquad(oldp, oldlenp, newp,
689 		    ptoa((psize_t)physmem - uvmexp.wired)));
690 	default:
691 		return (EOPNOTSUPP);
692 	}
693 	/* NOTREACHED */
694 }
695 
696 #ifdef DEBUG
697 /*
698  * Debugging related system variables.
699  */
700 extern struct ctldebug debug0, debug1;
701 struct ctldebug debug2, debug3, debug4;
702 struct ctldebug debug5, debug6, debug7, debug8, debug9;
703 struct ctldebug debug10, debug11, debug12, debug13, debug14;
704 struct ctldebug debug15, debug16, debug17, debug18, debug19;
705 static struct ctldebug *debugvars[CTL_DEBUG_MAXID] = {
706 	&debug0, &debug1, &debug2, &debug3, &debug4,
707 	&debug5, &debug6, &debug7, &debug8, &debug9,
708 	&debug10, &debug11, &debug12, &debug13, &debug14,
709 	&debug15, &debug16, &debug17, &debug18, &debug19,
710 };
711 int
712 debug_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
713     size_t newlen, struct proc *p)
714 {
715 	struct ctldebug *cdp;
716 
717 	/* all sysctl names at this level are name and field */
718 	if (namelen != 2)
719 		return (ENOTDIR);		/* overloaded */
720 	cdp = debugvars[name[0]];
721 	if (cdp->debugname == 0)
722 		return (EOPNOTSUPP);
723 	switch (name[1]) {
724 	case CTL_DEBUG_NAME:
725 		return (sysctl_rdstring(oldp, oldlenp, newp, cdp->debugname));
726 	case CTL_DEBUG_VALUE:
727 		return (sysctl_int(oldp, oldlenp, newp, newlen, cdp->debugvar));
728 	default:
729 		return (EOPNOTSUPP);
730 	}
731 	/* NOTREACHED */
732 }
733 #endif /* DEBUG */
734 
/*
 * Integer sysctl whose value may be read, or written only with a value
 * that does not exceed the current one.
 *
 * Without a new value this is a plain read via sysctl_rdint().  With one,
 * the new value is fetched into a scratch copy through sysctl_int() and
 * compared as unsigned against the current value; a larger value is
 * rejected with EPERM and *valp is left untouched.
 */
int
sysctl_int_lower(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int *valp)
{
	unsigned int current = *valp, wanted = *valp;
	int error;

	if (newp == NULL)
		return (sysctl_rdint(oldp, oldlenp, newp, *valp));

	error = sysctl_int(oldp, oldlenp, newp, newlen, (int *)&wanted);
	if (error)
		return (error);

	/* do not allow raising */
	if (wanted > current)
		return (EPERM);

	*(unsigned int *)valp = wanted;
	return (0);
}
754 
/*
 * Read and/or write an int-valued sysctl variable.
 *
 *	oldp, oldlenp	user buffer for the current value; ENOMEM if it is
 *			smaller than an int.  *oldlenp is set to sizeof(int).
 *	newp, newlen	user buffer holding a new value; EINVAL unless
 *			newlen is exactly sizeof(int).
 *	valp		kernel variable to export and, on write, overwrite.
 *
 * The old value is copied out before the new one is copied in.  Returns 0
 * or the error from the size checks, copyout() or copyin().
 */
int
sysctl_int(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int *valp)
{
	int error;

	if (oldp != NULL && *oldlenp < sizeof(int))
		return (ENOMEM);
	if (newp != NULL && newlen != sizeof(int))
		return (EINVAL);

	*oldlenp = sizeof(int);

	if (oldp != NULL) {
		error = copyout(valp, oldp, sizeof(int));
		if (error != 0)
			return (error);
	}
	if (newp == NULL)
		return (0);
	return (copyin(newp, valp, sizeof(int)));
}
775 
776 /*
777  * As above, but read-only.
778  */
779 int
780 sysctl_rdint(void *oldp, size_t *oldlenp, void *newp, int val)
781 {
782 	int error = 0;
783 
784 	if (oldp && *oldlenp < sizeof(int))
785 		return (ENOMEM);
786 	if (newp)
787 		return (EPERM);
788 	*oldlenp = sizeof(int);
789 	if (oldp)
790 		error = copyout((caddr_t)&val, oldp, sizeof(int));
791 	return (error);
792 }
793 
794 /*
795  * Array of integer values.
796  */
797 int
798 sysctl_int_arr(int **valpp, int *name, u_int namelen, void *oldp,
799     size_t *oldlenp, void *newp, size_t newlen)
800 {
801 	if (namelen > 1)
802 		return (ENOTDIR);
803 	if (name[0] < 0 || valpp[name[0]] == NULL)
804 		return (EOPNOTSUPP);
805 	return (sysctl_int(oldp, oldlenp, newp, newlen, valpp[name[0]]));
806 }
807 
/*
 * Read and/or write a 64-bit (int64_t) sysctl variable.
 *
 * Same contract as sysctl_int() with sizeof(int64_t) in place of
 * sizeof(int): ENOMEM for a short old buffer, EINVAL for a new value of
 * the wrong size, *oldlenp set to sizeof(int64_t), old value copied out
 * before the new one is copied in.
 */
int
sysctl_quad(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    int64_t *valp)
{
	int error;

	if (oldp != NULL && *oldlenp < sizeof(int64_t))
		return (ENOMEM);
	if (newp != NULL && newlen != sizeof(int64_t))
		return (EINVAL);

	*oldlenp = sizeof(int64_t);

	if (oldp != NULL) {
		error = copyout(valp, oldp, sizeof(int64_t));
		if (error != 0)
			return (error);
	}
	if (newp == NULL)
		return (0);
	return (copyin(newp, valp, sizeof(int64_t)));
}
829 
830 /*
831  * As above, but read-only.
832  */
833 int
834 sysctl_rdquad(void *oldp, size_t *oldlenp, void *newp, int64_t val)
835 {
836 	int error = 0;
837 
838 	if (oldp && *oldlenp < sizeof(int64_t))
839 		return (ENOMEM);
840 	if (newp)
841 		return (EPERM);
842 	*oldlenp = sizeof(int64_t);
843 	if (oldp)
844 		error = copyout((caddr_t)&val, oldp, sizeof(int64_t));
845 	return (error);
846 }
847 
/*
 * Read and/or write a string-valued sysctl variable held in `str', a
 * buffer of `maxlen' bytes.
 *
 * Thin wrapper around sysctl__string() with truncation disabled: an old
 * buffer too small for the whole string is an error (ENOMEM).
 */
int
sysctl_string(void *oldp, size_t *oldlenp, void *newp, size_t newlen, char *str,
    int maxlen)
{
	const int trunc = 0;	/* never hand back a shortened string */

	return (sysctl__string(oldp, oldlenp, newp, newlen, str, maxlen,
	    trunc));
}
858 
/*
 * Truncating variant of sysctl_string(): calls sysctl__string() with
 * truncation enabled, so a non-empty old buffer that is too small
 * receives a shortened, NUL-terminated copy instead of an error.
 */
int
sysctl_tstring(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    char *str, int maxlen)
{
	const int trunc = 1;	/* shorten the result to fit the buffer */

	return (sysctl__string(oldp, oldlenp, newp, newlen, str, maxlen,
	    trunc));
}
865 
/*
 * Common implementation of sysctl_string() and sysctl_tstring().
 *
 *	oldp, oldlenp	user buffer and in/out length for the current string
 *	newp, newlen	user buffer and length of a new string (without
 *			terminating NUL), or NULL
 *	str, maxlen	kernel string buffer and its size in bytes
 *	trunc		non-zero: a non-empty old buffer that is too small
 *			gets a shortened, NUL-terminated copy; zero: a too
 *			small old buffer is an error
 *
 * On return *oldlenp holds the number of bytes copied out or, when oldp
 * is NULL, the size needed for the whole string including its NUL.
 *
 * Returns 0, ENOMEM (old buffer too small), EINVAL (new string does not
 * fit in maxlen together with its NUL) or a copyin()/copyout() error.
 */
int
sysctl__string(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    char *str, int maxlen, int trunc)
{
	size_t len;
	int error = 0;

	len = strlen(str) + 1;
	if (oldp && *oldlenp < len) {
		if (trunc == 0 || *oldlenp == 0)
			return (ENOMEM);
	}
	if (newp && (maxlen <= 0 || newlen >= (size_t)maxlen))
		return (EINVAL);
	if (oldp) {
		if (trunc && *oldlenp < len) {
			/*
			 * Copy the prefix and a separate NUL rather than
			 * patching a terminator into the shared kernel
			 * string for the duration of the copyout.
			 */
			len = *oldlenp;
			error = copyout(str, oldp, len - 1);
			if (error == 0)
				error = copyout("", (char *)oldp + len - 1, 1);
		} else {
			error = copyout(str, oldp, len);
		}
	}
	/* also report the size when the caller only asked for it */
	*oldlenp = len;
	if (error == 0 && newp) {
		error = copyin(newp, str, newlen);
		str[newlen] = 0;
	}
	return (error);
}
898 
/*
 * Read-only string sysctl: export the NUL-terminated string `str'.
 *
 * Fails with ENOMEM when the old buffer cannot hold the string and its
 * terminator, and with EPERM when a new value is supplied.  *oldlenp is
 * set to the string length including the NUL.
 */
int
sysctl_rdstring(void *oldp, size_t *oldlenp, void *newp, const char *str)
{
	int len;

	len = strlen(str) + 1;
	if (oldp != NULL && *oldlenp < len)
		return (ENOMEM);
	if (newp != NULL)
		return (EPERM);

	*oldlenp = len;
	if (oldp == NULL)
		return (0);
	return (copyout(str, oldp, len));
}
917 
/*
 * Read and/or write a structure-valued sysctl variable of `len' bytes
 * stored at `sp'.
 *
 * Fails with ENOMEM when the old buffer is smaller than len and with
 * EINVAL when newlen is larger than len.  *oldlenp is updated only when
 * an old buffer is supplied.  The old contents are copied out before the
 * new ones are copied in.
 *
 * Note that the copyin always transfers len bytes, whatever newlen is.
 */
int
sysctl_struct(void *oldp, size_t *oldlenp, void *newp, size_t newlen, void *sp,
    int len)
{
	int error;

	if (oldp != NULL && *oldlenp < len)
		return (ENOMEM);
	if (newp != NULL && newlen > len)
		return (EINVAL);

	if (oldp != NULL) {
		*oldlenp = len;
		error = copyout(sp, oldp, len);
		if (error != 0)
			return (error);
	}
	if (newp == NULL)
		return (0);
	return (copyin(newp, sp, len));
}
940 
/*
 * Read-only structure sysctl: export `len' bytes starting at `sp'.
 *
 * Fails with ENOMEM when the old buffer is smaller than len and with
 * EPERM when a new value is supplied.  *oldlenp is set to len.
 */
int
sysctl_rdstruct(void *oldp, size_t *oldlenp, void *newp, const void *sp,
    int len)
{
	if (oldp != NULL && *oldlenp < len)
		return (ENOMEM);
	if (newp != NULL)
		return (EPERM);

	*oldlenp = len;
	if (oldp == NULL)
		return (0);
	return (copyout(sp, oldp, len));
}
960 
961 /*
962  * Get file structures.
963  */
964 int
965 sysctl_file(char *where, size_t *sizep, struct proc *p)
966 {
967 	int buflen, error;
968 	struct file *fp, cfile;
969 	char *start = where;
970 	struct ucred *cred = p->p_ucred;
971 
972 	buflen = *sizep;
973 	if (where == NULL) {
974 		/*
975 		 * overestimate by KERN_FILESLOP files
976 		 */
977 		*sizep = sizeof(filehead) +
978 		    (nfiles + KERN_FILESLOP) * sizeof(struct file);
979 		return (0);
980 	}
981 
982 	/*
983 	 * first copyout filehead
984 	 */
985 	if (buflen < sizeof(filehead)) {
986 		*sizep = 0;
987 		return (0);
988 	}
989 	error = copyout((caddr_t)&filehead, where, sizeof(filehead));
990 	if (error)
991 		return (error);
992 	buflen -= sizeof(filehead);
993 	where += sizeof(filehead);
994 
995 	/*
996 	 * followed by an array of file structures
997 	 */
998 	LIST_FOREACH(fp, &filehead, f_list) {
999 		if (buflen < sizeof(struct file)) {
1000 			*sizep = where - start;
1001 			return (ENOMEM);
1002 		}
1003 
1004 		/* Only let the superuser or the owner see some information */
1005 		bcopy(fp, &cfile, sizeof (struct file));
1006 		if (suser(p, 0) != 0 && cred->cr_uid != fp->f_cred->cr_uid) {
1007 			cfile.f_offset = (off_t)-1;
1008 			cfile.f_rxfer = 0;
1009 			cfile.f_wxfer = 0;
1010 			cfile.f_rbytes = 0;
1011 			cfile.f_wbytes = 0;
1012 		}
1013 		error = copyout(&cfile, where, sizeof (struct file));
1014 		if (error)
1015 			return (error);
1016 		buflen -= sizeof(struct file);
1017 		where += sizeof(struct file);
1018 	}
1019 	*sizep = where - start;
1020 	return (0);
1021 }
1022 
1023 #ifndef SMALL_KERNEL
1024 void
1025 fill_file2(struct kinfo_file2 *kf, struct file *fp, struct filedesc *fdp,
1026 	  int fd, struct vnode *vp, struct proc *pp, struct proc *p)
1027 {
1028 	struct vattr va;
1029 
1030 	memset(kf, 0, sizeof(*kf));
1031 
1032 	kf->fd_fd = fd;		/* might not really be an fd */
1033 
1034 	if (fp != NULL) {
1035 		kf->f_fileaddr = PTRTOINT64(fp);
1036 		kf->f_flag = fp->f_flag;
1037 		kf->f_iflags = fp->f_iflags;
1038 		kf->f_type = fp->f_type;
1039 		kf->f_count = fp->f_count;
1040 		kf->f_msgcount = fp->f_msgcount;
1041 		kf->f_ucred = PTRTOINT64(fp->f_cred);
1042 		kf->f_uid = fp->f_cred->cr_uid;
1043 		kf->f_gid = fp->f_cred->cr_gid;
1044 		kf->f_ops = PTRTOINT64(fp->f_ops);
1045 		kf->f_offset = fp->f_offset;
1046 		kf->f_data = PTRTOINT64(fp->f_data);
1047 		kf->f_usecount = fp->f_usecount;
1048 
1049 		if (suser(p, 0) == 0 || p->p_ucred->cr_uid == fp->f_cred->cr_uid) {
1050 			kf->f_rxfer = fp->f_rxfer;
1051 			kf->f_rwfer = fp->f_wxfer;
1052 			kf->f_seek = fp->f_seek;
1053 			kf->f_rbytes = fp->f_rbytes;
1054 			kf->f_wbytes = fp->f_rbytes;
1055 		}
1056 	} else if (vp != NULL) {
1057 		/* fake it */
1058 		kf->f_type = DTYPE_VNODE;
1059 		kf->f_flag = FREAD;
1060 		if (fd == KERN_FILE_TRACE)
1061 			kf->f_flag |= FWRITE;
1062 	}
1063 
1064 	/* information about the object associated with this file */
1065 	switch (kf->f_type) {
1066 	case DTYPE_VNODE:
1067 		if (fp != NULL)
1068 			vp = (struct vnode *)fp->f_data;
1069 
1070 		kf->v_un = PTRTOINT64(vp->v_un.vu_socket);
1071 		kf->v_type = vp->v_type;
1072 		kf->v_tag = vp->v_tag;
1073 		kf->v_flag = vp->v_flag;
1074 		kf->v_data = PTRTOINT64(vp->v_data);
1075 		kf->v_mount = PTRTOINT64(vp->v_mount);
1076 		if (vp->v_mount)
1077 			strlcpy(kf->f_mntonname,
1078 			    vp->v_mount->mnt_stat.f_mntonname,
1079 			    sizeof(kf->f_mntonname));
1080 
1081 		if (VOP_GETATTR(vp, &va, p->p_ucred, p) == 0) {
1082 			kf->va_fileid = va.va_fileid;
1083 			kf->va_mode = MAKEIMODE(va.va_type, va.va_mode);
1084 			kf->va_size = va.va_size;
1085 			kf->va_rdev = va.va_rdev;
1086 			kf->va_fsid = va.va_fsid & 0xffffffff;
1087 		}
1088 		break;
1089 
1090 	case DTYPE_SOCKET: {
1091 		struct socket *so = (struct socket *)fp->f_data;
1092 
1093 		kf->so_type = so->so_type;
1094 		kf->so_state = so->so_state;
1095 		kf->so_pcb = PTRTOINT64(so->so_pcb);
1096 		kf->so_protocol = so->so_proto->pr_protocol;
1097 		kf->so_family = so->so_proto->pr_domain->dom_family;
1098 		if (!so->so_pcb)
1099 			break;
1100 		switch (kf->so_family) {
1101 		case AF_INET: {
1102 			struct inpcb *inpcb = so->so_pcb;
1103 
1104 			kf->inp_ppcb = PTRTOINT64(inpcb->inp_ppcb);
1105 			kf->inp_lport = inpcb->inp_lport;
1106 			kf->inp_laddru[0] = inpcb->inp_laddr.s_addr;
1107 			kf->inp_fport = inpcb->inp_fport;
1108 			kf->inp_faddru[0] = inpcb->inp_faddr.s_addr;
1109 			break;
1110 		    }
1111 		case AF_INET6: {
1112 			struct inpcb *inpcb = so->so_pcb;
1113 
1114 			kf->inp_ppcb = PTRTOINT64(inpcb->inp_ppcb);
1115 			kf->inp_lport = inpcb->inp_lport;
1116 			kf->inp_laddru[0] = inpcb->inp_laddr6.s6_addr32[0];
1117 			kf->inp_laddru[1] = inpcb->inp_laddr6.s6_addr32[1];
1118 			kf->inp_laddru[2] = inpcb->inp_laddr6.s6_addr32[2];
1119 			kf->inp_laddru[3] = inpcb->inp_laddr6.s6_addr32[3];
1120 			kf->inp_fport = inpcb->inp_fport;
1121 			kf->inp_faddru[0] = inpcb->inp_laddr6.s6_addr32[0];
1122 			kf->inp_faddru[1] = inpcb->inp_faddr6.s6_addr32[1];
1123 			kf->inp_faddru[2] = inpcb->inp_faddr6.s6_addr32[2];
1124 			kf->inp_faddru[3] = inpcb->inp_faddr6.s6_addr32[3];
1125 			break;
1126 		    }
1127 		case AF_UNIX: {
1128 			struct unpcb *unpcb = so->so_pcb;
1129 
1130 			kf->unp_conn = PTRTOINT64(unpcb->unp_conn);
1131 			break;
1132 		    }
1133 		}
1134 		break;
1135 	    }
1136 
1137 	case DTYPE_PIPE: {
1138 		struct pipe *pipe = (struct pipe *)fp->f_data;
1139 
1140 		kf->pipe_peer = PTRTOINT64(pipe->pipe_peer);
1141 		kf->pipe_state = pipe->pipe_state;
1142 		break;
1143 	    }
1144 
1145 	case DTYPE_KQUEUE: {
1146 		struct kqueue *kqi = (struct kqueue *)fp->f_data;
1147 
1148 		kf->kq_count = kqi->kq_count;
1149 		kf->kq_state = kqi->kq_state;
1150 		break;
1151 	    }
1152 	case DTYPE_SYSTRACE: {
1153 		struct fsystrace *f = (struct fsystrace *)fp->f_data;
1154 
1155 		kf->str_npolicies = f->npolicies;
1156 		break;
1157 	    }
1158 	}
1159 
1160 	/* per-process information for KERN_FILE_BY[PU]ID */
1161 	if (pp != NULL) {
1162 		kf->p_pid = pp->p_pid;
1163 		kf->p_uid = pp->p_ucred->cr_uid;
1164 		kf->p_gid = pp->p_ucred->cr_gid;
1165 		strlcpy(kf->p_comm, pp->p_comm, sizeof(kf->p_comm));
1166 	}
1167 	if (fdp != NULL)
1168 		kf->fd_ofileflags = fdp->fd_ofileflags[fd];
1169 }
1170 
1171 /*
1172  * Get file structures.
1173  */
1174 int
1175 sysctl_file2(int *name, u_int namelen, char *where, size_t *sizep,
1176     struct proc *p)
1177 {
1178 	struct kinfo_file2 *kf;
1179 	struct filedesc *fdp;
1180 	struct file *fp;
1181 	struct proc *pp;
1182 	size_t buflen, elem_size, elem_count, outsize;
1183 	char *dp = where;
1184 	int arg, i, error = 0, needed = 0;
1185 	u_int op;
1186 
1187 	if (namelen > 4)
1188 		return (ENOTDIR);
1189 	if (namelen < 4)
1190 		return (EINVAL);
1191 
1192 	buflen = where != NULL ? *sizep : 0;
1193 	op = name[0];
1194 	arg = name[1];
1195 	elem_size = name[2];
1196 	elem_count = name[3];
1197 	outsize = MIN(sizeof(*kf), elem_size);
1198 
1199 	if (elem_size < 1 || elem_count < 0)
1200 		return (EINVAL);
1201 
1202 	kf = malloc(sizeof(*kf), M_TEMP, M_WAITOK);
1203 
1204 #define FILLIT(fp, fdp, i, vp, pp) do {				\
1205 	if (buflen >= elem_size && elem_count > 0) {		\
1206 		fill_file2(kf, fp, fdp, i, vp, pp, p);		\
1207 		error = copyout(kf, dp, outsize);		\
1208 		if (error)					\
1209 			break;					\
1210 		dp += elem_size;				\
1211 		buflen -= elem_size;				\
1212 		elem_count--;					\
1213 	}							\
1214 	needed += elem_size;					\
1215 } while (0)
1216 
1217 	switch (op) {
1218 	case KERN_FILE_BYFILE:
1219 		if (arg != 0) {
1220 			/* no arg in file mode */
1221 			error = EINVAL;
1222 			break;
1223 		}
1224 		LIST_FOREACH(fp, &filehead, f_list) {
1225 			if (fp->f_count == 0)
1226 				continue;
1227 			FILLIT(fp, NULL, 0, NULL, NULL);
1228 		}
1229 		break;
1230 	case KERN_FILE_BYPID:
1231 		/* A arg of -1 indicates all processes */
1232 		if (arg < -1) {
1233 			error = EINVAL;
1234 			break;
1235 		}
1236 		LIST_FOREACH(pp, &allproc, p_list) {
1237 			/* skip system, embryonic and undead processes */
1238 			if ((pp->p_flag & P_SYSTEM) ||
1239 			    pp->p_stat == SIDL || pp->p_stat == SZOMB)
1240 				continue;
1241 			if (arg > 0 && pp->p_pid != (pid_t)arg) {
1242 				/* not the pid we are looking for */
1243 				continue;
1244 			}
1245 			fdp = pp->p_fd;
1246 			if (pp->p_textvp)
1247 				FILLIT(NULL, NULL, KERN_FILE_TEXT, pp->p_textvp, pp);
1248 			if (fdp->fd_cdir)
1249 				FILLIT(NULL, NULL, KERN_FILE_CDIR, fdp->fd_cdir, pp);
1250 			if (fdp->fd_rdir)
1251 				FILLIT(NULL, NULL, KERN_FILE_RDIR, fdp->fd_rdir, pp);
1252 			if (pp->p_tracep)
1253 				FILLIT(NULL, NULL, KERN_FILE_TRACE, pp->p_tracep, pp);
1254 			for (i = 0; i < fdp->fd_nfiles; i++) {
1255 				if ((fp = fdp->fd_ofiles[i]) == NULL)
1256 					continue;
1257 				if (!FILE_IS_USABLE(fp))
1258 					continue;
1259 				FILLIT(fp, fdp, i, NULL, pp);
1260 			}
1261 		}
1262 		break;
1263 	case KERN_FILE_BYUID:
1264 		LIST_FOREACH(pp, &allproc, p_list) {
1265 			/* skip system, embryonic and undead processes */
1266 			if ((pp->p_flag & P_SYSTEM) ||
1267 			    pp->p_stat == SIDL || pp->p_stat == SZOMB)
1268 				continue;
1269 			if (arg > 0 && pp->p_ucred->cr_uid != (uid_t)arg) {
1270 				/* not the uid we are looking for */
1271 				continue;
1272 			}
1273 			fdp = pp->p_fd;
1274 			if (fdp->fd_cdir)
1275 				FILLIT(NULL, NULL, KERN_FILE_CDIR, fdp->fd_cdir, pp);
1276 			if (fdp->fd_rdir)
1277 				FILLIT(NULL, NULL, KERN_FILE_RDIR, fdp->fd_rdir, pp);
1278 			if (pp->p_tracep)
1279 				FILLIT(NULL, NULL, KERN_FILE_TRACE, pp->p_tracep, pp);
1280 			for (i = 0; i < fdp->fd_nfiles; i++) {
1281 				if ((fp = fdp->fd_ofiles[i]) == NULL)
1282 					continue;
1283 				if (!FILE_IS_USABLE(fp))
1284 					continue;
1285 				FILLIT(fp, fdp, i, NULL, pp);
1286 			}
1287 		}
1288 		break;
1289 	default:
1290 		error = EINVAL;
1291 		break;
1292 	}
1293 	free(kf, M_TEMP);
1294 
1295 	if (!error) {
1296 		if (where == NULL)
1297 			needed += KERN_FILESLOP * elem_size;
1298 		*sizep = needed;
1299 	}
1300 
1301 	return (error);
1302 }
1303 
1304 /*
1305  * try over estimating by 5 procs
1306  */
1307 #define KERN_PROCSLOP	(5 * sizeof (struct kinfo_proc))
1308 
1309 int
1310 sysctl_doproc(int *name, u_int namelen, char *where, size_t *sizep)
1311 {
1312 	struct kinfo_proc2 *kproc2 = NULL;
1313 	struct eproc *eproc = NULL;
1314 	struct proc *p;
1315 	char *dp;
1316 	int arg, buflen, doingzomb, elem_size, elem_count;
1317 	int error, needed, type, op;
1318 
1319 	dp = where;
1320 	buflen = where != NULL ? *sizep : 0;
1321 	needed = error = 0;
1322 	type = name[0];
1323 
1324 	if (type == KERN_PROC) {
1325 		if (namelen != 3 && !(namelen == 2 &&
1326 		    (name[1] == KERN_PROC_ALL || name[1] == KERN_PROC_KTHREAD)))
1327 			return (EINVAL);
1328 		op = name[1];
1329 		arg = op == KERN_PROC_ALL ? 0 : name[2];
1330 		elem_size = elem_count = 0;
1331 		eproc = malloc(sizeof(struct eproc), M_TEMP, M_WAITOK);
1332 	} else /* if (type == KERN_PROC2) */ {
1333 		if (namelen != 5 || name[3] < 0 || name[4] < 0)
1334 			return (EINVAL);
1335 		op = name[1];
1336 		arg = name[2];
1337 		elem_size = name[3];
1338 		elem_count = name[4];
1339 		kproc2 = malloc(sizeof(struct kinfo_proc2), M_TEMP, M_WAITOK);
1340 	}
1341 	p = LIST_FIRST(&allproc);
1342 	doingzomb = 0;
1343 again:
1344 	for (; p != 0; p = LIST_NEXT(p, p_list)) {
1345 		/*
1346 		 * Skip embryonic processes.
1347 		 */
1348 		if (p->p_stat == SIDL)
1349 			continue;
1350 		/*
1351 		 * TODO - make more efficient (see notes below).
1352 		 */
1353 		switch (op) {
1354 
1355 		case KERN_PROC_PID:
1356 			/* could do this with just a lookup */
1357 			if (p->p_pid != (pid_t)arg)
1358 				continue;
1359 			break;
1360 
1361 		case KERN_PROC_PGRP:
1362 			/* could do this by traversing pgrp */
1363 			if (p->p_pgrp->pg_id != (pid_t)arg)
1364 				continue;
1365 			break;
1366 
1367 		case KERN_PROC_SESSION:
1368 			if (p->p_session->s_leader == NULL ||
1369 			    p->p_session->s_leader->p_pid != (pid_t)arg)
1370 				continue;
1371 			break;
1372 
1373 		case KERN_PROC_TTY:
1374 			if ((p->p_flag & P_CONTROLT) == 0 ||
1375 			    p->p_session->s_ttyp == NULL ||
1376 			    p->p_session->s_ttyp->t_dev != (dev_t)arg)
1377 				continue;
1378 			break;
1379 
1380 		case KERN_PROC_UID:
1381 			if (p->p_ucred->cr_uid != (uid_t)arg)
1382 				continue;
1383 			break;
1384 
1385 		case KERN_PROC_RUID:
1386 			if (p->p_cred->p_ruid != (uid_t)arg)
1387 				continue;
1388 			break;
1389 
1390 		case KERN_PROC_ALL:
1391 			if (p->p_flag & P_SYSTEM)
1392 				continue;
1393 			break;
1394 		case KERN_PROC_KTHREAD:
1395 			/* no filtering */
1396 			break;
1397 		default:
1398 			error = EINVAL;
1399 			goto err;
1400 		}
1401 		if (type == KERN_PROC) {
1402 			if (buflen >= sizeof(struct kinfo_proc)) {
1403 				fill_eproc(p, eproc);
1404 				error = copyout((caddr_t)p,
1405 				    &((struct kinfo_proc *)dp)->kp_proc,
1406 				    sizeof(struct proc));
1407 				if (error)
1408 					goto err;
1409 				error = copyout((caddr_t)eproc,
1410 				    &((struct kinfo_proc *)dp)->kp_eproc,
1411 				    sizeof(*eproc));
1412 				if (error)
1413 					goto err;
1414 				dp += sizeof(struct kinfo_proc);
1415 				buflen -= sizeof(struct kinfo_proc);
1416 			}
1417 			needed += sizeof(struct kinfo_proc);
1418 		} else /* if (type == KERN_PROC2) */ {
1419 			if (buflen >= elem_size && elem_count > 0) {
1420 				fill_kproc2(p, kproc2);
1421 				/*
1422 				 * Copy out elem_size, but not larger than
1423 				 * the size of a struct kinfo_proc2.
1424 				 */
1425 				error = copyout(kproc2, dp,
1426 				    min(sizeof(*kproc2), elem_size));
1427 				if (error)
1428 					goto err;
1429 				dp += elem_size;
1430 				buflen -= elem_size;
1431 				elem_count--;
1432 			}
1433 			needed += elem_size;
1434 		}
1435 	}
1436 	if (doingzomb == 0) {
1437 		p = LIST_FIRST(&zombproc);
1438 		doingzomb++;
1439 		goto again;
1440 	}
1441 	if (where != NULL) {
1442 		*sizep = dp - where;
1443 		if (needed > *sizep) {
1444 			error = ENOMEM;
1445 			goto err;
1446 		}
1447 	} else {
1448 		needed += KERN_PROCSLOP;
1449 		*sizep = needed;
1450 	}
1451 err:
1452 	if (eproc)
1453 		free(eproc, M_TEMP);
1454 	if (kproc2)
1455 		free(kproc2, M_TEMP);
1456 	return (error);
1457 }
1458 
1459 #endif	/* SMALL_KERNEL */
1460 
1461 /*
1462  * Fill in an eproc structure for the specified process.
1463  */
1464 void
1465 fill_eproc(struct proc *p, struct eproc *ep)
1466 {
1467 	struct tty *tp;
1468 
1469 	ep->e_paddr = p;
1470 	ep->e_sess = p->p_pgrp->pg_session;
1471 	ep->e_pcred = *p->p_cred;
1472 	ep->e_ucred = *p->p_ucred;
1473 	if (p->p_stat == SIDL || P_ZOMBIE(p)) {
1474 		ep->e_vm.vm_rssize = 0;
1475 		ep->e_vm.vm_tsize = 0;
1476 		ep->e_vm.vm_dsize = 0;
1477 		ep->e_vm.vm_ssize = 0;
1478 		bzero(&ep->e_pstats, sizeof(ep->e_pstats));
1479 		ep->e_pstats_valid = 0;
1480 	} else {
1481 		struct vmspace *vm = p->p_vmspace;
1482 
1483 		ep->e_vm.vm_rssize = vm_resident_count(vm);
1484 		ep->e_vm.vm_tsize = vm->vm_tsize;
1485 		ep->e_vm.vm_dsize = vm->vm_dused;
1486 		ep->e_vm.vm_ssize = vm->vm_ssize;
1487 		ep->e_pstats = *p->p_stats;
1488 		ep->e_pstats_valid = 1;
1489 	}
1490 	if (p->p_pptr)
1491 		ep->e_ppid = p->p_pptr->p_pid;
1492 	else
1493 		ep->e_ppid = 0;
1494 	ep->e_pgid = p->p_pgrp->pg_id;
1495 	ep->e_jobc = p->p_pgrp->pg_jobc;
1496 	if ((p->p_flag & P_CONTROLT) &&
1497 	     (tp = ep->e_sess->s_ttyp)) {
1498 		ep->e_tdev = tp->t_dev;
1499 		ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
1500 		ep->e_tsess = tp->t_session;
1501 	} else
1502 		ep->e_tdev = NODEV;
1503 	ep->e_flag = ep->e_sess->s_ttyvp ? EPROC_CTTY : 0;
1504 	if (SESS_LEADER(p))
1505 		ep->e_flag |= EPROC_SLEADER;
1506 	strncpy(ep->e_wmesg, p->p_wmesg ? p->p_wmesg : "", WMESGLEN);
1507 	ep->e_wmesg[WMESGLEN] = '\0';
1508 	ep->e_xsize = ep->e_xrssize = 0;
1509 	ep->e_xccount = ep->e_xswrss = 0;
1510 	strncpy(ep->e_login, ep->e_sess->s_login, MAXLOGNAME-1);
1511 	ep->e_login[MAXLOGNAME-1] = '\0';
1512 	strncpy(ep->e_emul, p->p_emul->e_name, EMULNAMELEN);
1513 	ep->e_emul[EMULNAMELEN] = '\0';
1514 	ep->e_maxrss = p->p_rlimit ? p->p_rlimit[RLIMIT_RSS].rlim_cur : 0;
1515 	ep->e_limit = p->p_p->ps_limit;
1516 }
1517 
1518 #ifndef	SMALL_KERNEL
1519 
1520 /*
1521  * Fill in a kproc2 structure for the specified process.
1522  */
1523 void
1524 fill_kproc2(struct proc *p, struct kinfo_proc2 *ki)
1525 {
1526 	struct tty *tp;
1527 	struct timeval ut, st;
1528 
1529 	bzero(ki, sizeof(*ki));
1530 
1531 	ki->p_paddr = PTRTOINT64(p);
1532 	ki->p_fd = PTRTOINT64(p->p_fd);
1533 	ki->p_stats = PTRTOINT64(p->p_stats);
1534 	ki->p_limit = PTRTOINT64(p->p_p->ps_limit);
1535 	ki->p_vmspace = PTRTOINT64(p->p_vmspace);
1536 	ki->p_sigacts = PTRTOINT64(p->p_sigacts);
1537 	ki->p_sess = PTRTOINT64(p->p_session);
1538 	ki->p_tsess = 0;	/* may be changed if controlling tty below */
1539 	ki->p_ru = PTRTOINT64(p->p_ru);
1540 
1541 	ki->p_eflag = 0;
1542 	ki->p_exitsig = p->p_exitsig;
1543 	ki->p_flag = p->p_flag | P_INMEM;
1544 
1545 	ki->p_pid = p->p_pid;
1546 	if (p->p_pptr)
1547 		ki->p_ppid = p->p_pptr->p_pid;
1548 	else
1549 		ki->p_ppid = 0;
1550 	if (p->p_session->s_leader)
1551 		ki->p_sid = p->p_session->s_leader->p_pid;
1552 	else
1553 		ki->p_sid = 0;
1554 	ki->p__pgid = p->p_pgrp->pg_id;
1555 
1556 	ki->p_tpgid = -1;	/* may be changed if controlling tty below */
1557 
1558 	ki->p_uid = p->p_ucred->cr_uid;
1559 	ki->p_ruid = p->p_cred->p_ruid;
1560 	ki->p_gid = p->p_ucred->cr_gid;
1561 	ki->p_rgid = p->p_cred->p_rgid;
1562 	ki->p_svuid = p->p_cred->p_svuid;
1563 	ki->p_svgid = p->p_cred->p_svgid;
1564 
1565 	memcpy(ki->p_groups, p->p_cred->pc_ucred->cr_groups,
1566 	    min(sizeof(ki->p_groups), sizeof(p->p_cred->pc_ucred->cr_groups)));
1567 	ki->p_ngroups = p->p_cred->pc_ucred->cr_ngroups;
1568 
1569 	ki->p_jobc = p->p_pgrp->pg_jobc;
1570 	if ((p->p_flag & P_CONTROLT) && (tp = p->p_session->s_ttyp)) {
1571 		ki->p_tdev = tp->t_dev;
1572 		ki->p_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : -1;
1573 		ki->p_tsess = PTRTOINT64(tp->t_session);
1574 	} else {
1575 		ki->p_tdev = NODEV;
1576 	}
1577 
1578 	ki->p_estcpu = p->p_estcpu;
1579 	ki->p_rtime_sec = p->p_rtime.tv_sec;
1580 	ki->p_rtime_usec = p->p_rtime.tv_usec;
1581 	ki->p_cpticks = p->p_cpticks;
1582 	ki->p_pctcpu = p->p_pctcpu;
1583 
1584 	ki->p_uticks = p->p_uticks;
1585 	ki->p_sticks = p->p_sticks;
1586 	ki->p_iticks = p->p_iticks;
1587 
1588 	ki->p_tracep = PTRTOINT64(p->p_tracep);
1589 	ki->p_traceflag = p->p_traceflag;
1590 
1591 	ki->p_siglist = p->p_siglist;
1592 	ki->p_sigmask = p->p_sigmask;
1593 	ki->p_sigignore = p->p_sigignore;
1594 	ki->p_sigcatch = p->p_sigcatch;
1595 
1596 	ki->p_stat = p->p_stat;
1597 	ki->p_nice = p->p_nice;
1598 
1599 	ki->p_xstat = p->p_xstat;
1600 	ki->p_acflag = p->p_acflag;
1601 
1602 	strlcpy(ki->p_emul, p->p_emul->e_name, sizeof(ki->p_emul));
1603 	strlcpy(ki->p_comm, p->p_comm, sizeof(ki->p_comm));
1604 	strncpy(ki->p_login, p->p_session->s_login,
1605 	    min(sizeof(ki->p_login) - 1, sizeof(p->p_session->s_login)));
1606 
1607 	if (p->p_stat == SIDL || P_ZOMBIE(p)) {
1608 		ki->p_vm_rssize = 0;
1609 		ki->p_vm_tsize = 0;
1610 		ki->p_vm_dsize = 0;
1611 		ki->p_vm_ssize = 0;
1612 	} else {
1613 		struct vmspace *vm = p->p_vmspace;
1614 
1615 		ki->p_vm_rssize = vm_resident_count(vm);
1616 		ki->p_vm_tsize = vm->vm_tsize;
1617 		ki->p_vm_dsize = vm->vm_dused;
1618 		ki->p_vm_ssize = vm->vm_ssize;
1619 		ki->p_forw = ki->p_back = 0;
1620 		ki->p_addr = PTRTOINT64(p->p_addr);
1621 		ki->p_stat = p->p_stat;
1622 		ki->p_swtime = p->p_swtime;
1623 		ki->p_slptime = p->p_slptime;
1624 		ki->p_schedflags = 0;
1625 		ki->p_holdcnt = 1;
1626 		ki->p_priority = p->p_priority;
1627 		ki->p_usrpri = p->p_usrpri;
1628 		if (p->p_wmesg)
1629 			strlcpy(ki->p_wmesg, p->p_wmesg, sizeof(ki->p_wmesg));
1630 		ki->p_wchan = PTRTOINT64(p->p_wchan);
1631 
1632 	}
1633 
1634 	if (p->p_session->s_ttyvp)
1635 		ki->p_eflag |= EPROC_CTTY;
1636 	if (SESS_LEADER(p))
1637 		ki->p_eflag |= EPROC_SLEADER;
1638 	if (p->p_rlimit)
1639 		ki->p_rlim_rss_cur = p->p_rlimit[RLIMIT_RSS].rlim_cur;
1640 
1641 	/* XXX Is this double check necessary? */
1642 	if (P_ZOMBIE(p)) {
1643 		ki->p_uvalid = 0;
1644 	} else {
1645 		ki->p_uvalid = 1;
1646 
1647 		ki->p_ustart_sec = p->p_stats->p_start.tv_sec;
1648 		ki->p_ustart_usec = p->p_stats->p_start.tv_usec;
1649 
1650 		calcru(p, &ut, &st, NULL);
1651 		ki->p_uutime_sec = ut.tv_sec;
1652 		ki->p_uutime_usec = ut.tv_usec;
1653 		ki->p_ustime_sec = st.tv_sec;
1654 		ki->p_ustime_usec = st.tv_usec;
1655 
1656 		ki->p_uru_maxrss = p->p_stats->p_ru.ru_maxrss;
1657 		ki->p_uru_ixrss = p->p_stats->p_ru.ru_ixrss;
1658 		ki->p_uru_idrss = p->p_stats->p_ru.ru_idrss;
1659 		ki->p_uru_isrss = p->p_stats->p_ru.ru_isrss;
1660 		ki->p_uru_minflt = p->p_stats->p_ru.ru_minflt;
1661 		ki->p_uru_majflt = p->p_stats->p_ru.ru_majflt;
1662 		ki->p_uru_nswap = p->p_stats->p_ru.ru_nswap;
1663 		ki->p_uru_inblock = p->p_stats->p_ru.ru_inblock;
1664 		ki->p_uru_oublock = p->p_stats->p_ru.ru_oublock;
1665 		ki->p_uru_msgsnd = p->p_stats->p_ru.ru_msgsnd;
1666 		ki->p_uru_msgrcv = p->p_stats->p_ru.ru_msgrcv;
1667 		ki->p_uru_nsignals = p->p_stats->p_ru.ru_nsignals;
1668 		ki->p_uru_nvcsw = p->p_stats->p_ru.ru_nvcsw;
1669 		ki->p_uru_nivcsw = p->p_stats->p_ru.ru_nivcsw;
1670 
1671 		timeradd(&p->p_stats->p_cru.ru_utime,
1672 			 &p->p_stats->p_cru.ru_stime, &ut);
1673 		ki->p_uctime_sec = ut.tv_sec;
1674 		ki->p_uctime_usec = ut.tv_usec;
1675 		ki->p_cpuid = KI_NOCPU;
1676 #ifdef MULTIPROCESSOR
1677 		if (p->p_cpu != NULL)
1678 			ki->p_cpuid = CPU_INFO_UNIT(p->p_cpu);
1679 #endif
1680 	}
1681 }
1682 
1683 int
1684 sysctl_proc_args(int *name, u_int namelen, void *oldp, size_t *oldlenp,
1685     struct proc *cp)
1686 {
1687 	struct proc *vp;
1688 	pid_t pid;
1689 	struct ps_strings pss;
1690 	struct iovec iov;
1691 	struct uio uio;
1692 	int error, cnt, op;
1693 	size_t limit;
1694 	char **rargv, **vargv;		/* reader vs. victim */
1695 	char *rarg, *varg, *buf;
1696 	struct vmspace *vm;
1697 
1698 	if (namelen > 2)
1699 		return (ENOTDIR);
1700 	if (namelen < 2)
1701 		return (EINVAL);
1702 
1703 	pid = name[0];
1704 	op = name[1];
1705 
1706 	switch (op) {
1707 	case KERN_PROC_ARGV:
1708 	case KERN_PROC_NARGV:
1709 	case KERN_PROC_ENV:
1710 	case KERN_PROC_NENV:
1711 		break;
1712 	default:
1713 		return (EOPNOTSUPP);
1714 	}
1715 
1716 	if ((vp = pfind(pid)) == NULL)
1717 		return (ESRCH);
1718 
1719 	if (oldp == NULL) {
1720 		if (op == KERN_PROC_NARGV || op == KERN_PROC_NENV)
1721 			*oldlenp = sizeof(int);
1722 		else
1723 			*oldlenp = ARG_MAX;	/* XXX XXX XXX */
1724 		return (0);
1725 	}
1726 
1727 	if (P_ZOMBIE(vp) || (vp->p_flag & P_SYSTEM))
1728 		return (EINVAL);
1729 
1730 	/* Exiting - don't bother, it will be gone soon anyway */
1731 	if ((vp->p_flag & P_WEXIT))
1732 		return (ESRCH);
1733 
1734 	/* Execing - danger. */
1735 	if ((vp->p_flag & P_INEXEC))
1736 		return (EBUSY);
1737 
1738 	vm = vp->p_vmspace;
1739 	vm->vm_refcnt++;
1740 	vp = NULL;
1741 
1742 	buf = malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
1743 
1744 	iov.iov_base = &pss;
1745 	iov.iov_len = sizeof(pss);
1746 	uio.uio_iov = &iov;
1747 	uio.uio_iovcnt = 1;
1748 	uio.uio_offset = (off_t)PS_STRINGS;
1749 	uio.uio_resid = sizeof(pss);
1750 	uio.uio_segflg = UIO_SYSSPACE;
1751 	uio.uio_rw = UIO_READ;
1752 	uio.uio_procp = cp;
1753 
1754 	if ((error = uvm_io(&vm->vm_map, &uio, 0)) != 0)
1755 		goto out;
1756 
1757 	if (op == KERN_PROC_NARGV) {
1758 		error = sysctl_rdint(oldp, oldlenp, NULL, pss.ps_nargvstr);
1759 		goto out;
1760 	}
1761 	if (op == KERN_PROC_NENV) {
1762 		error = sysctl_rdint(oldp, oldlenp, NULL, pss.ps_nenvstr);
1763 		goto out;
1764 	}
1765 
1766 	if (op == KERN_PROC_ARGV) {
1767 		cnt = pss.ps_nargvstr;
1768 		vargv = pss.ps_argvstr;
1769 	} else {
1770 		cnt = pss.ps_nenvstr;
1771 		vargv = pss.ps_envstr;
1772 	}
1773 
1774 	/* -1 to have space for a terminating NUL */
1775 	limit = *oldlenp - 1;
1776 	*oldlenp = 0;
1777 
1778 	rargv = oldp;
1779 
1780 	/*
1781 	 * *oldlenp - number of bytes copied out into readers buffer.
1782 	 * limit - maximal number of bytes allowed into readers buffer.
1783 	 * rarg - pointer into readers buffer where next arg will be stored.
1784 	 * rargv - pointer into readers buffer where the next rarg pointer
1785 	 *  will be stored.
1786 	 * vargv - pointer into victim address space where the next argument
1787 	 *  will be read.
1788 	 */
1789 
1790 	/* space for cnt pointers and a NULL */
1791 	rarg = (char *)(rargv + cnt + 1);
1792 	*oldlenp += (cnt + 1) * sizeof(char **);
1793 
1794 	while (cnt > 0 && *oldlenp < limit) {
1795 		size_t len, vstrlen;
1796 
1797 		/* Write to readers argv */
1798 		if ((error = copyout(&rarg, rargv, sizeof(rarg))) != 0)
1799 			goto out;
1800 
1801 		/* read the victim argv */
1802 		iov.iov_base = &varg;
1803 		iov.iov_len = sizeof(varg);
1804 		uio.uio_iov = &iov;
1805 		uio.uio_iovcnt = 1;
1806 		uio.uio_offset = (off_t)(vaddr_t)vargv;
1807 		uio.uio_resid = sizeof(varg);
1808 		uio.uio_segflg = UIO_SYSSPACE;
1809 		uio.uio_rw = UIO_READ;
1810 		uio.uio_procp = cp;
1811 		if ((error = uvm_io(&vm->vm_map, &uio, 0)) != 0)
1812 			goto out;
1813 
1814 		if (varg == NULL)
1815 			break;
1816 
1817 		/*
1818 		 * read the victim arg. We must jump through hoops to avoid
1819 		 * crossing a page boundary too much and returning an error.
1820 		 */
1821 more:
1822 		len = PAGE_SIZE - (((vaddr_t)varg) & PAGE_MASK);
1823 		/* leave space for the terminating NUL */
1824 		iov.iov_base = buf;
1825 		iov.iov_len = len;
1826 		uio.uio_iov = &iov;
1827 		uio.uio_iovcnt = 1;
1828 		uio.uio_offset = (off_t)(vaddr_t)varg;
1829 		uio.uio_resid = len;
1830 		uio.uio_segflg = UIO_SYSSPACE;
1831 		uio.uio_rw = UIO_READ;
1832 		uio.uio_procp = cp;
1833 		if ((error = uvm_io(&vm->vm_map, &uio, 0)) != 0)
1834 			goto out;
1835 
1836 		for (vstrlen = 0; vstrlen < len; vstrlen++) {
1837 			if (buf[vstrlen] == '\0')
1838 				break;
1839 		}
1840 
1841 		/* Don't overflow readers buffer. */
1842 		if (*oldlenp + vstrlen + 1 >= limit) {
1843 			error = ENOMEM;
1844 			goto out;
1845 		}
1846 
1847 		if ((error = copyout(buf, rarg, vstrlen)) != 0)
1848 			goto out;
1849 
1850 		*oldlenp += vstrlen;
1851 		rarg += vstrlen;
1852 
1853 		/* The string didn't end in this page? */
1854 		if (vstrlen == len) {
1855 			varg += vstrlen;
1856 			goto more;
1857 		}
1858 
1859 		/* End of string. Terminate it with a NUL */
1860 		buf[0] = '\0';
1861 		if ((error = copyout(buf, rarg, 1)) != 0)
1862 			goto out;
1863 		*oldlenp += 1;
1864 		rarg += 1;
1865 
1866 		vargv++;
1867 		rargv++;
1868 		cnt--;
1869 	}
1870 
1871 	if (*oldlenp >= limit) {
1872 		error = ENOMEM;
1873 		goto out;
1874 	}
1875 
1876 	/* Write the terminating null */
1877 	rarg = NULL;
1878 	error = copyout(&rarg, rargv, sizeof(rarg));
1879 
1880 out:
1881 	uvmspace_free(vm);
1882 	free(buf, M_TEMP);
1883 	return (error);
1884 }
1885 
1886 #endif
1887 
1888 /*
1889  * Initialize disknames/diskstats for export by sysctl. If update is set,
1890  * then we simply update the disk statistics information.
1891  */
1892 int
1893 sysctl_diskinit(int update, struct proc *p)
1894 {
1895 	struct diskstats *sdk;
1896 	struct disk *dk;
1897 	int i, tlen, l;
1898 
1899 	if ((i = rw_enter(&sysctl_disklock, RW_WRITE|RW_INTR)) != 0)
1900 		return i;
1901 
1902 	if (disk_change) {
1903 		for (dk = TAILQ_FIRST(&disklist), tlen = 0; dk;
1904 		    dk = TAILQ_NEXT(dk, dk_link))
1905 			tlen += strlen(dk->dk_name) + 1;
1906 		tlen++;
1907 
1908 		if (disknames)
1909 			free(disknames, M_SYSCTL);
1910 		if (diskstats)
1911 			free(diskstats, M_SYSCTL);
1912 		diskstats = NULL;
1913 		disknames = NULL;
1914 		diskstats = malloc(disk_count * sizeof(struct diskstats),
1915 		    M_SYSCTL, M_WAITOK);
1916 		disknames = malloc(tlen, M_SYSCTL, M_WAITOK);
1917 		disknames[0] = '\0';
1918 
1919 		for (dk = TAILQ_FIRST(&disklist), i = 0, l = 0; dk;
1920 		    dk = TAILQ_NEXT(dk, dk_link), i++) {
1921 			snprintf(disknames + l, tlen - l, "%s,",
1922 			    dk->dk_name ? dk->dk_name : "");
1923 			l += strlen(disknames + l);
1924 			sdk = diskstats + i;
1925 			strlcpy(sdk->ds_name, dk->dk_name,
1926 			    sizeof(sdk->ds_name));
1927 			mtx_enter(&dk->dk_mtx);
1928 			sdk->ds_busy = dk->dk_busy;
1929 			sdk->ds_rxfer = dk->dk_rxfer;
1930 			sdk->ds_wxfer = dk->dk_wxfer;
1931 			sdk->ds_seek = dk->dk_seek;
1932 			sdk->ds_rbytes = dk->dk_rbytes;
1933 			sdk->ds_wbytes = dk->dk_wbytes;
1934 			sdk->ds_attachtime = dk->dk_attachtime;
1935 			sdk->ds_timestamp = dk->dk_timestamp;
1936 			sdk->ds_time = dk->dk_time;
1937 			mtx_leave(&dk->dk_mtx);
1938 		}
1939 
1940 		/* Eliminate trailing comma */
1941 		if (l != 0)
1942 			disknames[l - 1] = '\0';
1943 		disk_change = 0;
1944 	} else if (update) {
1945 		/* Just update, number of drives hasn't changed */
1946 		for (dk = TAILQ_FIRST(&disklist), i = 0; dk;
1947 		    dk = TAILQ_NEXT(dk, dk_link), i++) {
1948 			sdk = diskstats + i;
1949 			strlcpy(sdk->ds_name, dk->dk_name,
1950 			    sizeof(sdk->ds_name));
1951 			mtx_enter(&dk->dk_mtx);
1952 			sdk->ds_busy = dk->dk_busy;
1953 			sdk->ds_rxfer = dk->dk_rxfer;
1954 			sdk->ds_wxfer = dk->dk_wxfer;
1955 			sdk->ds_seek = dk->dk_seek;
1956 			sdk->ds_rbytes = dk->dk_rbytes;
1957 			sdk->ds_wbytes = dk->dk_wbytes;
1958 			sdk->ds_attachtime = dk->dk_attachtime;
1959 			sdk->ds_timestamp = dk->dk_timestamp;
1960 			sdk->ds_time = dk->dk_time;
1961 			mtx_leave(&dk->dk_mtx);
1962 		}
1963 	}
1964 	rw_exit_write(&sysctl_disklock);
1965 	return 0;
1966 }
1967 
1968 #if defined(SYSVMSG) || defined(SYSVSEM) || defined(SYSVSHM)
1969 int
1970 sysctl_sysvipc(int *name, u_int namelen, void *where, size_t *sizep)
1971 {
1972 #ifdef SYSVSEM
1973 	struct sem_sysctl_info *semsi;
1974 #endif
1975 #ifdef SYSVSHM
1976 	struct shm_sysctl_info *shmsi;
1977 #endif
1978 	size_t infosize, dssize, tsize, buflen;
1979 	int i, nds, error, ret;
1980 	void *buf;
1981 
1982 	if (namelen != 1)
1983 		return (EINVAL);
1984 
1985 	buflen = *sizep;
1986 
1987 	switch (*name) {
1988 	case KERN_SYSVIPC_MSG_INFO:
1989 #ifdef SYSVMSG
1990 		return (sysctl_sysvmsg(name, namelen, where, sizep));
1991 #else
1992 		return (EOPNOTSUPP);
1993 #endif
1994 	case KERN_SYSVIPC_SEM_INFO:
1995 #ifdef SYSVSEM
1996 		infosize = sizeof(semsi->seminfo);
1997 		nds = seminfo.semmni;
1998 		dssize = sizeof(semsi->semids[0]);
1999 		break;
2000 #else
2001 		return (EOPNOTSUPP);
2002 #endif
2003 	case KERN_SYSVIPC_SHM_INFO:
2004 #ifdef SYSVSHM
2005 		infosize = sizeof(shmsi->shminfo);
2006 		nds = shminfo.shmmni;
2007 		dssize = sizeof(shmsi->shmids[0]);
2008 		break;
2009 #else
2010 		return (EOPNOTSUPP);
2011 #endif
2012 	default:
2013 		return (EINVAL);
2014 	}
2015 	tsize = infosize + (nds * dssize);
2016 
2017 	/* Return just the total size required. */
2018 	if (where == NULL) {
2019 		*sizep = tsize;
2020 		return (0);
2021 	}
2022 
2023 	/* Not enough room for even the info struct. */
2024 	if (buflen < infosize) {
2025 		*sizep = 0;
2026 		return (ENOMEM);
2027 	}
2028 	buf = malloc(min(tsize, buflen), M_TEMP, M_WAITOK|M_ZERO);
2029 
2030 	switch (*name) {
2031 #ifdef SYSVSEM
2032 	case KERN_SYSVIPC_SEM_INFO:
2033 		semsi = (struct sem_sysctl_info *)buf;
2034 		semsi->seminfo = seminfo;
2035 		break;
2036 #endif
2037 #ifdef SYSVSHM
2038 	case KERN_SYSVIPC_SHM_INFO:
2039 		shmsi = (struct shm_sysctl_info *)buf;
2040 		shmsi->shminfo = shminfo;
2041 		break;
2042 #endif
2043 	}
2044 	buflen -= infosize;
2045 
2046 	ret = 0;
2047 	if (buflen > 0) {
2048 		/* Fill in the IPC data structures.  */
2049 		for (i = 0; i < nds; i++) {
2050 			if (buflen < dssize) {
2051 				ret = ENOMEM;
2052 				break;
2053 			}
2054 			switch (*name) {
2055 #ifdef SYSVSEM
2056 			case KERN_SYSVIPC_SEM_INFO:
2057 				if (sema[i] != NULL)
2058 					bcopy(sema[i], &semsi->semids[i],
2059 					    dssize);
2060 				else
2061 					bzero(&semsi->semids[i], dssize);
2062 				break;
2063 #endif
2064 #ifdef SYSVSHM
2065 			case KERN_SYSVIPC_SHM_INFO:
2066 				if (shmsegs[i] != NULL)
2067 					bcopy(shmsegs[i], &shmsi->shmids[i],
2068 					    dssize);
2069 				else
2070 					bzero(&shmsi->shmids[i], dssize);
2071 				break;
2072 #endif
2073 			}
2074 			buflen -= dssize;
2075 		}
2076 	}
2077 	*sizep -= buflen;
2078 	error = copyout(buf, where, *sizep);
2079 	free(buf, M_TEMP);
2080 	/* If copyout succeeded, use return code set earlier. */
2081 	return (error ? error : ret);
2082 }
2083 #endif /* SYSVMSG || SYSVSEM || SYSVSHM */
2084 
2085 #ifndef	SMALL_KERNEL
2086 
2087 int
2088 sysctl_intrcnt(int *name, u_int namelen, void *oldp, size_t *oldlenp)
2089 {
2090 	return (evcount_sysctl(name, namelen, oldp, oldlenp, NULL, 0));
2091 }
2092 
2093 
2094 int
2095 sysctl_sensors(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2096     void *newp, size_t newlen)
2097 {
2098 	struct ksensor *ks;
2099 	struct sensor *us;
2100 	struct ksensordev *ksd;
2101 	struct sensordev *usd;
2102 	int dev, numt, ret;
2103 	enum sensor_type type;
2104 
2105 	if (namelen != 1 && namelen != 3)
2106 		return (ENOTDIR);
2107 
2108 	dev = name[0];
2109 	if (namelen == 1) {
2110 		ksd = sensordev_get(dev);
2111 		if (ksd == NULL)
2112 			return (ENOENT);
2113 
2114 		/* Grab a copy, to clear the kernel pointers */
2115 		usd = malloc(sizeof(*usd), M_TEMP, M_WAITOK|M_ZERO);
2116 		usd->num = ksd->num;
2117 		strlcpy(usd->xname, ksd->xname, sizeof(usd->xname));
2118 		memcpy(usd->maxnumt, ksd->maxnumt, sizeof(usd->maxnumt));
2119 		usd->sensors_count = ksd->sensors_count;
2120 
2121 		ret = sysctl_rdstruct(oldp, oldlenp, newp, usd,
2122 		    sizeof(struct sensordev));
2123 
2124 		free(usd, M_TEMP);
2125 		return (ret);
2126 	}
2127 
2128 	type = name[1];
2129 	numt = name[2];
2130 
2131 	ks = sensor_find(dev, type, numt);
2132 	if (ks == NULL)
2133 		return (ENOENT);
2134 
2135 	/* Grab a copy, to clear the kernel pointers */
2136 	us = malloc(sizeof(*us), M_TEMP, M_WAITOK|M_ZERO);
2137 	memcpy(us->desc, ks->desc, sizeof(us->desc));
2138 	us->tv = ks->tv;
2139 	us->value = ks->value;
2140 	us->type = ks->type;
2141 	us->status = ks->status;
2142 	us->numt = ks->numt;
2143 	us->flags = ks->flags;
2144 
2145 	ret = sysctl_rdstruct(oldp, oldlenp, newp, us,
2146 	    sizeof(struct sensor));
2147 	free(us, M_TEMP);
2148 	return (ret);
2149 }
2150 
2151 int
2152 sysctl_emul(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2153     void *newp, size_t newlen)
2154 {
2155 	int enabled, error;
2156 	struct emul *e;
2157 
2158 	if (name[0] == KERN_EMUL_NUM) {
2159 		if (namelen != 1)
2160 			return (ENOTDIR);
2161 		return (sysctl_rdint(oldp, oldlenp, newp, nexecs));
2162 	}
2163 
2164 	if (namelen != 2)
2165 		return (ENOTDIR);
2166 	if (name[0] > nexecs || name[0] < 0)
2167 		return (EINVAL);
2168 	e = execsw[name[0] - 1].es_emul;
2169 	if (e == NULL)
2170 		return (EINVAL);
2171 
2172 	switch (name[1]) {
2173 	case KERN_EMUL_NAME:
2174 		return (sysctl_rdstring(oldp, oldlenp, newp, e->e_name));
2175 	case KERN_EMUL_ENABLED:
2176 		enabled = (e->e_flags & EMUL_ENABLED);
2177 		error = sysctl_int(oldp, oldlenp, newp, newlen,
2178 		    &enabled);
2179 		e->e_flags = (enabled & EMUL_ENABLED);
2180 		return (error);
2181 	default:
2182 		return (EINVAL);
2183 	}
2184 }
2185 
2186 #endif	/* SMALL_KERNEL */
2187 
2188 int
2189 sysctl_cptime2(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2190     void *newp, size_t newlen)
2191 {
2192 	CPU_INFO_ITERATOR cii;
2193 	struct cpu_info *ci;
2194 	int i;
2195 
2196 	if (namelen != 1)
2197 		return (ENOTDIR);
2198 
2199 	i = name[0];
2200 
2201 	CPU_INFO_FOREACH(cii, ci) {
2202 		if (i-- == 0)
2203 			break;
2204 	}
2205 	if (i > 0)
2206 		return (ENOENT);
2207 
2208 	return (sysctl_rdstruct(oldp, oldlenp, newp,
2209 	    &ci->ci_schedstate.spc_cp_time,
2210 	    sizeof(ci->ci_schedstate.spc_cp_time)));
2211 }
2212