xref: /openbsd-src/sys/kern/kern_sysctl.c (revision 098ff4accc6eb5eaf1cfee7dc19234ea8c0bd383)
1 /*	$OpenBSD: kern_sysctl.c,v 1.458 2024/12/16 21:22:51 mvs Exp $	*/
2 /*	$NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $	*/
3 
4 /*-
5  * Copyright (c) 1982, 1986, 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Mike Karels at Berkeley Software Design, Inc.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)kern_sysctl.c	8.4 (Berkeley) 4/14/94
36  */
37 
38 /*
39  * sysctl system call.
40  */
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/atomic.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/pool.h>
48 #include <sys/proc.h>
49 #include <sys/resourcevar.h>
50 #include <sys/signalvar.h>
51 #include <sys/fcntl.h>
52 #include <sys/file.h>
53 #include <sys/filedesc.h>
54 #include <sys/vnode.h>
55 #include <sys/unistd.h>
56 #include <sys/buf.h>
57 #include <sys/clockintr.h>
58 #include <sys/tty.h>
59 #include <sys/disklabel.h>
60 #include <sys/disk.h>
61 #include <sys/sysctl.h>
62 #include <sys/msgbuf.h>
63 #include <sys/vmmeter.h>
64 #include <sys/namei.h>
65 #include <sys/exec.h>
66 #include <sys/mbuf.h>
67 #include <sys/percpu.h>
68 #include <sys/sensors.h>
69 #include <sys/pipe.h>
70 #include <sys/eventvar.h>
71 #include <sys/socketvar.h>
72 #include <sys/socket.h>
73 #include <sys/domain.h>
74 #include <sys/protosw.h>
75 #include <sys/pledge.h>
76 #include <sys/timetc.h>
77 #include <sys/evcount.h>
78 #include <sys/un.h>
79 #include <sys/unpcb.h>
80 #include <sys/sched.h>
81 #include <sys/mount.h>
82 #include <sys/syscallargs.h>
83 #include <sys/wait.h>
84 #include <sys/witness.h>
85 
86 #include <uvm/uvm_extern.h>
87 
88 #include <dev/cons.h>
89 
90 #include <dev/usb/ucomvar.h>
91 
92 #include <net/route.h>
93 #include <netinet/in.h>
94 #include <netinet/ip.h>
95 #include <netinet/ip_var.h>
96 #include <netinet/in_pcb.h>
97 #include <netinet/ip6.h>
98 #include <netinet/tcp.h>
99 #include <netinet/tcp_timer.h>
100 #include <netinet/tcp_var.h>
101 #include <netinet/udp.h>
102 #include <netinet/udp_var.h>
103 #include <netinet6/ip6_var.h>
104 
105 #ifdef DDB
106 #include <ddb/db_var.h>
107 #endif
108 
109 #ifdef SYSVMSG
110 #include <sys/msg.h>
111 #endif
112 #ifdef SYSVSEM
113 #include <sys/sem.h>
114 #endif
115 #ifdef SYSVSHM
116 #include <sys/shm.h>
117 #endif
118 
119 #include "audio.h"
120 #include "dt.h"
121 #include "pf.h"
122 #include "ucom.h"
123 #include "video.h"
124 
125 /*
126  * Locks used to protect data:
127  *	a	atomic
128  */
129 
130 extern struct forkstat forkstat;
131 extern struct nchstats nchstats;
132 extern int fscale;
133 extern fixpt_t ccpu;
134 extern long numvnodes;
135 extern int allowdt;
136 extern int audio_record_enable;
137 extern int video_record_enable;
138 extern int autoconf_serial;
139 
140 int allowkmem;		/* [a] */
141 
142 int sysctl_securelevel(void *, size_t *, void *, size_t, struct proc *);
143 int sysctl_diskinit(int, struct proc *);
144 int sysctl_proc_args(int *, u_int, void *, size_t *, struct proc *);
145 int sysctl_proc_cwd(int *, u_int, void *, size_t *, struct proc *);
146 int sysctl_proc_nobroadcastkill(int *, u_int, void *, size_t, void *, size_t *,
147 	struct proc *);
148 int sysctl_proc_vmmap(int *, u_int, void *, size_t *, struct proc *);
149 int sysctl_intrcnt(int *, u_int, void *, size_t *);
150 int sysctl_sensors(int *, u_int, void *, size_t *, void *, size_t);
151 int sysctl_cptime2(int *, u_int, void *, size_t *, void *, size_t);
152 int sysctl_audio(int *, u_int, void *, size_t *, void *, size_t);
153 int sysctl_video(int *, u_int, void *, size_t *, void *, size_t);
154 int sysctl_cpustats(int *, u_int, void *, size_t *, void *, size_t);
155 int sysctl_utc_offset(void *, size_t *, void *, size_t);
156 int sysctl_hwbattery(int *, u_int, void *, size_t *, void *, size_t);
157 
158 void fill_file(struct kinfo_file *, struct file *, struct filedesc *, int,
159     struct vnode *, struct process *, struct proc *, struct socket *, int);
160 void fill_kproc(struct process *, struct kinfo_proc *, struct proc *, int);
161 
162 int kern_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t,
163 	struct proc *);
164 int kern_sysctl_dirs(int, int *, u_int, void *, size_t *, void *,
165 	size_t, struct proc *);
166 int kern_sysctl_dirs_locked(int, int *, u_int, void *, size_t *, void *,
167 	size_t, struct proc *);
168 int hw_sysctl_locked(int *, u_int, void *, size_t *,void *, size_t,
169 	struct proc *);
170 
171 int (*cpu_cpuspeed)(int *);
172 
173 /*
174  * Lock to avoid too many processes vslocking a large amount of memory
175  * at the same time.
176  */
177 struct rwlock sysctl_lock = RWLOCK_INITIALIZER("sysctllk");
178 struct rwlock sysctl_disklock = RWLOCK_INITIALIZER("sysctldlk");
179 
180 int
181 sysctl_vslock(void *addr, size_t len)
182 {
183 	int error;
184 
185 	error = rw_enter(&sysctl_lock, RW_WRITE|RW_INTR);
186 	if (error)
187 		return (error);
188 	KERNEL_LOCK();
189 
190 	if (addr) {
191 		if (atop(len) > uvmexp.wiredmax - uvmexp.wired) {
192 			error = ENOMEM;
193 			goto out;
194 		}
195 		error = uvm_vslock(curproc, addr, len, PROT_READ | PROT_WRITE);
196 		if (error)
197 			goto out;
198 	}
199 
200 	return (0);
201 out:
202 	KERNEL_UNLOCK();
203 	rw_exit_write(&sysctl_lock);
204 	return (error);
205 }
206 
207 void
208 sysctl_vsunlock(void *addr, size_t len)
209 {
210 	KERNEL_ASSERT_LOCKED();
211 
212 	if (addr)
213 		uvm_vsunlock(curproc, addr, len);
214 	KERNEL_UNLOCK();
215 	rw_exit_write(&sysctl_lock);
216 }
217 
218 int
219 sys_sysctl(struct proc *p, void *v, register_t *retval)
220 {
221 	struct sys_sysctl_args /* {
222 		syscallarg(const int *) name;
223 		syscallarg(u_int) namelen;
224 		syscallarg(void *) old;
225 		syscallarg(size_t *) oldlenp;
226 		syscallarg(void *) new;
227 		syscallarg(size_t) newlen;
228 	} */ *uap = v;
229 	int error, dolock = 1;
230 	size_t savelen = 0, oldlen = 0;
231 	sysctlfn *fn;
232 	int name[CTL_MAXNAME];
233 
234 	if (SCARG(uap, new) != NULL &&
235 	    (error = suser(p)))
236 		return (error);
237 	/*
238 	 * all top-level sysctl names are non-terminal
239 	 */
240 	if (SCARG(uap, namelen) > CTL_MAXNAME || SCARG(uap, namelen) < 2)
241 		return (EINVAL);
242 	error = copyin(SCARG(uap, name), name,
243 		       SCARG(uap, namelen) * sizeof(int));
244 	if (error)
245 		return (error);
246 
247 	error = pledge_sysctl(p, SCARG(uap, namelen),
248 	    name, SCARG(uap, new));
249 	if (error)
250 		return (error);
251 
252 	switch (name[0]) {
253 	case CTL_KERN:
254 		dolock = 0;
255 		fn = kern_sysctl;
256 		break;
257 	case CTL_HW:
258 		dolock = 0;
259 		fn = hw_sysctl;
260 		break;
261 	case CTL_VM:
262 		fn = uvm_sysctl;
263 		break;
264 	case CTL_NET:
265 		dolock = 0;
266 		fn = net_sysctl;
267 		break;
268 	case CTL_FS:
269 		dolock = 0;
270 		fn = fs_sysctl;
271 		break;
272 	case CTL_VFS:
273 		fn = vfs_sysctl;
274 		break;
275 	case CTL_MACHDEP:
276 		fn = cpu_sysctl;
277 		break;
278 #ifdef DEBUG_SYSCTL
279 	case CTL_DEBUG:
280 		fn = debug_sysctl;
281 		break;
282 #endif
283 #ifdef DDB
284 	case CTL_DDB:
285 		fn = ddb_sysctl;
286 		break;
287 #endif
288 	default:
289 		return (EOPNOTSUPP);
290 	}
291 
292 	if (SCARG(uap, oldlenp) &&
293 	    (error = copyin(SCARG(uap, oldlenp), &oldlen, sizeof(oldlen))))
294 		return (error);
295 
296 	if (dolock) {
297 		error = sysctl_vslock(SCARG(uap, old), oldlen);
298 		if (error)
299 			return (error);
300 		savelen = oldlen;
301 	}
302 	error = (*fn)(&name[1], SCARG(uap, namelen) - 1, SCARG(uap, old),
303 	    &oldlen, SCARG(uap, new), SCARG(uap, newlen), p);
304 	if (dolock)
305 		sysctl_vsunlock(SCARG(uap, old), savelen);
306 
307 	if (error)
308 		return (error);
309 	if (SCARG(uap, oldlenp))
310 		error = copyout(&oldlen, SCARG(uap, oldlenp), sizeof(oldlen));
311 	return (error);
312 }
313 
314 /*
315  * Attributes stored in the kernel.
316  */
317 char hostname[MAXHOSTNAMELEN];
318 int hostnamelen;
319 char domainname[MAXHOSTNAMELEN];
320 int domainnamelen;
321 int hostid;
322 char *disknames = NULL;
323 size_t disknameslen;
324 struct diskstats *diskstats = NULL;
325 size_t diskstatslen;
326 int securelevel;
327 
328 /* morally const values reported by sysctl_bounded_arr */
329 static int arg_max = ARG_MAX;
330 static int openbsd = OpenBSD;
331 static int posix_version = _POSIX_VERSION;
332 static int ngroups_max = NGROUPS_MAX;
333 static int int_zero = 0;
334 static int int_one = 1;
335 static int maxpartitions = MAXPARTITIONS;
336 static int raw_part = RAW_PART;
337 
338 extern int somaxconn, sominconn;
339 extern int nosuidcoredump;
340 extern int maxlocksperuid;
341 extern int uvm_wxabort;
342 extern int global_ptrace;
343 
/*
 * Integer kern.* variables served through sysctl_bounded_arr() by
 * kern_sysctl() and kern_sysctl_locked().  Each entry names the MIB
 * identifier and the backing int, followed either by SYSCTL_INT_READONLY
 * or by a pair of bounds.
 */
const struct sysctl_bounded_args kern_vars[] = {
	{KERN_OSREV, &openbsd, SYSCTL_INT_READONLY},
	{KERN_MAXVNODES, &maxvnodes, 0, INT_MAX},
	{KERN_MAXPROC, &maxprocess, 0, INT_MAX},
	{KERN_MAXFILES, &maxfiles, 0, INT_MAX},
	{KERN_NFILES, &numfiles, SYSCTL_INT_READONLY},
	{KERN_TTYCOUNT, &tty_count, SYSCTL_INT_READONLY},
	{KERN_ARGMAX, &arg_max, SYSCTL_INT_READONLY},
	{KERN_POSIX1, &posix_version, SYSCTL_INT_READONLY},
	{KERN_NGROUPS, &ngroups_max, SYSCTL_INT_READONLY},
	{KERN_JOB_CONTROL, &int_one, SYSCTL_INT_READONLY},
	{KERN_SAVED_IDS, &int_one, SYSCTL_INT_READONLY},
	{KERN_MAXPARTITIONS, &maxpartitions, SYSCTL_INT_READONLY},
	{KERN_RAWPARTITION, &raw_part, SYSCTL_INT_READONLY},
	{KERN_MAXTHREAD, &maxthread, 0, INT_MAX},
	{KERN_NTHREADS, &nthreads, SYSCTL_INT_READONLY},
	{KERN_SOMAXCONN, &somaxconn, 0, SHRT_MAX},
	{KERN_SOMINCONN, &sominconn, 0, SHRT_MAX},
	{KERN_NOSUIDCOREDUMP, &nosuidcoredump, 0, 3},
	{KERN_FSYNC, &int_one, SYSCTL_INT_READONLY},
	/* The three SYSV entries report 1 when the option is compiled in. */
	{KERN_SYSVMSG,
#ifdef SYSVMSG
	 &int_one,
#else
	 &int_zero,
#endif
	 SYSCTL_INT_READONLY},
	{KERN_SYSVSEM,
#ifdef SYSVSEM
	 &int_one,
#else
	 &int_zero,
#endif
	 SYSCTL_INT_READONLY},
	{KERN_SYSVSHM,
#ifdef SYSVSHM
	 &int_one,
#else
	 &int_zero,
#endif
	 SYSCTL_INT_READONLY},
	{KERN_FSCALE, &fscale, SYSCTL_INT_READONLY},
	{KERN_CCPU, &ccpu, SYSCTL_INT_READONLY},
	{KERN_NPROCS, &nprocesses, SYSCTL_INT_READONLY},
	{KERN_SPLASSERT, &splassert_ctl, 0, 3},
	{KERN_MAXLOCKSPERUID, &maxlocksperuid, 0, INT_MAX},
	{KERN_WXABORT, &uvm_wxabort, 0, 1},
	/* Always reported as zero. */
	{KERN_NETLIVELOCKS, &int_zero, SYSCTL_INT_READONLY},
#ifdef PTRACE
	{KERN_GLOBAL_PTRACE, &global_ptrace, 0, 1},
#endif
	{KERN_AUTOCONF_SERIAL, &autoconf_serial, SYSCTL_INT_READONLY},
};
397 
398 int
399 kern_sysctl_dirs(int top_name, int *name, u_int namelen,
400     void *oldp, size_t *oldlenp, void *newp, size_t newlen, struct proc *p)
401 {
402 	size_t savelen;
403 	int error;
404 
405 	switch (top_name) {
406 #if NAUDIO > 0
407 	case KERN_AUDIO:
408 		return (sysctl_audio(name, namelen, oldp, oldlenp,
409 		    newp, newlen));
410 #endif
411 #if NVIDEO > 0
412 	case KERN_VIDEO:
413 		return (sysctl_video(name, namelen, oldp, oldlenp,
414 		    newp, newlen));
415 #endif
416 	default:
417 		break;
418 	}
419 
420 	savelen = *oldlenp;
421 	if ((error = sysctl_vslock(oldp, savelen)))
422 		return (error);
423 	error = kern_sysctl_dirs_locked(top_name, name, namelen,
424 	    oldp, oldlenp, newp, newlen, p);
425 	sysctl_vsunlock(oldp, savelen);
426 
427 	return (error);
428 }
429 
430 int
431 kern_sysctl_dirs_locked(int top_name, int *name, u_int namelen,
432     void *oldp, size_t *oldlenp, void *newp, size_t newlen, struct proc *p)
433 {
434 	switch (top_name) {
435 #ifndef SMALL_KERNEL
436 	case KERN_PROC:
437 		return (sysctl_doproc(name, namelen, oldp, oldlenp));
438 	case KERN_PROC_ARGS:
439 		return (sysctl_proc_args(name, namelen, oldp, oldlenp, p));
440 	case KERN_PROC_CWD:
441 		return (sysctl_proc_cwd(name, namelen, oldp, oldlenp, p));
442 	case KERN_PROC_NOBROADCASTKILL:
443 		return (sysctl_proc_nobroadcastkill(name, namelen,
444 		     newp, newlen, oldp, oldlenp, p));
445 	case KERN_PROC_VMMAP:
446 		return (sysctl_proc_vmmap(name, namelen, oldp, oldlenp, p));
447 	case KERN_FILE:
448 		return (sysctl_file(name, namelen, oldp, oldlenp, p));
449 #endif
450 #if defined(GPROF) || defined(DDBPROF)
451 	case KERN_PROF:
452 		return (sysctl_doprof(name, namelen, oldp, oldlenp,
453 		    newp, newlen));
454 #endif
455 	case KERN_MALLOCSTATS:
456 		return (sysctl_malloc(name, namelen, oldp, oldlenp,
457 		    newp, newlen, p));
458 	case KERN_TTY:
459 		return (sysctl_tty(name, namelen, oldp, oldlenp,
460 		    newp, newlen));
461 	case KERN_POOL:
462 		return (sysctl_dopool(name, namelen, oldp, oldlenp));
463 #if defined(SYSVMSG) || defined(SYSVSEM) || defined(SYSVSHM)
464 	case KERN_SYSVIPC_INFO:
465 		return (sysctl_sysvipc(name, namelen, oldp, oldlenp));
466 #endif
467 #ifdef SYSVSEM
468 	case KERN_SEMINFO:
469 		return (sysctl_sysvsem(name, namelen, oldp, oldlenp,
470 		    newp, newlen));
471 #endif
472 #ifdef SYSVSHM
473 	case KERN_SHMINFO:
474 		return (sysctl_sysvshm(name, namelen, oldp, oldlenp,
475 		    newp, newlen));
476 #endif
477 #ifndef SMALL_KERNEL
478 	case KERN_INTRCNT:
479 		return (sysctl_intrcnt(name, namelen, oldp, oldlenp));
480 	case KERN_WATCHDOG:
481 		return (sysctl_wdog(name, namelen, oldp, oldlenp,
482 		    newp, newlen));
483 #endif
484 #ifndef SMALL_KERNEL
485 	case KERN_EVCOUNT:
486 		return (evcount_sysctl(name, namelen, oldp, oldlenp,
487 		    newp, newlen));
488 #endif
489 	case KERN_TIMECOUNTER:
490 		return (sysctl_tc(name, namelen, oldp, oldlenp, newp, newlen));
491 	case KERN_CPTIME2:
492 		return (sysctl_cptime2(name, namelen, oldp, oldlenp,
493 		    newp, newlen));
494 #ifdef WITNESS
495 	case KERN_WITNESSWATCH:
496 		return witness_sysctl_watch(oldp, oldlenp, newp, newlen);
497 	case KERN_WITNESS:
498 		return witness_sysctl(name, namelen, oldp, oldlenp,
499 		    newp, newlen);
500 #endif
501 	case KERN_CPUSTATS:
502 		return (sysctl_cpustats(name, namelen, oldp, oldlenp,
503 		    newp, newlen));
504 	case KERN_CLOCKINTR:
505 		return sysctl_clockintr(name, namelen, oldp, oldlenp, newp,
506 		    newlen);
507 	default:
508 		return (ENOTDIR);	/* overloaded */
509 	}
510 }
511 
512 /*
513  * kernel related system variables.
514  */
515 int
516 kern_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
517     size_t newlen, struct proc *p)
518 {
519 	int error;
520 	size_t savelen;
521 
522 	/* dispatch the non-terminal nodes first */
523 	if (namelen != 1)
524 		return (kern_sysctl_dirs(name[0], name + 1, namelen - 1,
525 		    oldp, oldlenp, newp, newlen, p));
526 
527 	switch (name[0]) {
528 	case KERN_ALLOWKMEM:
529 		return (sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
530 		    &allowkmem));
531 	case KERN_OSTYPE:
532 		return (sysctl_rdstring(oldp, oldlenp, newp, ostype));
533 	case KERN_OSRELEASE:
534 		return (sysctl_rdstring(oldp, oldlenp, newp, osrelease));
535 	case KERN_OSVERSION:
536 		return (sysctl_rdstring(oldp, oldlenp, newp, osversion));
537 	case KERN_VERSION:
538 		return (sysctl_rdstring(oldp, oldlenp, newp, version));
539 	case KERN_NUMVNODES:  /* XXX numvnodes is a long */
540 		return (sysctl_rdint(oldp, oldlenp, newp, numvnodes));
541 #if NDT > 0
542 	case KERN_ALLOWDT:
543 		return (sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
544 		    &allowdt));
545 #endif
546 	case KERN_HOSTID:
547 		return (sysctl_int(oldp, oldlenp, newp, newlen, &hostid));
548 	case KERN_CLOCKRATE:
549 		return (sysctl_clockrate(oldp, oldlenp, newp));
550 	case KERN_BOOTTIME: {
551 		struct timeval bt;
552 		memset(&bt, 0, sizeof bt);
553 		microboottime(&bt);
554 		return (sysctl_rdstruct(oldp, oldlenp, newp, &bt, sizeof bt));
555 	}
556 	case KERN_MBSTAT: {
557 		uint64_t counters[MBSTAT_COUNT];
558 		struct mbstat mbs;
559 		unsigned int i;
560 
561 		memset(&mbs, 0, sizeof(mbs));
562 		counters_read(mbstat, counters, MBSTAT_COUNT, NULL);
563 		for (i = 0; i < MBSTAT_TYPES; i++)
564 			mbs.m_mtypes[i] = counters[i];
565 
566 		mbs.m_drops = counters[MBSTAT_DROPS];
567 		mbs.m_wait = counters[MBSTAT_WAIT];
568 		mbs.m_drain = counters[MBSTAT_DRAIN];
569 		mbs.m_defrag_alloc = counters[MBSTAT_DEFRAG_ALLOC];
570 		mbs.m_prepend_alloc = counters[MBSTAT_PREPEND_ALLOC];
571 		mbs.m_pullup_alloc = counters[MBSTAT_PULLUP_ALLOC];
572 		mbs.m_pullup_copy = counters[MBSTAT_PULLUP_COPY];
573 		mbs.m_pulldown_alloc = counters[MBSTAT_PULLDOWN_ALLOC];
574 		mbs.m_pulldown_copy = counters[MBSTAT_PULLDOWN_COPY];
575 
576 		return (sysctl_rdstruct(oldp, oldlenp, newp,
577 		    &mbs, sizeof(mbs)));
578 	}
579 	case KERN_MSGBUFSIZE:
580 	case KERN_CONSBUFSIZE: {
581 		struct msgbuf *mp;
582 		mp = (name[0] == KERN_MSGBUFSIZE) ? msgbufp : consbufp;
583 		/*
584 		 * deal with cases where the message buffer has
585 		 * become corrupted.
586 		 */
587 		if (!mp || mp->msg_magic != MSG_MAGIC)
588 			return (ENXIO);
589 		return (sysctl_rdint(oldp, oldlenp, newp, mp->msg_bufs));
590 	}
591 	case KERN_TIMEOUT_STATS:
592 		return (timeout_sysctl(oldp, oldlenp, newp, newlen));
593 	case KERN_OSREV:
594 	case KERN_MAXPROC:
595 	case KERN_MAXFILES:
596 	case KERN_NFILES:
597 	case KERN_TTYCOUNT:
598 	case KERN_ARGMAX:
599 	case KERN_POSIX1:
600 	case KERN_NGROUPS:
601 	case KERN_JOB_CONTROL:
602 	case KERN_SAVED_IDS:
603 	case KERN_MAXPARTITIONS:
604 	case KERN_RAWPARTITION:
605 	case KERN_MAXTHREAD:
606 	case KERN_NTHREADS:
607 	case KERN_SOMAXCONN:
608 	case KERN_SOMINCONN:
609 	case KERN_FSYNC:
610 	case KERN_SYSVMSG:
611 	case KERN_SYSVSEM:
612 	case KERN_SYSVSHM:
613 	case KERN_FSCALE:
614 	case KERN_CCPU:
615 	case KERN_NPROCS:
616 	case KERN_WXABORT:
617 	case KERN_NETLIVELOCKS:
618 	case KERN_GLOBAL_PTRACE:
619 	case KERN_AUTOCONF_SERIAL:
620 		return (sysctl_bounded_arr(kern_vars, nitems(kern_vars), name,
621 		    namelen, oldp, oldlenp, newp, newlen));
622 	}
623 
624 	savelen = *oldlenp;
625 	if ((error = sysctl_vslock(oldp, savelen)))
626 		return (error);
627 	error = kern_sysctl_locked(name, namelen, oldp, oldlenp,
628 	    newp, newlen, p);
629 	sysctl_vsunlock(oldp, savelen);
630 
631 	return (error);
632 }
633 
634 int
635 kern_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
636     void *newp, size_t newlen, struct proc *p)
637 {
638 	int error, stackgap;
639 	dev_t dev;
640 	extern int pool_debug;
641 
642 	switch (name[0]) {
643 	case KERN_SECURELVL:
644 		return (sysctl_securelevel(oldp, oldlenp, newp, newlen, p));
645 	case KERN_HOSTNAME:
646 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
647 		    hostname, sizeof(hostname));
648 		if (newp && !error)
649 			hostnamelen = newlen;
650 		return (error);
651 	case KERN_DOMAINNAME:
652 		if (securelevel >= 1 && domainnamelen && newp)
653 			error = EPERM;
654 		else
655 			error = sysctl_tstring(oldp, oldlenp, newp, newlen,
656 			    domainname, sizeof(domainname));
657 		if (newp && !error)
658 			domainnamelen = newlen;
659 		return (error);
660 	case KERN_CONSBUF:
661 		if ((error = suser(p)))
662 			return (error);
663 		/* FALLTHROUGH */
664 	case KERN_MSGBUF: {
665 		struct msgbuf *mp;
666 		mp = (name[0] == KERN_MSGBUF) ? msgbufp : consbufp;
667 		/*
668 		 * deal with cases where the message buffer has
669 		 * become corrupted.
670 		 */
671 		if (!mp || mp->msg_magic != MSG_MAGIC)
672 			return (ENXIO);
673 		return (sysctl_rdstruct(oldp, oldlenp, newp, mp,
674 		    mp->msg_bufs + offsetof(struct msgbuf, msg_bufc)));
675 	}
676 	case KERN_CPTIME:
677 	{
678 		CPU_INFO_ITERATOR cii;
679 		struct cpu_info *ci;
680 		long cp_time[CPUSTATES];
681 		int i, n = 0;
682 
683 		memset(cp_time, 0, sizeof(cp_time));
684 
685 		CPU_INFO_FOREACH(cii, ci) {
686 			if (!cpu_is_online(ci))
687 				continue;
688 			n++;
689 			for (i = 0; i < CPUSTATES; i++)
690 				cp_time[i] += ci->ci_schedstate.spc_cp_time[i];
691 		}
692 
693 		for (i = 0; i < CPUSTATES; i++)
694 			cp_time[i] /= n;
695 
696 		return (sysctl_rdstruct(oldp, oldlenp, newp, &cp_time,
697 		    sizeof(cp_time)));
698 	}
699 	case KERN_NCHSTATS:
700 		return (sysctl_rdstruct(oldp, oldlenp, newp, &nchstats,
701 		    sizeof(struct nchstats)));
702 	case KERN_FORKSTAT:
703 		return (sysctl_rdstruct(oldp, oldlenp, newp, &forkstat,
704 		    sizeof(struct forkstat)));
705 	case KERN_STACKGAPRANDOM:
706 		stackgap = stackgap_random;
707 		error = sysctl_int(oldp, oldlenp, newp, newlen, &stackgap);
708 		if (error)
709 			return (error);
710 		/*
711 		 * Safety harness.
712 		 */
713 		if ((stackgap < ALIGNBYTES && stackgap != 0) ||
714 		    !powerof2(stackgap) || stackgap >= MAXSSIZ)
715 			return (EINVAL);
716 		stackgap_random = stackgap;
717 		return (0);
718 	case KERN_MAXCLUSTERS: {
719 		int val = nmbclust;
720 		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
721 		if (error == 0 && val != nmbclust)
722 			error = nmbclust_update(val);
723 		return (error);
724 	}
725 	case KERN_CACHEPCT: {
726 		u_int64_t dmapages;
727 		int opct, pgs;
728 		opct = bufcachepercent;
729 		error = sysctl_int(oldp, oldlenp, newp, newlen,
730 		    &bufcachepercent);
731 		if (error)
732 			return(error);
733 		if (bufcachepercent > 90 || bufcachepercent < 5) {
734 			bufcachepercent = opct;
735 			return (EINVAL);
736 		}
737 		dmapages = uvm_pagecount(&dma_constraint);
738 		if (bufcachepercent != opct) {
739 			pgs = bufcachepercent * dmapages / 100;
740 			bufadjust(pgs); /* adjust bufpages */
741 			bufhighpages = bufpages; /* set high water mark */
742 		}
743 		return(0);
744 	}
745 	case KERN_CONSDEV:
746 		if (cn_tab != NULL)
747 			dev = cn_tab->cn_dev;
748 		else
749 			dev = NODEV;
750 		return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev));
751 	case KERN_POOL_DEBUG: {
752 		int old_pool_debug = pool_debug;
753 
754 		error = sysctl_int(oldp, oldlenp, newp, newlen,
755 		    &pool_debug);
756 		if (error == 0 && pool_debug != old_pool_debug)
757 			pool_reclaim_all();
758 		return (error);
759 	}
760 #if NPF > 0
761 	case KERN_PFSTATUS:
762 		return (pf_sysctl(oldp, oldlenp, newp, newlen));
763 #endif
764 	case KERN_UTC_OFFSET:
765 		return (sysctl_utc_offset(oldp, oldlenp, newp, newlen));
766 	default:
767 		return (sysctl_bounded_arr(kern_vars, nitems(kern_vars), name,
768 		    namelen, oldp, oldlenp, newp, newlen));
769 	}
770 	/* NOTREACHED */
771 }
772 
773 /*
774  * hardware related system variables.
775  */
776 char *hw_vendor, *hw_prod, *hw_uuid, *hw_serial, *hw_ver;
777 int allowpowerdown = 1;
778 int hw_power = 1;
779 
780 /* morally const values reported by sysctl_bounded_arr */
781 static int byte_order = BYTE_ORDER;
782 
/*
 * Read-only integer hw.* variables; hw_sysctl() passes this table to
 * sysctl_bounded_arr() for identifiers it does not handle explicitly.
 */
const struct sysctl_bounded_args hw_vars[] = {
	{HW_NCPU, &ncpus, SYSCTL_INT_READONLY},
	{HW_NCPUFOUND, &ncpusfound, SYSCTL_INT_READONLY},
	{HW_BYTEORDER, &byte_order, SYSCTL_INT_READONLY},
	{HW_PAGESIZE, &uvmexp.pagesize, SYSCTL_INT_READONLY},
	{HW_DISKCOUNT, &disk_count, SYSCTL_INT_READONLY},
	{HW_POWER, &hw_power, SYSCTL_INT_READONLY},
};
791 
792 int
793 hw_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
794     size_t newlen, struct proc *p)
795 {
796 	extern char machine[], cpu_model[];
797 	int err;
798 
799 	/*
800 	 * all sysctl names at this level except sensors and battery
801 	 * are terminal
802 	 */
803 	if (name[0] != HW_SENSORS && name[0] != HW_BATTERY && namelen != 1)
804 		return (ENOTDIR);		/* overloaded */
805 
806 	switch (name[0]) {
807 	case HW_MACHINE:
808 		return (sysctl_rdstring(oldp, oldlenp, newp, machine));
809 	case HW_MODEL:
810 		return (sysctl_rdstring(oldp, oldlenp, newp, cpu_model));
811 	case HW_NCPUONLINE:
812 		return (sysctl_rdint(oldp, oldlenp, newp,
813 		    sysctl_hwncpuonline()));
814 	case HW_PHYSMEM:
815 		return (sysctl_rdint(oldp, oldlenp, newp, ptoa(physmem)));
816 	case HW_USERMEM:
817 		return (sysctl_rdint(oldp, oldlenp, newp,
818 		    ptoa(physmem - uvmexp.wired)));
819 	case HW_DISKNAMES:
820 	case HW_DISKSTATS:
821 	case HW_CPUSPEED:
822 #ifndef	SMALL_KERNEL
823 	case HW_SENSORS:
824 	case HW_SETPERF:
825 	case HW_PERFPOLICY:
826 	case HW_BATTERY:
827 #endif /* !SMALL_KERNEL */
828 	case HW_ALLOWPOWERDOWN:
829 	case HW_UCOMNAMES:
830 #ifdef __HAVE_CPU_TOPOLOGY
831 	case HW_SMT:
832 #endif
833 	{
834 		size_t savelen = *oldlenp;
835 		if ((err = sysctl_vslock(oldp, savelen)))
836 			return (err);
837 		err = hw_sysctl_locked(name, namelen, oldp, oldlenp,
838 		    newp, newlen, p);
839 		sysctl_vsunlock(oldp, savelen);
840 		return (err);
841 	}
842 	case HW_VENDOR:
843 		if (hw_vendor)
844 			return (sysctl_rdstring(oldp, oldlenp, newp,
845 			    hw_vendor));
846 		else
847 			return (EOPNOTSUPP);
848 	case HW_PRODUCT:
849 		if (hw_prod)
850 			return (sysctl_rdstring(oldp, oldlenp, newp, hw_prod));
851 		else
852 			return (EOPNOTSUPP);
853 	case HW_VERSION:
854 		if (hw_ver)
855 			return (sysctl_rdstring(oldp, oldlenp, newp, hw_ver));
856 		else
857 			return (EOPNOTSUPP);
858 	case HW_SERIALNO:
859 		if (hw_serial)
860 			return (sysctl_rdstring(oldp, oldlenp, newp,
861 			    hw_serial));
862 		else
863 			return (EOPNOTSUPP);
864 	case HW_UUID:
865 		if (hw_uuid)
866 			return (sysctl_rdstring(oldp, oldlenp, newp, hw_uuid));
867 		else
868 			return (EOPNOTSUPP);
869 	case HW_PHYSMEM64:
870 		return (sysctl_rdquad(oldp, oldlenp, newp,
871 		    ptoa((psize_t)physmem)));
872 	case HW_USERMEM64:
873 		return (sysctl_rdquad(oldp, oldlenp, newp,
874 		    ptoa((psize_t)physmem - uvmexp.wired)));
875 	default:
876 		return sysctl_bounded_arr(hw_vars, nitems(hw_vars), name,
877 		    namelen, oldp, oldlenp, newp, newlen);
878 	}
879 	/* NOTREACHED */
880 }
881 
882 int
883 hw_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
884     void *newp, size_t newlen, struct proc *p)
885 {
886 	int err, cpuspeed;
887 
888 	switch (name[0]) {
889 	case HW_DISKNAMES:
890 		err = sysctl_diskinit(0, p);
891 		if (err)
892 			return err;
893 		if (disknames)
894 			return (sysctl_rdstring(oldp, oldlenp, newp,
895 			    disknames));
896 		else
897 			return (sysctl_rdstring(oldp, oldlenp, newp, ""));
898 	case HW_DISKSTATS:
899 		err = sysctl_diskinit(1, p);
900 		if (err)
901 			return err;
902 		return (sysctl_rdstruct(oldp, oldlenp, newp, diskstats,
903 		    disk_count * sizeof(struct diskstats)));
904 	case HW_CPUSPEED:
905 		if (!cpu_cpuspeed)
906 			return (EOPNOTSUPP);
907 		err = cpu_cpuspeed(&cpuspeed);
908 		if (err)
909 			return err;
910 		return (sysctl_rdint(oldp, oldlenp, newp, cpuspeed));
911 #ifndef SMALL_KERNEL
912 	case HW_SENSORS:
913 		return (sysctl_sensors(name + 1, namelen - 1, oldp, oldlenp,
914 		    newp, newlen));
915 	case HW_SETPERF:
916 		return (sysctl_hwsetperf(oldp, oldlenp, newp, newlen));
917 	case HW_PERFPOLICY:
918 		return (sysctl_hwperfpolicy(oldp, oldlenp, newp, newlen));
919 #endif /* !SMALL_KERNEL */
920 	case HW_ALLOWPOWERDOWN:
921 		return (sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
922 		    &allowpowerdown));
923 	case HW_UCOMNAMES: {
924 		const char *str = "";
925 #if NUCOM > 0
926 		str = sysctl_ucominit();
927 #endif	/* NUCOM > 0 */
928 		return (sysctl_rdstring(oldp, oldlenp, newp, str));
929 	}
930 #ifdef __HAVE_CPU_TOPOLOGY
931 	case HW_SMT:
932 		return (sysctl_hwsmt(oldp, oldlenp, newp, newlen));
933 #endif
934 #ifndef SMALL_KERNEL
935 	case HW_BATTERY:
936 		return (sysctl_hwbattery(name + 1, namelen - 1, oldp, oldlenp,
937 		    newp, newlen));
938 #endif
939 	default:
940 		return (EOPNOTSUPP);
941 	}
942 	/* NOTREACHED */
943 }
944 
945 #ifndef SMALL_KERNEL
946 
947 int hw_battery_chargemode;
948 int hw_battery_chargestart;
949 int hw_battery_chargestop;
950 int (*hw_battery_setchargemode)(int);
951 int (*hw_battery_setchargestart)(int);
952 int (*hw_battery_setchargestop)(int);
953 
954 int
955 sysctl_hwchargemode(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
956 {
957 	int mode = hw_battery_chargemode;
958 	int error;
959 
960 	if (!hw_battery_setchargemode)
961 		return EOPNOTSUPP;
962 
963 	error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
964 	    &mode, -1, 1);
965 	if (error)
966 		return error;
967 
968 	if (newp != NULL)
969 		error = hw_battery_setchargemode(mode);
970 
971 	return error;
972 }
973 
974 int
975 sysctl_hwchargestart(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
976 {
977 	int start = hw_battery_chargestart;
978 	int error;
979 
980 	if (!hw_battery_setchargestart)
981 		return EOPNOTSUPP;
982 
983 	error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
984 	    &start, 0, 100);
985 	if (error)
986 		return error;
987 
988 	if (newp != NULL)
989 		error = hw_battery_setchargestart(start);
990 
991 	return error;
992 }
993 
994 int
995 sysctl_hwchargestop(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
996 {
997 	int stop = hw_battery_chargestop;
998 	int error;
999 
1000 	if (!hw_battery_setchargestop)
1001 		return EOPNOTSUPP;
1002 
1003 	error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
1004 	    &stop, 0, 100);
1005 	if (error)
1006 		return error;
1007 
1008 	if (newp != NULL)
1009 		error = hw_battery_setchargestop(stop);
1010 
1011 	return error;
1012 }
1013 
1014 int
1015 sysctl_hwbattery(int *name, u_int namelen, void *oldp, size_t *oldlenp,
1016     void *newp, size_t newlen)
1017 {
1018 	if (namelen != 1)
1019 		return (ENOTDIR);
1020 
1021 	switch (name[0]) {
1022 	case HW_BATTERY_CHARGEMODE:
1023 		return (sysctl_hwchargemode(oldp, oldlenp, newp, newlen));
1024 	case HW_BATTERY_CHARGESTART:
1025 		return (sysctl_hwchargestart(oldp, oldlenp, newp, newlen));
1026 	case HW_BATTERY_CHARGESTOP:
1027 		return (sysctl_hwchargestop(oldp, oldlenp, newp, newlen));
1028 	default:
1029 		return (EOPNOTSUPP);
1030 	}
1031 	/* NOTREACHED */
1032 }
1033 
1034 #endif
1035 
1036 #ifdef DEBUG_SYSCTL
1037 /*
1038  * Debugging related system variables.
1039  */
/* Defined outside this file; occupies slot 0 of debugvars below. */
extern struct ctldebug debug_vfs_busyprt;
/* Debug slots defined here. */
struct ctldebug debug1, debug2, debug3, debug4;
struct ctldebug debug5, debug6, debug7, debug8, debug9;
struct ctldebug debug10, debug11, debug12, debug13, debug14;
struct ctldebug debug15, debug16, debug17, debug18, debug19;
/*
 * Slot table for the debug subtree: debug_sysctl() indexes it with the
 * first name component.
 */
static struct ctldebug *debugvars[CTL_DEBUG_MAXID] = {
	&debug_vfs_busyprt,
	&debug1, &debug2, &debug3, &debug4,
	&debug5, &debug6, &debug7, &debug8, &debug9,
	&debug10, &debug11, &debug12, &debug13, &debug14,
	&debug15, &debug16, &debug17, &debug18, &debug19,
};
1052 int
1053 debug_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
1054     size_t newlen, struct proc *p)
1055 {
1056 	struct ctldebug *cdp;
1057 
1058 	/* all sysctl names at this level are name and field */
1059 	if (namelen != 2)
1060 		return (ENOTDIR);		/* overloaded */
1061 	if (name[0] < 0 || name[0] >= nitems(debugvars))
1062 		return (EOPNOTSUPP);
1063 	cdp = debugvars[name[0]];
1064 	if (cdp->debugname == 0)
1065 		return (EOPNOTSUPP);
1066 	switch (name[1]) {
1067 	case CTL_DEBUG_NAME:
1068 		return (sysctl_rdstring(oldp, oldlenp, newp, cdp->debugname));
1069 	case CTL_DEBUG_VALUE:
1070 		return (sysctl_int(oldp, oldlenp, newp, newlen, cdp->debugvar));
1071 	default:
1072 		return (EOPNOTSUPP);
1073 	}
1074 	/* NOTREACHED */
1075 }
1076 #endif /* DEBUG_SYSCTL */
1077 
/*
 * Read an integer, or write it provided the new value does not exceed the
 * current one.  Both values are compared as unsigned int; an attempt to
 * raise the value fails with EPERM.
 */
int
sysctl_int_lower(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    int *valp)
{
	unsigned int cur, want;
	int error;

	if (oldp != NULL && *oldlenp < sizeof(int))
		return (ENOMEM);
	if (newp != NULL && newlen != sizeof(int))
		return (EINVAL);
	*oldlenp = sizeof(int);

	if (newp == NULL) {
		/* pure read; a size-only query has nothing to copy out */
		if (oldp == NULL)
			return (0);
		cur = atomic_load_int(valp);
		return (copyout(&cur, oldp, sizeof(int)));
	}

	error = copyin(newp, &want, sizeof(int));
	if (error)
		return (error);

	/* retry until the swap happens against the value that was checked */
	for (;;) {
		cur = atomic_load_int(valp);
		if (cur < want)
			return (EPERM);	/* do not allow raising */
		if (atomic_cas_uint(valp, cur, want) == cur)
			break;
	}

	/* new value has been set although user gets error */
	if (oldp != NULL)
		return (copyout(&cur, oldp, sizeof(int)));

	return (0);
}
1117 
/*
 * Get and/or set an integer without any range restriction: the request is
 * forwarded to sysctl_int_bounded() with the full int range as bounds.
 */
int
sysctl_int(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int *valp)
{
	const int lowest = INT_MIN, highest = INT_MAX;

	return (sysctl_int_bounded(oldp, oldlenp, newp, newlen, valp,
	    lowest, highest));
}
1128 
1129 /*
1130  * As above, but read-only.
1131  */
1132 int
1133 sysctl_rdint(void *oldp, size_t *oldlenp, void *newp, int val)
1134 {
1135 	int error = 0;
1136 
1137 	if (oldp && *oldlenp < sizeof(int))
1138 		return (ENOMEM);
1139 	if (newp)
1140 		return (EPERM);
1141 	*oldlenp = sizeof(int);
1142 	if (oldp)
1143 		error = copyout((caddr_t)&val, oldp, sizeof(int));
1144 	return (error);
1145 }
1146 
1147 int
1148 sysctl_securelevel(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
1149     struct proc *p)
1150 {
1151 	int oldval, newval;
1152 	int error;
1153 
1154 	if (oldp && *oldlenp < sizeof(int))
1155 		return (ENOMEM);
1156 	if (newp && newlen != sizeof(int))
1157 		return (EINVAL);
1158 	*oldlenp = sizeof(int);
1159 
1160 	if (newp) {
1161 		if ((error = copyin(newp, &newval, sizeof(int))))
1162 			return (error);
1163 		do {
1164 			oldval = atomic_load_int(&securelevel);
1165 			if ((oldval > 0 || newval < -1) && newval < oldval &&
1166 			    p->p_p->ps_pid != 1)
1167 				return (EPERM);
1168 		} while (atomic_cas_uint(&securelevel, oldval, newval) !=
1169 		    oldval);
1170 
1171 		if (oldp) {
1172 			/* new value has been set although user gets error */
1173 			if ((error = copyout(&oldval, oldp, sizeof(int))))
1174 				return (error);
1175 		}
1176 	} else if (oldp) {
1177 		oldval = atomic_load_int(&securelevel);
1178 
1179 		if ((error = copyout(&oldval, oldp, sizeof(int))))
1180 			return (error);
1181 	}
1182 
1183 	return (0);
1184 }
1185 
1186 /*
1187  * Selects between sysctl_rdint and sysctl_int according to securelevel.
1188  */
1189 int
1190 sysctl_securelevel_int(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
1191     int *valp)
1192 {
1193 	if ((int)atomic_load_int(&securelevel) > 0)
1194 		return (sysctl_rdint(oldp, oldlenp, newp, *valp));
1195 	return (sysctl_int(oldp, oldlenp, newp, newlen, valp));
1196 }
1197 
/*
 * Get and/or set an integer whose new value must lie within
 * [minimum, maximum].  Passing minimum > maximum makes the variable
 * read-only: any write then fails with EPERM.  Out-of-range values fail
 * with EINVAL.
 */
int
sysctl_int_bounded(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    int *valp, int minimum, int maximum)
{
	int prev, next;
	int error;

	/* an empty range marks the variable as read only */
	if (newp != NULL && minimum > maximum)
		return (EPERM);

	if (oldp != NULL && *oldlenp < sizeof(int))
		return (ENOMEM);
	if (newp != NULL && newlen != sizeof(int))
		return (EINVAL);
	*oldlenp = sizeof(int);

	/* copyin() may sleep, call it first */
	if (newp != NULL) {
		error = copyin(newp, &next, sizeof(int));
		if (error)
			return (error);
		/* outside limits */
		if (next < minimum || next > maximum)
			return (EINVAL);
	}

	/* no old value requested: at most a plain store is left to do */
	if (oldp == NULL) {
		if (newp != NULL)
			atomic_store_int(valp, next);
		return (0);
	}

	if (newp != NULL)
		prev = atomic_swap_uint(valp, next);
	else
		prev = atomic_load_int(valp);

	/* on failure the new value has been set although user gets error */
	return (copyout(&prev, oldp, sizeof(int)));
}
1240 
1241 /*
1242  * Array of read-only or bounded integer values.
1243  */
1244 int
1245 sysctl_bounded_arr(const struct sysctl_bounded_args *valpp, u_int valplen,
1246     int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
1247     size_t newlen)
1248 {
1249 	u_int i;
1250 	if (namelen != 1)
1251 		return (ENOTDIR);
1252 	for (i = 0; i < valplen; ++i) {
1253 		if (valpp[i].mib == name[0]) {
1254 			return (sysctl_int_bounded(oldp, oldlenp, newp, newlen,
1255 			    valpp[i].var, valpp[i].minimum, valpp[i].maximum));
1256 		}
1257 	}
1258 	return (EOPNOTSUPP);
1259 }
1260 
/*
 * Validate parameters and get old / set new parameters
 * for a 64-bit integer-valued sysctl function.  The old value is copied
 * out before the new one is copied in.
 */
int
sysctl_quad(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    int64_t *valp)
{
	int error;

	if (oldp != NULL && *oldlenp < sizeof(int64_t))
		return (ENOMEM);
	if (newp != NULL && newlen != sizeof(int64_t))
		return (EINVAL);
	*oldlenp = sizeof(int64_t);

	if (oldp != NULL) {
		error = copyout(valp, oldp, sizeof(int64_t));
		if (error)
			return (error);
	}
	if (newp == NULL)
		return (0);

	return (copyin(newp, valp, sizeof(int64_t)));
}
1282 
1283 /*
1284  * As above, but read-only.
1285  */
1286 int
1287 sysctl_rdquad(void *oldp, size_t *oldlenp, void *newp, int64_t val)
1288 {
1289 	int error = 0;
1290 
1291 	if (oldp && *oldlenp < sizeof(int64_t))
1292 		return (ENOMEM);
1293 	if (newp)
1294 		return (EPERM);
1295 	*oldlenp = sizeof(int64_t);
1296 	if (oldp)
1297 		error = copyout((caddr_t)&val, oldp, sizeof(int64_t));
1298 	return (error);
1299 }
1300 
/*
 * Validate parameters and get old / set new parameters
 * for a string-valued sysctl function.  The old value is never truncated:
 * this is sysctl__string() with trunc set to 0.
 */
int
sysctl_string(void *oldp, size_t *oldlenp, void *newp, size_t newlen, char *str,
    size_t maxlen)
{
	const int truncate = 0;

	return sysctl__string(oldp, oldlenp, newp, newlen, str, maxlen,
	    truncate);
}
1311 
/*
 * String-valued sysctl whose old value may be truncated to fit the
 * caller's buffer: this is sysctl__string() with trunc set to 1.
 */
int
sysctl_tstring(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    char *str, size_t maxlen)
{
	const int truncate = 1;

	return sysctl__string(oldp, oldlenp, newp, newlen, str, maxlen,
	    truncate);
}
1318 
/*
 * Common implementation of sysctl_string() and sysctl_tstring().
 *
 * Copies the NUL-terminated string str out to oldp and, if newp is given,
 * replaces it with newlen bytes from newp plus a terminating NUL; the new
 * value must be shorter than maxlen (EINVAL otherwise).  When the output
 * buffer is too small the call fails with ENOMEM, unless trunc is non-zero
 * and the buffer holds at least one byte, in which case a truncated,
 * NUL-terminated copy is returned.  *oldlenp is set to the number of bytes
 * stored, including the NUL.
 */
int
sysctl__string(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    char *str, size_t maxlen, int trunc)
{
	size_t need;
	int error = 0;
	int short_buf;

	need = strlen(str) + 1;
	short_buf = (oldp != NULL && *oldlenp < need);

	if (short_buf && (!trunc || *oldlenp == 0))
		return (ENOMEM);
	if (newp != NULL && newlen >= maxlen)
		return (EINVAL);

	if (short_buf) {
		/* truncating: fill the buffer and terminate its last byte */
		need = *oldlenp;
		error = copyout(str, oldp, need - 1);
		if (error == 0)
			error = copyout("", (char *)oldp + need - 1, 1);
	} else if (oldp != NULL)
		error = copyout(str, oldp, need);
	*oldlenp = need;

	if (newp != NULL && error == 0) {
		error = copyin(newp, str, newlen);
		/* terminated unconditionally, even when copyin() failed */
		str[newlen] = 0;
	}

	return (error);
}
1350 
/*
 * Read-only string: copy str, including its terminating NUL, out to the
 * caller.  Any attempt to supply a new value fails with EPERM; a too-small
 * output buffer fails with ENOMEM.
 */
int
sysctl_rdstring(void *oldp, size_t *oldlenp, void *newp, const char *str)
{
	size_t need = strlen(str) + 1;

	if (oldp != NULL && *oldlenp < need)
		return (ENOMEM);
	if (newp != NULL)
		return (EPERM);

	*oldlenp = need;
	if (oldp == NULL)
		return (0);

	return (copyout(str, oldp, need));
}
1370 
/*
 * Validate parameters and get old / set new parameters
 * for a structure oriented sysctl function.
 *
 * The old contents are copied out before the new ones are copied in.
 * *oldlenp is only updated when an output buffer was supplied.  newlen is
 * merely checked not to exceed len; the copyin itself always transfers
 * len bytes.
 */
int
sysctl_struct(void *oldp, size_t *oldlenp, void *newp, size_t newlen, void *sp,
    size_t len)
{
	int error;

	if (oldp != NULL && *oldlenp < len)
		return (ENOMEM);
	if (newp != NULL && newlen > len)
		return (EINVAL);

	if (oldp != NULL) {
		*oldlenp = len;
		error = copyout(sp, oldp, len);
		if (error)
			return (error);
	}
	if (newp == NULL)
		return (0);

	return (copyin(newp, sp, len));
}
1393 
/*
 * Validate parameters and get old parameters
 * for a structure oriented sysctl function.  The structure is read-only:
 * any attempt to supply a new value fails with EPERM.
 */
int
sysctl_rdstruct(void *oldp, size_t *oldlenp, void *newp, const void *sp,
    size_t len)
{
	if (oldp != NULL && *oldlenp < len)
		return (ENOMEM);
	if (newp != NULL)
		return (EPERM);

	*oldlenp = len;
	if (oldp == NULL)
		return (0);

	return (copyout(sp, oldp, len));
}
1413 
1414 #ifndef SMALL_KERNEL
1415 void
1416 fill_file(struct kinfo_file *kf, struct file *fp, struct filedesc *fdp,
1417 	  int fd, struct vnode *vp, struct process *pr, struct proc *p,
1418 	  struct socket *so, int show_pointers)
1419 {
1420 	struct vattr va;
1421 
1422 	memset(kf, 0, sizeof(*kf));
1423 
1424 	kf->fd_fd = fd;		/* might not really be an fd */
1425 
1426 	if (fp != NULL) {
1427 		if (show_pointers)
1428 			kf->f_fileaddr = PTRTOINT64(fp);
1429 		kf->f_flag = fp->f_flag;
1430 		kf->f_iflags = fp->f_iflags;
1431 		kf->f_type = fp->f_type;
1432 		kf->f_count = fp->f_count;
1433 		if (show_pointers)
1434 			kf->f_ucred = PTRTOINT64(fp->f_cred);
1435 		kf->f_uid = fp->f_cred->cr_uid;
1436 		kf->f_gid = fp->f_cred->cr_gid;
1437 		if (show_pointers)
1438 			kf->f_ops = PTRTOINT64(fp->f_ops);
1439 		if (show_pointers)
1440 			kf->f_data = PTRTOINT64(fp->f_data);
1441 		kf->f_usecount = 0;
1442 
1443 		if (suser(p) == 0 || p->p_ucred->cr_uid == fp->f_cred->cr_uid) {
1444 			mtx_enter(&fp->f_mtx);
1445 			kf->f_offset = fp->f_offset;
1446 			kf->f_rxfer = fp->f_rxfer;
1447 			kf->f_rwfer = fp->f_wxfer;
1448 			kf->f_seek = fp->f_seek;
1449 			kf->f_rbytes = fp->f_rbytes;
1450 			kf->f_wbytes = fp->f_wbytes;
1451 			mtx_leave(&fp->f_mtx);
1452 		} else
1453 			kf->f_offset = -1;
1454 	} else if (vp != NULL) {
1455 		/* fake it */
1456 		kf->f_type = DTYPE_VNODE;
1457 		kf->f_flag = FREAD;
1458 		if (fd == KERN_FILE_TRACE)
1459 			kf->f_flag |= FWRITE;
1460 	} else if (so != NULL) {
1461 		/* fake it */
1462 		kf->f_type = DTYPE_SOCKET;
1463 	}
1464 
1465 	/* information about the object associated with this file */
1466 	switch (kf->f_type) {
1467 	case DTYPE_VNODE:
1468 		if (fp != NULL)
1469 			vp = (struct vnode *)fp->f_data;
1470 
1471 		if (show_pointers)
1472 			kf->v_un = PTRTOINT64(vp->v_un.vu_socket);
1473 		kf->v_type = vp->v_type;
1474 		kf->v_tag = vp->v_tag;
1475 		kf->v_flag = vp->v_flag;
1476 		if (show_pointers)
1477 			kf->v_data = PTRTOINT64(vp->v_data);
1478 		if (show_pointers)
1479 			kf->v_mount = PTRTOINT64(vp->v_mount);
1480 		if (vp->v_mount)
1481 			strlcpy(kf->f_mntonname,
1482 			    vp->v_mount->mnt_stat.f_mntonname,
1483 			    sizeof(kf->f_mntonname));
1484 
1485 		if (VOP_GETATTR(vp, &va, p->p_ucred, p) == 0) {
1486 			kf->va_fileid = va.va_fileid;
1487 			kf->va_mode = MAKEIMODE(va.va_type, va.va_mode);
1488 			kf->va_size = va.va_size;
1489 			kf->va_rdev = va.va_rdev;
1490 			kf->va_fsid = va.va_fsid & 0xffffffff;
1491 			kf->va_nlink = va.va_nlink;
1492 		}
1493 		break;
1494 
1495 	case DTYPE_SOCKET: {
1496 		int locked = 0;
1497 
1498 		if (so == NULL) {
1499 			so = (struct socket *)fp->f_data;
1500 			/* if so is passed as parameter it is already locked */
1501 			solock(so);
1502 			locked = 1;
1503 		}
1504 
1505 		kf->so_type = so->so_type;
1506 		kf->so_state = so->so_state | so->so_snd.sb_state |
1507 		    so->so_rcv.sb_state;
1508 		if (show_pointers)
1509 			kf->so_pcb = PTRTOINT64(so->so_pcb);
1510 		else
1511 			kf->so_pcb = -1;
1512 		kf->so_protocol = so->so_proto->pr_protocol;
1513 		kf->so_family = so->so_proto->pr_domain->dom_family;
1514 		kf->so_rcv_cc = so->so_rcv.sb_cc;
1515 		kf->so_snd_cc = so->so_snd.sb_cc;
1516 		if (isspliced(so)) {
1517 			if (show_pointers)
1518 				kf->so_splice =
1519 				    PTRTOINT64(so->so_sp->ssp_socket);
1520 			kf->so_splicelen = so->so_sp->ssp_len;
1521 		} else if (issplicedback(so))
1522 			kf->so_splicelen = -1;
1523 		if (so->so_pcb == NULL) {
1524 			if (locked)
1525 				sounlock(so);
1526 			break;
1527 		}
1528 		switch (kf->so_family) {
1529 		case AF_INET: {
1530 			struct inpcb *inpcb = so->so_pcb;
1531 
1532 			soassertlocked(so);
1533 			if (show_pointers)
1534 				kf->inp_ppcb = PTRTOINT64(inpcb->inp_ppcb);
1535 			kf->inp_lport = inpcb->inp_lport;
1536 			kf->inp_laddru[0] = inpcb->inp_laddr.s_addr;
1537 			kf->inp_fport = inpcb->inp_fport;
1538 			kf->inp_faddru[0] = inpcb->inp_faddr.s_addr;
1539 			kf->inp_rtableid = inpcb->inp_rtableid;
1540 			if (so->so_type == SOCK_RAW)
1541 				kf->inp_proto = inpcb->inp_ip.ip_p;
1542 			if (so->so_proto->pr_protocol == IPPROTO_TCP) {
1543 				struct tcpcb *tcpcb = (void *)inpcb->inp_ppcb;
1544 				kf->t_rcv_wnd = tcpcb->rcv_wnd;
1545 				kf->t_snd_wnd = tcpcb->snd_wnd;
1546 				kf->t_snd_cwnd = tcpcb->snd_cwnd;
1547 				kf->t_state = tcpcb->t_state;
1548 			}
1549 			break;
1550 		    }
1551 		case AF_INET6: {
1552 			struct inpcb *inpcb = so->so_pcb;
1553 
1554 			soassertlocked(so);
1555 			if (show_pointers)
1556 				kf->inp_ppcb = PTRTOINT64(inpcb->inp_ppcb);
1557 			kf->inp_lport = inpcb->inp_lport;
1558 			kf->inp_laddru[0] = inpcb->inp_laddr6.s6_addr32[0];
1559 			kf->inp_laddru[1] = inpcb->inp_laddr6.s6_addr32[1];
1560 			kf->inp_laddru[2] = inpcb->inp_laddr6.s6_addr32[2];
1561 			kf->inp_laddru[3] = inpcb->inp_laddr6.s6_addr32[3];
1562 			kf->inp_fport = inpcb->inp_fport;
1563 			kf->inp_faddru[0] = inpcb->inp_faddr6.s6_addr32[0];
1564 			kf->inp_faddru[1] = inpcb->inp_faddr6.s6_addr32[1];
1565 			kf->inp_faddru[2] = inpcb->inp_faddr6.s6_addr32[2];
1566 			kf->inp_faddru[3] = inpcb->inp_faddr6.s6_addr32[3];
1567 			kf->inp_rtableid = inpcb->inp_rtableid;
1568 			if (so->so_type == SOCK_RAW)
1569 				kf->inp_proto = inpcb->inp_ipv6.ip6_nxt;
1570 			if (so->so_proto->pr_protocol == IPPROTO_TCP) {
1571 				struct tcpcb *tcpcb = (void *)inpcb->inp_ppcb;
1572 				kf->t_rcv_wnd = tcpcb->rcv_wnd;
1573 				kf->t_snd_wnd = tcpcb->snd_wnd;
1574 				kf->t_state = tcpcb->t_state;
1575 			}
1576 			break;
1577 		    }
1578 		case AF_UNIX: {
1579 			struct unpcb *unpcb = so->so_pcb;
1580 
1581 			kf->f_msgcount = unpcb->unp_msgcount;
1582 			if (show_pointers) {
1583 				kf->unp_conn	= PTRTOINT64(unpcb->unp_conn);
1584 				kf->unp_refs	= PTRTOINT64(
1585 				    SLIST_FIRST(&unpcb->unp_refs));
1586 				kf->unp_nextref	= PTRTOINT64(
1587 				    SLIST_NEXT(unpcb, unp_nextref));
1588 				kf->v_un	= PTRTOINT64(unpcb->unp_vnode);
1589 				kf->unp_addr	= PTRTOINT64(unpcb->unp_addr);
1590 			}
1591 			if (unpcb->unp_addr != NULL) {
1592 				struct sockaddr_un *un = mtod(unpcb->unp_addr,
1593 				    struct sockaddr_un *);
1594 				memcpy(kf->unp_path, un->sun_path, un->sun_len
1595 				    - offsetof(struct sockaddr_un,sun_path));
1596 			}
1597 			break;
1598 		    }
1599 		}
1600 		if (locked)
1601 			sounlock(so);
1602 		break;
1603 	    }
1604 
1605 	case DTYPE_PIPE: {
1606 		struct pipe *pipe = (struct pipe *)fp->f_data;
1607 
1608 		if (show_pointers)
1609 			kf->pipe_peer = PTRTOINT64(pipe->pipe_peer);
1610 		kf->pipe_state = pipe->pipe_state;
1611 		break;
1612 	    }
1613 
1614 	case DTYPE_KQUEUE: {
1615 		struct kqueue *kqi = (struct kqueue *)fp->f_data;
1616 
1617 		kf->kq_count = kqi->kq_count;
1618 		kf->kq_state = kqi->kq_state;
1619 		break;
1620 	    }
1621 	}
1622 
1623 	/* per-process information for KERN_FILE_BY[PU]ID */
1624 	if (pr != NULL) {
1625 		kf->p_pid = pr->ps_pid;
1626 		kf->p_uid = pr->ps_ucred->cr_uid;
1627 		kf->p_gid = pr->ps_ucred->cr_gid;
1628 		kf->p_tid = -1;
1629 		strlcpy(kf->p_comm, pr->ps_comm, sizeof(kf->p_comm));
1630 	}
1631 	if (fdp != NULL) {
1632 		fdplock(fdp);
1633 		kf->fd_ofileflags = fdp->fd_ofileflags[fd];
1634 		fdpunlock(fdp);
1635 	}
1636 }
1637 
1638 /*
1639  * Get file structures.
1640  */
1641 int
1642 sysctl_file(int *name, u_int namelen, char *where, size_t *sizep,
1643     struct proc *p)
1644 {
1645 	struct kinfo_file *kf;
1646 	struct filedesc *fdp;
1647 	struct file *fp;
1648 	struct process *pr;
1649 	size_t buflen, elem_size, elem_count, outsize;
1650 	char *dp = where;
1651 	int arg, i, error = 0, needed = 0, matched;
1652 	u_int op;
1653 	int show_pointers;
1654 
1655 	if (namelen > 4)
1656 		return (ENOTDIR);
1657 	if (namelen < 4 || name[2] > sizeof(*kf))
1658 		return (EINVAL);
1659 
1660 	buflen = where != NULL ? *sizep : 0;
1661 	op = name[0];
1662 	arg = name[1];
1663 	elem_size = name[2];
1664 	elem_count = name[3];
1665 	outsize = MIN(sizeof(*kf), elem_size);
1666 
1667 	if (elem_size < 1)
1668 		return (EINVAL);
1669 
1670 	show_pointers = suser(curproc) == 0;
1671 
1672 	kf = malloc(sizeof(*kf), M_TEMP, M_WAITOK);
1673 
1674 #define FILLIT2(fp, fdp, i, vp, pr, so) do {				\
1675 	if (buflen >= elem_size && elem_count > 0) {			\
1676 		fill_file(kf, fp, fdp, i, vp, pr, p, so, show_pointers);\
1677 		error = copyout(kf, dp, outsize);			\
1678 		if (error)						\
1679 			break;						\
1680 		dp += elem_size;					\
1681 		buflen -= elem_size;					\
1682 		elem_count--;						\
1683 	}								\
1684 	needed += elem_size;						\
1685 } while (0)
1686 #define FILLIT(fp, fdp, i, vp, pr) \
1687 	FILLIT2(fp, fdp, i, vp, pr, NULL)
1688 #define FILLSO(so) \
1689 	FILLIT2(NULL, NULL, 0, NULL, NULL, so)
1690 
1691 	switch (op) {
1692 	case KERN_FILE_BYFILE:
1693 		/* use the inp-tables to pick up closed connections, too */
1694 		if (arg == DTYPE_SOCKET) {
1695 			struct inpcb *inp;
1696 
1697 			NET_LOCK();
1698 			mtx_enter(&tcbtable.inpt_mtx);
1699 			TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue)
1700 				FILLSO(inp->inp_socket);
1701 			mtx_leave(&tcbtable.inpt_mtx);
1702 #ifdef INET6
1703 			mtx_enter(&tcb6table.inpt_mtx);
1704 			TAILQ_FOREACH(inp, &tcb6table.inpt_queue, inp_queue)
1705 				FILLSO(inp->inp_socket);
1706 			mtx_leave(&tcb6table.inpt_mtx);
1707 #endif
1708 			mtx_enter(&udbtable.inpt_mtx);
1709 			TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) {
1710 				if (in_pcb_is_iterator(inp))
1711 					continue;
1712 				FILLSO(inp->inp_socket);
1713 			}
1714 			mtx_leave(&udbtable.inpt_mtx);
1715 #ifdef INET6
1716 			mtx_enter(&udb6table.inpt_mtx);
1717 			TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue) {
1718 				if (in_pcb_is_iterator(inp))
1719 					continue;
1720 				FILLSO(inp->inp_socket);
1721 			}
1722 			mtx_leave(&udb6table.inpt_mtx);
1723 #endif
1724 			mtx_enter(&rawcbtable.inpt_mtx);
1725 			TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
1726 				if (in_pcb_is_iterator(inp))
1727 					continue;
1728 				FILLSO(inp->inp_socket);
1729 			}
1730 			mtx_leave(&rawcbtable.inpt_mtx);
1731 #ifdef INET6
1732 			mtx_enter(&rawin6pcbtable.inpt_mtx);
1733 			TAILQ_FOREACH(inp, &rawin6pcbtable.inpt_queue,
1734 			    inp_queue) {
1735 				if (in_pcb_is_iterator(inp))
1736 					continue;
1737 				FILLSO(inp->inp_socket);
1738 			}
1739 			mtx_leave(&rawin6pcbtable.inpt_mtx);
1740 #endif
1741 			NET_UNLOCK();
1742 		}
1743 		fp = NULL;
1744 		while ((fp = fd_iterfile(fp, p)) != NULL) {
1745 			if ((arg == 0 || fp->f_type == arg)) {
1746 				int af, skip = 0;
1747 				if (arg == DTYPE_SOCKET && fp->f_type == arg) {
1748 					af = ((struct socket *)fp->f_data)->
1749 					    so_proto->pr_domain->dom_family;
1750 					if (af == AF_INET || af == AF_INET6)
1751 						skip = 1;
1752 				}
1753 				if (!skip)
1754 					FILLIT(fp, NULL, 0, NULL, NULL);
1755 			}
1756 		}
1757 		break;
1758 	case KERN_FILE_BYPID:
1759 		/* A arg of -1 indicates all processes */
1760 		if (arg < -1) {
1761 			error = EINVAL;
1762 			break;
1763 		}
1764 		matched = 0;
1765 		LIST_FOREACH(pr, &allprocess, ps_list) {
1766 			/*
1767 			 * skip system, exiting, embryonic and undead
1768 			 * processes
1769 			 */
1770 			if (pr->ps_flags & (PS_SYSTEM | PS_EMBRYO | PS_EXITING))
1771 				continue;
1772 			if (arg >= 0 && pr->ps_pid != (pid_t)arg) {
1773 				/* not the pid we are looking for */
1774 				continue;
1775 			}
1776 
1777 			refcnt_take(&pr->ps_refcnt);
1778 
1779 			matched = 1;
1780 			fdp = pr->ps_fd;
1781 			if (pr->ps_textvp)
1782 				FILLIT(NULL, NULL, KERN_FILE_TEXT, pr->ps_textvp, pr);
1783 			if (fdp->fd_cdir)
1784 				FILLIT(NULL, NULL, KERN_FILE_CDIR, fdp->fd_cdir, pr);
1785 			if (fdp->fd_rdir)
1786 				FILLIT(NULL, NULL, KERN_FILE_RDIR, fdp->fd_rdir, pr);
1787 			if (pr->ps_tracevp)
1788 				FILLIT(NULL, NULL, KERN_FILE_TRACE, pr->ps_tracevp, pr);
1789 			for (i = 0; i < fdp->fd_nfiles; i++) {
1790 				if ((fp = fd_getfile(fdp, i)) == NULL)
1791 					continue;
1792 				FILLIT(fp, fdp, i, NULL, pr);
1793 				FRELE(fp, p);
1794 			}
1795 
1796 			refcnt_rele_wake(&pr->ps_refcnt);
1797 
1798 			/* pid is unique, stop searching */
1799 			if (arg >= 0)
1800 				break;
1801 		}
1802 		if (!matched)
1803 			error = ESRCH;
1804 		break;
1805 	case KERN_FILE_BYUID:
1806 		LIST_FOREACH(pr, &allprocess, ps_list) {
1807 			/*
1808 			 * skip system, exiting, embryonic and undead
1809 			 * processes
1810 			 */
1811 			if (pr->ps_flags & (PS_SYSTEM | PS_EMBRYO | PS_EXITING))
1812 				continue;
1813 			if (arg >= 0 && pr->ps_ucred->cr_uid != (uid_t)arg) {
1814 				/* not the uid we are looking for */
1815 				continue;
1816 			}
1817 
1818 			refcnt_take(&pr->ps_refcnt);
1819 
1820 			fdp = pr->ps_fd;
1821 			if (fdp->fd_cdir)
1822 				FILLIT(NULL, NULL, KERN_FILE_CDIR, fdp->fd_cdir, pr);
1823 			if (fdp->fd_rdir)
1824 				FILLIT(NULL, NULL, KERN_FILE_RDIR, fdp->fd_rdir, pr);
1825 			if (pr->ps_tracevp)
1826 				FILLIT(NULL, NULL, KERN_FILE_TRACE, pr->ps_tracevp, pr);
1827 			for (i = 0; i < fdp->fd_nfiles; i++) {
1828 				if ((fp = fd_getfile(fdp, i)) == NULL)
1829 					continue;
1830 				FILLIT(fp, fdp, i, NULL, pr);
1831 				FRELE(fp, p);
1832 			}
1833 
1834 			refcnt_rele_wake(&pr->ps_refcnt);
1835 		}
1836 		break;
1837 	default:
1838 		error = EINVAL;
1839 		break;
1840 	}
1841 	free(kf, M_TEMP, sizeof(*kf));
1842 
1843 	if (!error) {
1844 		if (where == NULL)
1845 			needed += KERN_FILESLOP * elem_size;
1846 		else if (*sizep < needed)
1847 			error = ENOMEM;
1848 		*sizep = needed;
1849 	}
1850 
1851 	return (error);
1852 }
1853 
/*
 * Slack for size-only queries: over-estimate the space needed by
 * sysctl_doproc() by this many elements (5 procs).
 */
#define KERN_PROCSLOP	5
1858 
1859 int
1860 sysctl_doproc(int *name, u_int namelen, char *where, size_t *sizep)
1861 {
1862 	struct kinfo_proc *kproc = NULL;
1863 	struct proc *p;
1864 	struct process *pr;
1865 	char *dp;
1866 	int arg, buflen, doingzomb, elem_size, elem_count;
1867 	int error, needed, op;
1868 	int dothreads = 0;
1869 	int show_pointers;
1870 
1871 	dp = where;
1872 	buflen = where != NULL ? *sizep : 0;
1873 	needed = error = 0;
1874 
1875 	if (namelen != 4 || name[2] <= 0 || name[3] < 0 ||
1876 	    name[2] > sizeof(*kproc))
1877 		return (EINVAL);
1878 	op = name[0];
1879 	arg = name[1];
1880 	elem_size = name[2];
1881 	elem_count = name[3];
1882 
1883 	dothreads = op & KERN_PROC_SHOW_THREADS;
1884 	op &= ~KERN_PROC_SHOW_THREADS;
1885 
1886 	show_pointers = suser(curproc) == 0;
1887 
1888 	if (where != NULL)
1889 		kproc = malloc(sizeof(*kproc), M_TEMP, M_WAITOK);
1890 
1891 	pr = LIST_FIRST(&allprocess);
1892 	doingzomb = 0;
1893 again:
1894 	for (; pr != NULL; pr = LIST_NEXT(pr, ps_list)) {
1895 		/* XXX skip processes in the middle of being zapped */
1896 		if (pr->ps_pgrp == NULL)
1897 			continue;
1898 
1899 		/*
1900 		 * Skip embryonic processes.
1901 		 */
1902 		if (pr->ps_flags & PS_EMBRYO)
1903 			continue;
1904 
1905 		/*
1906 		 * TODO - make more efficient (see notes below).
1907 		 */
1908 		switch (op) {
1909 
1910 		case KERN_PROC_PID:
1911 			/* could do this with just a lookup */
1912 			if (pr->ps_pid != (pid_t)arg)
1913 				continue;
1914 			break;
1915 
1916 		case KERN_PROC_PGRP:
1917 			/* could do this by traversing pgrp */
1918 			if (pr->ps_pgrp->pg_id != (pid_t)arg)
1919 				continue;
1920 			break;
1921 
1922 		case KERN_PROC_SESSION:
1923 			if (pr->ps_session->s_leader == NULL ||
1924 			    pr->ps_session->s_leader->ps_pid != (pid_t)arg)
1925 				continue;
1926 			break;
1927 
1928 		case KERN_PROC_TTY:
1929 			if ((pr->ps_flags & PS_CONTROLT) == 0 ||
1930 			    pr->ps_session->s_ttyp == NULL ||
1931 			    pr->ps_session->s_ttyp->t_dev != (dev_t)arg)
1932 				continue;
1933 			break;
1934 
1935 		case KERN_PROC_UID:
1936 			if (pr->ps_ucred->cr_uid != (uid_t)arg)
1937 				continue;
1938 			break;
1939 
1940 		case KERN_PROC_RUID:
1941 			if (pr->ps_ucred->cr_ruid != (uid_t)arg)
1942 				continue;
1943 			break;
1944 
1945 		case KERN_PROC_ALL:
1946 			if (pr->ps_flags & PS_SYSTEM)
1947 				continue;
1948 			break;
1949 
1950 		case KERN_PROC_KTHREAD:
1951 			/* no filtering */
1952 			break;
1953 
1954 		default:
1955 			error = EINVAL;
1956 			goto err;
1957 		}
1958 
1959 		if (buflen >= elem_size && elem_count > 0) {
1960 			fill_kproc(pr, kproc, NULL, show_pointers);
1961 			error = copyout(kproc, dp, elem_size);
1962 			if (error)
1963 				goto err;
1964 			dp += elem_size;
1965 			buflen -= elem_size;
1966 			elem_count--;
1967 		}
1968 		needed += elem_size;
1969 
1970 		/* Skip per-thread entries if not required by op */
1971 		if (!dothreads)
1972 			continue;
1973 
1974 		TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
1975 			if (buflen >= elem_size && elem_count > 0) {
1976 				fill_kproc(pr, kproc, p, show_pointers);
1977 				error = copyout(kproc, dp, elem_size);
1978 				if (error)
1979 					goto err;
1980 				dp += elem_size;
1981 				buflen -= elem_size;
1982 				elem_count--;
1983 			}
1984 			needed += elem_size;
1985 		}
1986 	}
1987 	if (doingzomb == 0) {
1988 		pr = LIST_FIRST(&zombprocess);
1989 		doingzomb++;
1990 		goto again;
1991 	}
1992 	if (where != NULL) {
1993 		*sizep = dp - where;
1994 		if (needed > *sizep) {
1995 			error = ENOMEM;
1996 			goto err;
1997 		}
1998 	} else {
1999 		needed += KERN_PROCSLOP * elem_size;
2000 		*sizep = needed;
2001 	}
2002 err:
2003 	if (kproc)
2004 		free(kproc, M_TEMP, sizeof(*kproc));
2005 	return (error);
2006 }
2007 
2008 /*
2009  * Fill in a kproc structure for the specified process.
2010  */
2011 void
2012 fill_kproc(struct process *pr, struct kinfo_proc *ki, struct proc *p,
2013     int show_pointers)
2014 {
2015 	struct session *s = pr->ps_session;
2016 	struct tty *tp;
2017 	struct vmspace *vm = pr->ps_vmspace;
2018 	struct timespec booted, st, ut, utc;
2019 	struct tusage tu;
2020 	int isthread;
2021 
2022 	isthread = p != NULL;
2023 	if (!isthread) {
2024 		p = pr->ps_mainproc;		/* XXX */
2025 		tuagg_get_process(&tu, pr);
2026 	} else
2027 		tuagg_get_proc(&tu, p);
2028 
2029 	FILL_KPROC(ki, strlcpy, p, pr, pr->ps_ucred, pr->ps_pgrp,
2030 	    p, pr, s, vm, pr->ps_limit, pr->ps_sigacts, &tu, isthread,
2031 	    show_pointers);
2032 
2033 	/* stuff that's too painful to generalize into the macros */
2034 	if (s->s_leader)
2035 		ki->p_sid = s->s_leader->ps_pid;
2036 
2037 	if ((pr->ps_flags & PS_CONTROLT) && (tp = s->s_ttyp)) {
2038 		ki->p_tdev = tp->t_dev;
2039 		ki->p_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : -1;
2040 		if (show_pointers)
2041 			ki->p_tsess = PTRTOINT64(tp->t_session);
2042 	} else {
2043 		ki->p_tdev = NODEV;
2044 		ki->p_tpgid = -1;
2045 	}
2046 
2047 	/* fixups that can only be done in the kernel */
2048 	if ((pr->ps_flags & PS_ZOMBIE) == 0) {
2049 		if ((pr->ps_flags & PS_EMBRYO) == 0 && vm != NULL)
2050 			ki->p_vm_rssize = vm_resident_count(vm);
2051 		calctsru(&tu, &ut, &st, NULL);
2052 		ki->p_uutime_sec = ut.tv_sec;
2053 		ki->p_uutime_usec = ut.tv_nsec/1000;
2054 		ki->p_ustime_sec = st.tv_sec;
2055 		ki->p_ustime_usec = st.tv_nsec/1000;
2056 
2057 		/* Convert starting uptime to a starting UTC time. */
2058 		nanoboottime(&booted);
2059 		timespecadd(&booted, &pr->ps_start, &utc);
2060 		ki->p_ustart_sec = utc.tv_sec;
2061 		ki->p_ustart_usec = utc.tv_nsec / 1000;
2062 
2063 #ifdef MULTIPROCESSOR
2064 		if (p->p_cpu != NULL)
2065 			ki->p_cpuid = CPU_INFO_UNIT(p->p_cpu);
2066 #endif
2067 	}
2068 
2069 	/* get %cpu and schedule state: just one thread or sum of all? */
2070 	if (isthread) {
2071 		ki->p_pctcpu = p->p_pctcpu;
2072 		ki->p_stat   = p->p_stat;
2073 	} else {
2074 		ki->p_pctcpu = 0;
2075 		ki->p_stat = (pr->ps_flags & PS_ZOMBIE) ? SDEAD : SIDL;
2076 		TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
2077 			ki->p_pctcpu += p->p_pctcpu;
2078 			/* find best state: ONPROC > RUN > STOP > SLEEP > .. */
2079 			if (p->p_stat == SONPROC || ki->p_stat == SONPROC)
2080 				ki->p_stat = SONPROC;
2081 			else if (p->p_stat == SRUN || ki->p_stat == SRUN)
2082 				ki->p_stat = SRUN;
2083 			else if (p->p_stat == SSTOP || ki->p_stat == SSTOP)
2084 				ki->p_stat = SSTOP;
2085 			else if (p->p_stat == SSLEEP)
2086 				ki->p_stat = SSLEEP;
2087 		}
2088 	}
2089 }
2090 
2091 int
2092 sysctl_proc_args(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2093     struct proc *cp)
2094 {
2095 	struct process *vpr;
2096 	pid_t pid;
2097 	struct ps_strings pss;
2098 	struct iovec iov;
2099 	struct uio uio;
2100 	int error, cnt, op;
2101 	size_t limit;
2102 	char **rargv, **vargv;		/* reader vs. victim */
2103 	char *rarg, *varg, *buf;
2104 	struct vmspace *vm;
2105 	vaddr_t ps_strings;
2106 
2107 	if (namelen > 2)
2108 		return (ENOTDIR);
2109 	if (namelen < 2)
2110 		return (EINVAL);
2111 
2112 	pid = name[0];
2113 	op = name[1];
2114 
2115 	switch (op) {
2116 	case KERN_PROC_ARGV:
2117 	case KERN_PROC_NARGV:
2118 	case KERN_PROC_ENV:
2119 	case KERN_PROC_NENV:
2120 		break;
2121 	default:
2122 		return (EOPNOTSUPP);
2123 	}
2124 
2125 	if ((vpr = prfind(pid)) == NULL)
2126 		return (ESRCH);
2127 
2128 	if (oldp == NULL) {
2129 		if (op == KERN_PROC_NARGV || op == KERN_PROC_NENV)
2130 			*oldlenp = sizeof(int);
2131 		else
2132 			*oldlenp = ARG_MAX;	/* XXX XXX XXX */
2133 		return (0);
2134 	}
2135 
2136 	/* Either system process or exiting/zombie */
2137 	if (vpr->ps_flags & (PS_SYSTEM | PS_EXITING))
2138 		return (EINVAL);
2139 
2140 	/* Execing - danger. */
2141 	if ((vpr->ps_flags & PS_INEXEC))
2142 		return (EBUSY);
2143 
2144 	/* Only owner or root can get env */
2145 	if ((op == KERN_PROC_NENV || op == KERN_PROC_ENV) &&
2146 	    (vpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
2147 	    (error = suser(cp)) != 0))
2148 		return (error);
2149 
2150 	ps_strings = vpr->ps_strings;
2151 	vm = vpr->ps_vmspace;
2152 	uvmspace_addref(vm);
2153 	vpr = NULL;
2154 
2155 	buf = malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
2156 
2157 	iov.iov_base = &pss;
2158 	iov.iov_len = sizeof(pss);
2159 	uio.uio_iov = &iov;
2160 	uio.uio_iovcnt = 1;
2161 	uio.uio_offset = (off_t)ps_strings;
2162 	uio.uio_resid = sizeof(pss);
2163 	uio.uio_segflg = UIO_SYSSPACE;
2164 	uio.uio_rw = UIO_READ;
2165 	uio.uio_procp = cp;
2166 
2167 	if ((error = uvm_io(&vm->vm_map, &uio, 0)) != 0)
2168 		goto out;
2169 
2170 	if (op == KERN_PROC_NARGV) {
2171 		error = sysctl_rdint(oldp, oldlenp, NULL, pss.ps_nargvstr);
2172 		goto out;
2173 	}
2174 	if (op == KERN_PROC_NENV) {
2175 		error = sysctl_rdint(oldp, oldlenp, NULL, pss.ps_nenvstr);
2176 		goto out;
2177 	}
2178 
2179 	if (op == KERN_PROC_ARGV) {
2180 		cnt = pss.ps_nargvstr;
2181 		vargv = pss.ps_argvstr;
2182 	} else {
2183 		cnt = pss.ps_nenvstr;
2184 		vargv = pss.ps_envstr;
2185 	}
2186 
2187 	/* -1 to have space for a terminating NUL */
2188 	limit = *oldlenp - 1;
2189 	*oldlenp = 0;
2190 
2191 	rargv = oldp;
2192 
2193 	/*
2194 	 * *oldlenp - number of bytes copied out into readers buffer.
2195 	 * limit - maximal number of bytes allowed into readers buffer.
2196 	 * rarg - pointer into readers buffer where next arg will be stored.
2197 	 * rargv - pointer into readers buffer where the next rarg pointer
2198 	 *  will be stored.
2199 	 * vargv - pointer into victim address space where the next argument
2200 	 *  will be read.
2201 	 */
2202 
2203 	/* space for cnt pointers and a NULL */
2204 	rarg = (char *)(rargv + cnt + 1);
2205 	*oldlenp += (cnt + 1) * sizeof(char **);
2206 
2207 	while (cnt > 0 && *oldlenp < limit) {
2208 		size_t len, vstrlen;
2209 
2210 		/* Write to readers argv */
2211 		if ((error = copyout(&rarg, rargv, sizeof(rarg))) != 0)
2212 			goto out;
2213 
2214 		/* read the victim argv */
2215 		iov.iov_base = &varg;
2216 		iov.iov_len = sizeof(varg);
2217 		uio.uio_iov = &iov;
2218 		uio.uio_iovcnt = 1;
2219 		uio.uio_offset = (off_t)(vaddr_t)vargv;
2220 		uio.uio_resid = sizeof(varg);
2221 		uio.uio_segflg = UIO_SYSSPACE;
2222 		uio.uio_rw = UIO_READ;
2223 		uio.uio_procp = cp;
2224 		if ((error = uvm_io(&vm->vm_map, &uio, 0)) != 0)
2225 			goto out;
2226 
2227 		if (varg == NULL)
2228 			break;
2229 
2230 		/*
2231 		 * read the victim arg. We must jump through hoops to avoid
2232 		 * crossing a page boundary too much and returning an error.
2233 		 */
2234 more:
2235 		len = PAGE_SIZE - (((vaddr_t)varg) & PAGE_MASK);
2236 		/* leave space for the terminating NUL */
2237 		iov.iov_base = buf;
2238 		iov.iov_len = len;
2239 		uio.uio_iov = &iov;
2240 		uio.uio_iovcnt = 1;
2241 		uio.uio_offset = (off_t)(vaddr_t)varg;
2242 		uio.uio_resid = len;
2243 		uio.uio_segflg = UIO_SYSSPACE;
2244 		uio.uio_rw = UIO_READ;
2245 		uio.uio_procp = cp;
2246 		if ((error = uvm_io(&vm->vm_map, &uio, 0)) != 0)
2247 			goto out;
2248 
2249 		for (vstrlen = 0; vstrlen < len; vstrlen++) {
2250 			if (buf[vstrlen] == '\0')
2251 				break;
2252 		}
2253 
2254 		/* Don't overflow readers buffer. */
2255 		if (*oldlenp + vstrlen + 1 >= limit) {
2256 			error = ENOMEM;
2257 			goto out;
2258 		}
2259 
2260 		if ((error = copyout(buf, rarg, vstrlen)) != 0)
2261 			goto out;
2262 
2263 		*oldlenp += vstrlen;
2264 		rarg += vstrlen;
2265 
2266 		/* The string didn't end in this page? */
2267 		if (vstrlen == len) {
2268 			varg += vstrlen;
2269 			goto more;
2270 		}
2271 
2272 		/* End of string. Terminate it with a NUL */
2273 		buf[0] = '\0';
2274 		if ((error = copyout(buf, rarg, 1)) != 0)
2275 			goto out;
2276 		*oldlenp += 1;
2277 		rarg += 1;
2278 
2279 		vargv++;
2280 		rargv++;
2281 		cnt--;
2282 	}
2283 
2284 	if (*oldlenp >= limit) {
2285 		error = ENOMEM;
2286 		goto out;
2287 	}
2288 
2289 	/* Write the terminating null */
2290 	rarg = NULL;
2291 	error = copyout(&rarg, rargv, sizeof(rarg));
2292 
2293 out:
2294 	uvmspace_free(vm);
2295 	free(buf, M_TEMP, PAGE_SIZE);
2296 	return (error);
2297 }
2298 
2299 int
2300 sysctl_proc_cwd(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2301     struct proc *cp)
2302 {
2303 	struct process *findpr;
2304 	struct vnode *vp;
2305 	pid_t pid;
2306 	int error;
2307 	size_t lenused, len;
2308 	char *path, *bp, *bend;
2309 
2310 	if (namelen > 1)
2311 		return (ENOTDIR);
2312 	if (namelen < 1)
2313 		return (EINVAL);
2314 
2315 	pid = name[0];
2316 	if ((findpr = prfind(pid)) == NULL)
2317 		return (ESRCH);
2318 
2319 	if (oldp == NULL) {
2320 		*oldlenp = MAXPATHLEN * 4;
2321 		return (0);
2322 	}
2323 
2324 	/* Either system process or exiting/zombie */
2325 	if (findpr->ps_flags & (PS_SYSTEM | PS_EXITING))
2326 		return (EINVAL);
2327 
2328 	/* Only owner or root can get cwd */
2329 	if (findpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
2330 	    (error = suser(cp)) != 0)
2331 		return (error);
2332 
2333 	len = *oldlenp;
2334 	if (len > MAXPATHLEN * 4)
2335 		len = MAXPATHLEN * 4;
2336 	else if (len < 2)
2337 		return (ERANGE);
2338 	*oldlenp = 0;
2339 
2340 	/* snag a reference to the vnode before we can sleep */
2341 	vp = findpr->ps_fd->fd_cdir;
2342 	vref(vp);
2343 
2344 	path = malloc(len, M_TEMP, M_WAITOK);
2345 
2346 	bp = &path[len];
2347 	bend = bp;
2348 	*(--bp) = '\0';
2349 
2350 	/* Same as sys__getcwd */
2351 	error = vfs_getcwd_common(vp, NULL,
2352 	    &bp, path, len / 2, GETCWD_CHECK_ACCESS, cp);
2353 	if (error == 0) {
2354 		*oldlenp = lenused = bend - bp;
2355 		error = copyout(bp, oldp, lenused);
2356 	}
2357 
2358 	vrele(vp);
2359 	free(path, M_TEMP, len);
2360 
2361 	return (error);
2362 }
2363 
2364 int
2365 sysctl_proc_nobroadcastkill(int *name, u_int namelen, void *newp, size_t newlen,
2366     void *oldp, size_t *oldlenp, struct proc *cp)
2367 {
2368 	struct process *findpr;
2369 	pid_t pid;
2370 	int error, flag;
2371 
2372 	if (namelen > 1)
2373 		return (ENOTDIR);
2374 	if (namelen < 1)
2375 		return (EINVAL);
2376 
2377 	pid = name[0];
2378 	if ((findpr = prfind(pid)) == NULL)
2379 		return (ESRCH);
2380 
2381 	/* Either system process or exiting/zombie */
2382 	if (findpr->ps_flags & (PS_SYSTEM | PS_EXITING))
2383 		return (EINVAL);
2384 
2385 	/* Only root can change PS_NOBROADCASTKILL */
2386 	if (newp != NULL && (error = suser(cp)) != 0)
2387 		return (error);
2388 
2389 	/* get the PS_NOBROADCASTKILL flag */
2390 	flag = findpr->ps_flags & PS_NOBROADCASTKILL ? 1 : 0;
2391 
2392 	error = sysctl_int(oldp, oldlenp, newp, newlen, &flag);
2393 	if (error == 0 && newp) {
2394 		if (flag)
2395 			atomic_setbits_int(&findpr->ps_flags,
2396 			    PS_NOBROADCASTKILL);
2397 		else
2398 			atomic_clearbits_int(&findpr->ps_flags,
2399 			    PS_NOBROADCASTKILL);
2400 	}
2401 
2402 	return (error);
2403 }
2404 
2405 /* Arbitrary but reasonable limit for one iteration. */
2406 #define	VMMAP_MAXLEN	MAXPHYS
2407 
2408 int
2409 sysctl_proc_vmmap(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2410     struct proc *cp)
2411 {
2412 	struct process *findpr;
2413 	pid_t pid;
2414 	int error;
2415 	size_t oldlen, len;
2416 	struct kinfo_vmentry *kve, *ukve;
2417 	u_long *ustart, start;
2418 
2419 	if (namelen > 1)
2420 		return (ENOTDIR);
2421 	if (namelen < 1)
2422 		return (EINVAL);
2423 
2424 	/* Provide max buffer length as hint. */
2425 	if (oldp == NULL) {
2426 		if (oldlenp == NULL)
2427 			return (EINVAL);
2428 		else {
2429 			*oldlenp = VMMAP_MAXLEN;
2430 			return (0);
2431 		}
2432 	}
2433 
2434 	pid = name[0];
2435 	if (pid == cp->p_p->ps_pid) {
2436 		/* Self process mapping. */
2437 		findpr = cp->p_p;
2438 	} else if (pid > 0) {
2439 		if ((findpr = prfind(pid)) == NULL)
2440 			return (ESRCH);
2441 
2442 		/* Either system process or exiting/zombie */
2443 		if (findpr->ps_flags & (PS_SYSTEM | PS_EXITING))
2444 			return (EINVAL);
2445 
2446 #if 1
2447 		/* XXX Allow only root for now */
2448 		if ((error = suser(cp)) != 0)
2449 			return (error);
2450 #else
2451 		/* Only owner or root can get vmmap */
2452 		if (findpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
2453 		    (error = suser(cp)) != 0)
2454 			return (error);
2455 #endif
2456 	} else {
2457 		/* Only root can get kernel_map */
2458 		if ((error = suser(cp)) != 0)
2459 			return (error);
2460 		findpr = NULL;
2461 	}
2462 
2463 	/* Check the given size. */
2464 	oldlen = *oldlenp;
2465 	if (oldlen == 0 || oldlen % sizeof(*kve) != 0)
2466 		return (EINVAL);
2467 
2468 	/* Deny huge allocation. */
2469 	if (oldlen > VMMAP_MAXLEN)
2470 		return (EINVAL);
2471 
2472 	/*
2473 	 * Iterate from the given address passed as the first element's
2474 	 * kve_start via oldp.
2475 	 */
2476 	ukve = (struct kinfo_vmentry *)oldp;
2477 	ustart = &ukve->kve_start;
2478 	error = copyin(ustart, &start, sizeof(start));
2479 	if (error != 0)
2480 		return (error);
2481 
2482 	/* Allocate wired memory to not block. */
2483 	kve = malloc(oldlen, M_TEMP, M_WAITOK);
2484 
2485 	/* Set the base address and read entries. */
2486 	kve[0].kve_start = start;
2487 	len = oldlen;
2488 	error = fill_vmmap(findpr, kve, &len);
2489 	if (error != 0 && error != ENOMEM)
2490 		goto done;
2491 	if (len == 0)
2492 		goto done;
2493 
2494 	KASSERT(len <= oldlen);
2495 	KASSERT((len % sizeof(struct kinfo_vmentry)) == 0);
2496 
2497 	error = copyout(kve, oldp, len);
2498 
2499 done:
2500 	*oldlenp = len;
2501 
2502 	free(kve, M_TEMP, oldlen);
2503 
2504 	return (error);
2505 }
2506 #endif
2507 
2508 /*
2509  * Initialize disknames/diskstats for export by sysctl. If update is set,
2510  * then we simply update the disk statistics information.
2511  */
2512 int
2513 sysctl_diskinit(int update, struct proc *p)
2514 {
2515 	struct diskstats *sdk;
2516 	struct disk *dk;
2517 	const char *duid;
2518 	int error, changed = 0;
2519 
2520 	KERNEL_ASSERT_LOCKED();
2521 
2522 	if ((error = rw_enter(&sysctl_disklock, RW_WRITE|RW_INTR)) != 0)
2523 		return error;
2524 
2525 	/* Run in a loop, disks may change while malloc sleeps. */
2526 	while (disk_change) {
2527 		int tlen, count;
2528 
2529 		disk_change = 0;
2530 
2531 		tlen = 0;
2532 		TAILQ_FOREACH(dk, &disklist, dk_link) {
2533 			if (dk->dk_name)
2534 				tlen += strlen(dk->dk_name);
2535 			tlen += 18;	/* label uid + separators */
2536 		}
2537 		tlen++;
2538 		/* disk_count may change when malloc sleeps */
2539 		count = disk_count;
2540 
2541 		/*
2542 		 * The sysctl_disklock ensures that no other process can
2543 		 * allocate disknames and diskstats while our malloc sleeps.
2544 		 */
2545 		free(disknames, M_SYSCTL, disknameslen);
2546 		free(diskstats, M_SYSCTL, diskstatslen);
2547 		diskstats = NULL;
2548 		disknames = NULL;
2549 		diskstats = mallocarray(count, sizeof(struct diskstats),
2550 		    M_SYSCTL, M_WAITOK|M_ZERO);
2551 		diskstatslen = count * sizeof(struct diskstats);
2552 		disknames = malloc(tlen, M_SYSCTL, M_WAITOK|M_ZERO);
2553 		disknameslen = tlen;
2554 		disknames[0] = '\0';
2555 		changed = 1;
2556 	}
2557 
2558 	if (changed) {
2559 		int l;
2560 
2561 		l = 0;
2562 		sdk = diskstats;
2563 		TAILQ_FOREACH(dk, &disklist, dk_link) {
2564 			duid = NULL;
2565 			if (dk->dk_label && !duid_iszero(dk->dk_label->d_uid))
2566 				duid = duid_format(dk->dk_label->d_uid);
2567 			snprintf(disknames + l, disknameslen - l, "%s:%s,",
2568 			    dk->dk_name ? dk->dk_name : "",
2569 			    duid ? duid : "");
2570 			l += strlen(disknames + l);
2571 			strlcpy(sdk->ds_name, dk->dk_name,
2572 			    sizeof(sdk->ds_name));
2573 			mtx_enter(&dk->dk_mtx);
2574 			sdk->ds_busy = dk->dk_busy;
2575 			sdk->ds_rxfer = dk->dk_rxfer;
2576 			sdk->ds_wxfer = dk->dk_wxfer;
2577 			sdk->ds_seek = dk->dk_seek;
2578 			sdk->ds_rbytes = dk->dk_rbytes;
2579 			sdk->ds_wbytes = dk->dk_wbytes;
2580 			sdk->ds_attachtime = dk->dk_attachtime;
2581 			sdk->ds_timestamp = dk->dk_timestamp;
2582 			sdk->ds_time = dk->dk_time;
2583 			mtx_leave(&dk->dk_mtx);
2584 			sdk++;
2585 		}
2586 
2587 		/* Eliminate trailing comma */
2588 		if (l != 0)
2589 			disknames[l - 1] = '\0';
2590 	} else if (update) {
2591 		/* Just update, number of drives hasn't changed */
2592 		sdk = diskstats;
2593 		TAILQ_FOREACH(dk, &disklist, dk_link) {
2594 			strlcpy(sdk->ds_name, dk->dk_name,
2595 			    sizeof(sdk->ds_name));
2596 			mtx_enter(&dk->dk_mtx);
2597 			sdk->ds_busy = dk->dk_busy;
2598 			sdk->ds_rxfer = dk->dk_rxfer;
2599 			sdk->ds_wxfer = dk->dk_wxfer;
2600 			sdk->ds_seek = dk->dk_seek;
2601 			sdk->ds_rbytes = dk->dk_rbytes;
2602 			sdk->ds_wbytes = dk->dk_wbytes;
2603 			sdk->ds_attachtime = dk->dk_attachtime;
2604 			sdk->ds_timestamp = dk->dk_timestamp;
2605 			sdk->ds_time = dk->dk_time;
2606 			mtx_leave(&dk->dk_mtx);
2607 			sdk++;
2608 		}
2609 	}
2610 	rw_exit_write(&sysctl_disklock);
2611 	return 0;
2612 }
2613 
2614 #if defined(SYSVMSG) || defined(SYSVSEM) || defined(SYSVSHM)
2615 int
2616 sysctl_sysvipc(int *name, u_int namelen, void *where, size_t *sizep)
2617 {
2618 #ifdef SYSVSEM
2619 	struct sem_sysctl_info *semsi;
2620 #endif
2621 #ifdef SYSVSHM
2622 	struct shm_sysctl_info *shmsi;
2623 #endif
2624 	size_t infosize, dssize, tsize, buflen, bufsiz;
2625 	int i, nds, error, ret;
2626 	void *buf;
2627 
2628 	if (namelen != 1)
2629 		return (EINVAL);
2630 
2631 	buflen = *sizep;
2632 
2633 	switch (*name) {
2634 	case KERN_SYSVIPC_MSG_INFO:
2635 #ifdef SYSVMSG
2636 		return (sysctl_sysvmsg(name, namelen, where, sizep));
2637 #else
2638 		return (EOPNOTSUPP);
2639 #endif
2640 	case KERN_SYSVIPC_SEM_INFO:
2641 #ifdef SYSVSEM
2642 		infosize = sizeof(semsi->seminfo);
2643 		nds = seminfo.semmni;
2644 		dssize = sizeof(semsi->semids[0]);
2645 		break;
2646 #else
2647 		return (EOPNOTSUPP);
2648 #endif
2649 	case KERN_SYSVIPC_SHM_INFO:
2650 #ifdef SYSVSHM
2651 		infosize = sizeof(shmsi->shminfo);
2652 		nds = shminfo.shmmni;
2653 		dssize = sizeof(shmsi->shmids[0]);
2654 		break;
2655 #else
2656 		return (EOPNOTSUPP);
2657 #endif
2658 	default:
2659 		return (EINVAL);
2660 	}
2661 	tsize = infosize + (nds * dssize);
2662 
2663 	/* Return just the total size required. */
2664 	if (where == NULL) {
2665 		*sizep = tsize;
2666 		return (0);
2667 	}
2668 
2669 	/* Not enough room for even the info struct. */
2670 	if (buflen < infosize) {
2671 		*sizep = 0;
2672 		return (ENOMEM);
2673 	}
2674 	bufsiz = min(tsize, buflen);
2675 	buf = malloc(bufsiz, M_TEMP, M_WAITOK|M_ZERO);
2676 
2677 	switch (*name) {
2678 #ifdef SYSVSEM
2679 	case KERN_SYSVIPC_SEM_INFO:
2680 		semsi = (struct sem_sysctl_info *)buf;
2681 		semsi->seminfo = seminfo;
2682 		break;
2683 #endif
2684 #ifdef SYSVSHM
2685 	case KERN_SYSVIPC_SHM_INFO:
2686 		shmsi = (struct shm_sysctl_info *)buf;
2687 		shmsi->shminfo = shminfo;
2688 		break;
2689 #endif
2690 	}
2691 	buflen -= infosize;
2692 
2693 	ret = 0;
2694 	if (buflen > 0) {
2695 		/* Fill in the IPC data structures.  */
2696 		for (i = 0; i < nds; i++) {
2697 			if (buflen < dssize) {
2698 				ret = ENOMEM;
2699 				break;
2700 			}
2701 			switch (*name) {
2702 #ifdef SYSVSEM
2703 			case KERN_SYSVIPC_SEM_INFO:
2704 				if (sema[i] != NULL)
2705 					memcpy(&semsi->semids[i], sema[i],
2706 					    dssize);
2707 				else
2708 					memset(&semsi->semids[i], 0, dssize);
2709 				break;
2710 #endif
2711 #ifdef SYSVSHM
2712 			case KERN_SYSVIPC_SHM_INFO:
2713 				if (shmsegs[i] != NULL)
2714 					memcpy(&shmsi->shmids[i], shmsegs[i],
2715 					    dssize);
2716 				else
2717 					memset(&shmsi->shmids[i], 0, dssize);
2718 				break;
2719 #endif
2720 			}
2721 			buflen -= dssize;
2722 		}
2723 	}
2724 	*sizep -= buflen;
2725 	error = copyout(buf, where, *sizep);
2726 	free(buf, M_TEMP, bufsiz);
2727 	/* If copyout succeeded, use return code set earlier. */
2728 	return (error ? error : ret);
2729 }
2730 #endif /* SYSVMSG || SYSVSEM || SYSVSHM */
2731 
2732 #ifndef	SMALL_KERNEL
2733 
2734 int
2735 sysctl_intrcnt(int *name, u_int namelen, void *oldp, size_t *oldlenp)
2736 {
2737 	return (evcount_sysctl(name, namelen, oldp, oldlenp, NULL, 0));
2738 }
2739 
2740 
2741 int
2742 sysctl_sensors(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2743     void *newp, size_t newlen)
2744 {
2745 	struct ksensor *ks;
2746 	struct sensor *us;
2747 	struct ksensordev *ksd;
2748 	struct sensordev *usd;
2749 	int dev, numt, ret;
2750 	enum sensor_type type;
2751 
2752 	if (namelen != 1 && namelen != 3)
2753 		return (ENOTDIR);
2754 
2755 	dev = name[0];
2756 	if (namelen == 1) {
2757 		ret = sensordev_get(dev, &ksd);
2758 		if (ret)
2759 			return (ret);
2760 
2761 		/* Grab a copy, to clear the kernel pointers */
2762 		usd = malloc(sizeof(*usd), M_TEMP, M_WAITOK|M_ZERO);
2763 		usd->num = ksd->num;
2764 		strlcpy(usd->xname, ksd->xname, sizeof(usd->xname));
2765 		memcpy(usd->maxnumt, ksd->maxnumt, sizeof(usd->maxnumt));
2766 		usd->sensors_count = ksd->sensors_count;
2767 
2768 		ret = sysctl_rdstruct(oldp, oldlenp, newp, usd,
2769 		    sizeof(struct sensordev));
2770 
2771 		free(usd, M_TEMP, sizeof(*usd));
2772 		return (ret);
2773 	}
2774 
2775 	type = name[1];
2776 	numt = name[2];
2777 
2778 	ret = sensor_find(dev, type, numt, &ks);
2779 	if (ret)
2780 		return (ret);
2781 
2782 	/* Grab a copy, to clear the kernel pointers */
2783 	us = malloc(sizeof(*us), M_TEMP, M_WAITOK|M_ZERO);
2784 	memcpy(us->desc, ks->desc, sizeof(us->desc));
2785 	us->tv = ks->tv;
2786 	us->value = ks->value;
2787 	us->type = ks->type;
2788 	us->status = ks->status;
2789 	us->numt = ks->numt;
2790 	us->flags = ks->flags;
2791 
2792 	ret = sysctl_rdstruct(oldp, oldlenp, newp, us,
2793 	    sizeof(struct sensor));
2794 	free(us, M_TEMP, sizeof(*us));
2795 	return (ret);
2796 }
2797 #endif	/* SMALL_KERNEL */
2798 
2799 int
2800 sysctl_cptime2(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2801     void *newp, size_t newlen)
2802 {
2803 	CPU_INFO_ITERATOR cii;
2804 	struct cpu_info *ci;
2805 	int found = 0;
2806 
2807 	if (namelen != 1)
2808 		return (ENOTDIR);
2809 
2810 	CPU_INFO_FOREACH(cii, ci) {
2811 		if (name[0] == CPU_INFO_UNIT(ci)) {
2812 			found = 1;
2813 			break;
2814 		}
2815 	}
2816 	if (!found)
2817 		return (ENOENT);
2818 
2819 	return (sysctl_rdstruct(oldp, oldlenp, newp,
2820 	    &ci->ci_schedstate.spc_cp_time,
2821 	    sizeof(ci->ci_schedstate.spc_cp_time)));
2822 }
2823 
2824 #if NAUDIO > 0
2825 int
2826 sysctl_audio(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2827     void *newp, size_t newlen)
2828 {
2829 	if (namelen != 1)
2830 		return (ENOTDIR);
2831 
2832 	if (name[0] != KERN_AUDIO_RECORD)
2833 		return (ENOENT);
2834 
2835 	return (sysctl_int(oldp, oldlenp, newp, newlen, &audio_record_enable));
2836 }
2837 #endif
2838 
2839 #if NVIDEO > 0
2840 int
2841 sysctl_video(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2842     void *newp, size_t newlen)
2843 {
2844 	if (namelen != 1)
2845 		return (ENOTDIR);
2846 
2847 	if (name[0] != KERN_VIDEO_RECORD)
2848 		return (ENOENT);
2849 
2850 	return (sysctl_int(oldp, oldlenp, newp, newlen, &video_record_enable));
2851 }
2852 #endif
2853 
2854 int
2855 sysctl_cpustats(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2856     void *newp, size_t newlen)
2857 {
2858 	CPU_INFO_ITERATOR cii;
2859 	struct cpustats cs;
2860 	struct cpu_info *ci;
2861 	int found = 0;
2862 
2863 	if (namelen != 1)
2864 		return (ENOTDIR);
2865 
2866 	CPU_INFO_FOREACH(cii, ci) {
2867 		if (name[0] == CPU_INFO_UNIT(ci)) {
2868 			found = 1;
2869 			break;
2870 		}
2871 	}
2872 	if (!found)
2873 		return (ENOENT);
2874 
2875 	memset(&cs, 0, sizeof cs);
2876 	memcpy(&cs.cs_time, &ci->ci_schedstate.spc_cp_time, sizeof(cs.cs_time));
2877 	cs.cs_flags = 0;
2878 	if (cpu_is_online(ci))
2879 		cs.cs_flags |= CPUSTATS_ONLINE;
2880 
2881 	return (sysctl_rdstruct(oldp, oldlenp, newp, &cs, sizeof(cs)));
2882 }
2883 
2884 int
2885 sysctl_utc_offset(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
2886 {
2887 	struct timespec adjusted, now;
2888 	int adjustment_seconds, error, new_offset_minutes, old_offset_minutes;
2889 
2890 	old_offset_minutes = utc_offset / 60;	/* seconds -> minutes */
2891 	new_offset_minutes = old_offset_minutes;
2892 	error = sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
2893 	     &new_offset_minutes);
2894 	if (error)
2895 		return error;
2896 	if (new_offset_minutes < -24 * 60 || new_offset_minutes > 24 * 60)
2897 		return EINVAL;
2898 	if (new_offset_minutes == old_offset_minutes)
2899 		return 0;
2900 
2901 	utc_offset = new_offset_minutes * 60;	/* minutes -> seconds */
2902 	adjustment_seconds = (new_offset_minutes - old_offset_minutes) * 60;
2903 
2904 	nanotime(&now);
2905 	adjusted = now;
2906 	adjusted.tv_sec -= adjustment_seconds;
2907 	tc_setrealtimeclock(&adjusted);
2908 	resettodr();
2909 
2910 	return 0;
2911 }
2912