xref: /openbsd-src/sys/kern/kern_sysctl.c (revision f11c1ce42cc0195248acb8882c438695ad498ab4)
1 /*	$OpenBSD: kern_sysctl.c,v 1.452 2024/11/05 10:49:23 bluhm Exp $	*/
2 /*	$NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $	*/
3 
4 /*-
5  * Copyright (c) 1982, 1986, 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Mike Karels at Berkeley Software Design, Inc.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)kern_sysctl.c	8.4 (Berkeley) 4/14/94
36  */
37 
38 /*
39  * sysctl system call.
40  */
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/atomic.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/pool.h>
48 #include <sys/proc.h>
49 #include <sys/resourcevar.h>
50 #include <sys/signalvar.h>
51 #include <sys/fcntl.h>
52 #include <sys/file.h>
53 #include <sys/filedesc.h>
54 #include <sys/vnode.h>
55 #include <sys/unistd.h>
56 #include <sys/buf.h>
57 #include <sys/clockintr.h>
58 #include <sys/tty.h>
59 #include <sys/disklabel.h>
60 #include <sys/disk.h>
61 #include <sys/sysctl.h>
62 #include <sys/msgbuf.h>
63 #include <sys/vmmeter.h>
64 #include <sys/namei.h>
65 #include <sys/exec.h>
66 #include <sys/mbuf.h>
67 #include <sys/percpu.h>
68 #include <sys/sensors.h>
69 #include <sys/pipe.h>
70 #include <sys/eventvar.h>
71 #include <sys/socketvar.h>
72 #include <sys/socket.h>
73 #include <sys/domain.h>
74 #include <sys/protosw.h>
75 #include <sys/pledge.h>
76 #include <sys/timetc.h>
77 #include <sys/evcount.h>
78 #include <sys/un.h>
79 #include <sys/unpcb.h>
80 #include <sys/sched.h>
81 #include <sys/mount.h>
82 #include <sys/syscallargs.h>
83 #include <sys/wait.h>
84 #include <sys/witness.h>
85 
86 #include <uvm/uvm_extern.h>
87 
88 #include <dev/cons.h>
89 
90 #include <dev/usb/ucomvar.h>
91 
92 #include <net/route.h>
93 #include <netinet/in.h>
94 #include <netinet/ip.h>
95 #include <netinet/ip_var.h>
96 #include <netinet/in_pcb.h>
97 #include <netinet/ip6.h>
98 #include <netinet/tcp.h>
99 #include <netinet/tcp_timer.h>
100 #include <netinet/tcp_var.h>
101 #include <netinet/udp.h>
102 #include <netinet/udp_var.h>
103 #include <netinet6/ip6_var.h>
104 
105 #ifdef DDB
106 #include <ddb/db_var.h>
107 #endif
108 
109 #ifdef SYSVMSG
110 #include <sys/msg.h>
111 #endif
112 #ifdef SYSVSEM
113 #include <sys/sem.h>
114 #endif
115 #ifdef SYSVSHM
116 #include <sys/shm.h>
117 #endif
118 
119 #include "audio.h"
120 #include "dt.h"
121 #include "pf.h"
122 #include "ucom.h"
123 #include "video.h"
124 
125 /*
126  * Locks used to protect data:
127  *	a	atomic
128  */
129 
/* Variables defined outside this file. */
extern struct forkstat forkstat;
extern struct nchstats nchstats;
extern int fscale;
extern fixpt_t ccpu;
extern long numvnodes;
extern int allowdt;
extern int audio_record_enable;
extern int video_record_enable;
extern int autoconf_serial;

/* Exported as KERN_ALLOWKMEM through sysctl_securelevel_int(). */
int allowkmem;		/* [a] */

/* Prototypes for sysctl handlers referenced by the dispatchers below. */
int sysctl_securelevel(void *, size_t *, void *, size_t, struct proc *);
int sysctl_diskinit(int, struct proc *);
int sysctl_proc_args(int *, u_int, void *, size_t *, struct proc *);
int sysctl_proc_cwd(int *, u_int, void *, size_t *, struct proc *);
int sysctl_proc_nobroadcastkill(int *, u_int, void *, size_t, void *, size_t *,
	struct proc *);
int sysctl_proc_vmmap(int *, u_int, void *, size_t *, struct proc *);
int sysctl_intrcnt(int *, u_int, void *, size_t *);
int sysctl_sensors(int *, u_int, void *, size_t *, void *, size_t);
int sysctl_cptime2(int *, u_int, void *, size_t *, void *, size_t);
int sysctl_audio(int *, u_int, void *, size_t *, void *, size_t);
int sysctl_video(int *, u_int, void *, size_t *, void *, size_t);
int sysctl_cpustats(int *, u_int, void *, size_t *, void *, size_t);
int sysctl_utc_offset(void *, size_t *, void *, size_t);
int sysctl_hwbattery(int *, u_int, void *, size_t *, void *, size_t);

void fill_file(struct kinfo_file *, struct file *, struct filedesc *, int,
    struct vnode *, struct process *, struct proc *, struct socket *, int);
void fill_kproc(struct process *, struct kinfo_proc *, struct proc *, int);

/*
 * Second-stage handlers; kern_sysctl() and hw_sysctl() call these
 * between sysctl_vslock() and sysctl_vsunlock().
 */
int kern_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t,
	struct proc *);
int hw_sysctl_locked(int *, u_int, void *, size_t *,void *, size_t,
	struct proc *);

/*
 * Optional hook used by the HW_CPUSPEED handler: it is called with a
 * pointer to an int to fill in and its return value is treated as an
 * error code.  While it is NULL, HW_CPUSPEED returns EOPNOTSUPP.
 */
int (*cpu_cpuspeed)(int *);

/*
 * Lock to avoid too many processes vslocking a large amount of memory
 * at the same time.
 *
 * sysctl_lock is taken for writing by sysctl_vslock() and released by
 * sysctl_vsunlock().
 * NOTE(review): sysctl_disklock is not used in this part of the file;
 * presumably it serializes the disk name/statistics setup -- confirm.
 */
struct rwlock sysctl_lock = RWLOCK_INITIALIZER("sysctllk");
struct rwlock sysctl_disklock = RWLOCK_INITIALIZER("sysctldlk");
175 
176 int
177 sysctl_vslock(void *addr, size_t len)
178 {
179 	int error;
180 
181 	error = rw_enter(&sysctl_lock, RW_WRITE|RW_INTR);
182 	if (error)
183 		return (error);
184 	KERNEL_LOCK();
185 
186 	if (addr) {
187 		if (atop(len) > uvmexp.wiredmax - uvmexp.wired) {
188 			error = ENOMEM;
189 			goto out;
190 		}
191 		error = uvm_vslock(curproc, addr, len, PROT_READ | PROT_WRITE);
192 		if (error)
193 			goto out;
194 	}
195 
196 	return (0);
197 out:
198 	KERNEL_UNLOCK();
199 	rw_exit_write(&sysctl_lock);
200 	return (error);
201 }
202 
203 void
204 sysctl_vsunlock(void *addr, size_t len)
205 {
206 	KERNEL_ASSERT_LOCKED();
207 
208 	if (addr)
209 		uvm_vsunlock(curproc, addr, len);
210 	KERNEL_UNLOCK();
211 	rw_exit_write(&sysctl_lock);
212 }
213 
214 int
215 sys_sysctl(struct proc *p, void *v, register_t *retval)
216 {
217 	struct sys_sysctl_args /* {
218 		syscallarg(const int *) name;
219 		syscallarg(u_int) namelen;
220 		syscallarg(void *) old;
221 		syscallarg(size_t *) oldlenp;
222 		syscallarg(void *) new;
223 		syscallarg(size_t) newlen;
224 	} */ *uap = v;
225 	int error, dolock = 1;
226 	size_t savelen = 0, oldlen = 0;
227 	sysctlfn *fn;
228 	int name[CTL_MAXNAME];
229 
230 	if (SCARG(uap, new) != NULL &&
231 	    (error = suser(p)))
232 		return (error);
233 	/*
234 	 * all top-level sysctl names are non-terminal
235 	 */
236 	if (SCARG(uap, namelen) > CTL_MAXNAME || SCARG(uap, namelen) < 2)
237 		return (EINVAL);
238 	error = copyin(SCARG(uap, name), name,
239 		       SCARG(uap, namelen) * sizeof(int));
240 	if (error)
241 		return (error);
242 
243 	error = pledge_sysctl(p, SCARG(uap, namelen),
244 	    name, SCARG(uap, new));
245 	if (error)
246 		return (error);
247 
248 	switch (name[0]) {
249 	case CTL_KERN:
250 		dolock = 0;
251 		fn = kern_sysctl;
252 		break;
253 	case CTL_HW:
254 		dolock = 0;
255 		fn = hw_sysctl;
256 		break;
257 	case CTL_VM:
258 		fn = uvm_sysctl;
259 		break;
260 	case CTL_NET:
261 		dolock = 0;
262 		fn = net_sysctl;
263 		break;
264 	case CTL_FS:
265 		dolock = 0;
266 		fn = fs_sysctl;
267 		break;
268 	case CTL_VFS:
269 		fn = vfs_sysctl;
270 		break;
271 	case CTL_MACHDEP:
272 		fn = cpu_sysctl;
273 		break;
274 #ifdef DEBUG_SYSCTL
275 	case CTL_DEBUG:
276 		fn = debug_sysctl;
277 		break;
278 #endif
279 #ifdef DDB
280 	case CTL_DDB:
281 		fn = ddb_sysctl;
282 		break;
283 #endif
284 	default:
285 		return (EOPNOTSUPP);
286 	}
287 
288 	if (SCARG(uap, oldlenp) &&
289 	    (error = copyin(SCARG(uap, oldlenp), &oldlen, sizeof(oldlen))))
290 		return (error);
291 
292 	if (dolock) {
293 		error = sysctl_vslock(SCARG(uap, old), oldlen);
294 		if (error)
295 			return (error);
296 		savelen = oldlen;
297 	}
298 	error = (*fn)(&name[1], SCARG(uap, namelen) - 1, SCARG(uap, old),
299 	    &oldlen, SCARG(uap, new), SCARG(uap, newlen), p);
300 	if (dolock)
301 		sysctl_vsunlock(SCARG(uap, old), savelen);
302 
303 	if (error)
304 		return (error);
305 	if (SCARG(uap, oldlenp))
306 		error = copyout(&oldlen, SCARG(uap, oldlenp), sizeof(oldlen));
307 	return (error);
308 }
309 
/*
 * Attributes stored in the kernel.
 *
 * hostnamelen and domainnamelen are set to the length of the new value
 * after a successful KERN_HOSTNAME / KERN_DOMAINNAME write.
 * disknames and diskstats are reported by the HW_DISKNAMES and
 * HW_DISKSTATS handlers after sysctl_diskinit() has run.
 */
char hostname[MAXHOSTNAMELEN];
int hostnamelen;
char domainname[MAXHOSTNAMELEN];
int domainnamelen;
int hostid;
char *disknames = NULL;
size_t disknameslen;
struct diskstats *diskstats = NULL;
size_t diskstatslen;
int securelevel;

/* morally const values reported by sysctl_bounded_arr */
static int arg_max = ARG_MAX;
static int openbsd = OpenBSD;
static int posix_version = _POSIX_VERSION;
static int ngroups_max = NGROUPS_MAX;
static int int_zero = 0;
static int int_one = 1;
static int maxpartitions = MAXPARTITIONS;
static int raw_part = RAW_PART;

/* Tunables defined in other files and exposed through kern_vars[]. */
extern int somaxconn, sominconn;
extern int nosuidcoredump;
extern int maxlocksperuid;
extern int uvm_wxabort;
extern int global_ptrace;
339 
/*
 * Integer kern.* variables served through sysctl_bounded_arr().
 *
 * Each entry starts with the sysctl identifier and a pointer to the
 * backing variable.
 * NOTE(review): the remaining initializers look like the inclusive
 * minimum and maximum accepted on write, with SYSCTL_INT_READONLY
 * standing in for that pair on read-only entries -- confirm against
 * the struct sysctl_bounded_args definition.
 *
 * Identifiers listed explicitly in kern_sysctl() are looked up here
 * without sysctl_vslock(); the others are reached through the default
 * case of kern_sysctl_locked().
 */
const struct sysctl_bounded_args kern_vars[] = {
	{KERN_OSREV, &openbsd, SYSCTL_INT_READONLY},
	{KERN_MAXVNODES, &maxvnodes, 0, INT_MAX},
	{KERN_MAXPROC, &maxprocess, 0, INT_MAX},
	{KERN_MAXFILES, &maxfiles, 0, INT_MAX},
	{KERN_NFILES, &numfiles, SYSCTL_INT_READONLY},
	{KERN_TTYCOUNT, &tty_count, SYSCTL_INT_READONLY},
	{KERN_ARGMAX, &arg_max, SYSCTL_INT_READONLY},
	{KERN_POSIX1, &posix_version, SYSCTL_INT_READONLY},
	{KERN_NGROUPS, &ngroups_max, SYSCTL_INT_READONLY},
	{KERN_JOB_CONTROL, &int_one, SYSCTL_INT_READONLY},
	{KERN_SAVED_IDS, &int_one, SYSCTL_INT_READONLY},
	{KERN_MAXPARTITIONS, &maxpartitions, SYSCTL_INT_READONLY},
	{KERN_RAWPARTITION, &raw_part, SYSCTL_INT_READONLY},
	{KERN_MAXTHREAD, &maxthread, 0, INT_MAX},
	{KERN_NTHREADS, &nthreads, SYSCTL_INT_READONLY},
	{KERN_SOMAXCONN, &somaxconn, 0, SHRT_MAX},
	{KERN_SOMINCONN, &sominconn, 0, SHRT_MAX},
	{KERN_NOSUIDCOREDUMP, &nosuidcoredump, 0, 3},
	{KERN_FSYNC, &int_one, SYSCTL_INT_READONLY},
	/* The three SysV IPC entries report 1 when the option is compiled in. */
	{KERN_SYSVMSG,
#ifdef SYSVMSG
	 &int_one,
#else
	 &int_zero,
#endif
	 SYSCTL_INT_READONLY},
	{KERN_SYSVSEM,
#ifdef SYSVSEM
	 &int_one,
#else
	 &int_zero,
#endif
	 SYSCTL_INT_READONLY},
	{KERN_SYSVSHM,
#ifdef SYSVSHM
	 &int_one,
#else
	 &int_zero,
#endif
	 SYSCTL_INT_READONLY},
	{KERN_FSCALE, &fscale, SYSCTL_INT_READONLY},
	{KERN_CCPU, &ccpu, SYSCTL_INT_READONLY},
	{KERN_NPROCS, &nprocesses, SYSCTL_INT_READONLY},
	{KERN_SPLASSERT, &splassert_ctl, 0, 3},
	{KERN_MAXLOCKSPERUID, &maxlocksperuid, 0, INT_MAX},
	{KERN_WXABORT, &uvm_wxabort, 0, 1},
	/* Always reported as zero. */
	{KERN_NETLIVELOCKS, &int_zero, SYSCTL_INT_READONLY},
#ifdef PTRACE
	{KERN_GLOBAL_PTRACE, &global_ptrace, 0, 1},
#endif
	{KERN_AUTOCONF_SERIAL, &autoconf_serial, SYSCTL_INT_READONLY},
};
393 
394 int
395 kern_sysctl_dirs(int top_name, int *name, u_int namelen,
396     void *oldp, size_t *oldlenp, void *newp, size_t newlen, struct proc *p)
397 {
398 	switch (top_name) {
399 #ifndef SMALL_KERNEL
400 	case KERN_PROC:
401 		return (sysctl_doproc(name, namelen, oldp, oldlenp));
402 	case KERN_PROC_ARGS:
403 		return (sysctl_proc_args(name, namelen, oldp, oldlenp, p));
404 	case KERN_PROC_CWD:
405 		return (sysctl_proc_cwd(name, namelen, oldp, oldlenp, p));
406 	case KERN_PROC_NOBROADCASTKILL:
407 		return (sysctl_proc_nobroadcastkill(name, namelen,
408 		     newp, newlen, oldp, oldlenp, p));
409 	case KERN_PROC_VMMAP:
410 		return (sysctl_proc_vmmap(name, namelen, oldp, oldlenp, p));
411 	case KERN_FILE:
412 		return (sysctl_file(name, namelen, oldp, oldlenp, p));
413 #endif
414 #if defined(GPROF) || defined(DDBPROF)
415 	case KERN_PROF:
416 		return (sysctl_doprof(name, namelen, oldp, oldlenp,
417 		    newp, newlen));
418 #endif
419 	case KERN_MALLOCSTATS:
420 		return (sysctl_malloc(name, namelen, oldp, oldlenp,
421 		    newp, newlen, p));
422 	case KERN_TTY:
423 		return (sysctl_tty(name, namelen, oldp, oldlenp,
424 		    newp, newlen));
425 	case KERN_POOL:
426 		return (sysctl_dopool(name, namelen, oldp, oldlenp));
427 #if defined(SYSVMSG) || defined(SYSVSEM) || defined(SYSVSHM)
428 	case KERN_SYSVIPC_INFO:
429 		return (sysctl_sysvipc(name, namelen, oldp, oldlenp));
430 #endif
431 #ifdef SYSVSEM
432 	case KERN_SEMINFO:
433 		return (sysctl_sysvsem(name, namelen, oldp, oldlenp,
434 		    newp, newlen));
435 #endif
436 #ifdef SYSVSHM
437 	case KERN_SHMINFO:
438 		return (sysctl_sysvshm(name, namelen, oldp, oldlenp,
439 		    newp, newlen));
440 #endif
441 #ifndef SMALL_KERNEL
442 	case KERN_INTRCNT:
443 		return (sysctl_intrcnt(name, namelen, oldp, oldlenp));
444 	case KERN_WATCHDOG:
445 		return (sysctl_wdog(name, namelen, oldp, oldlenp,
446 		    newp, newlen));
447 #endif
448 #ifndef SMALL_KERNEL
449 	case KERN_EVCOUNT:
450 		return (evcount_sysctl(name, namelen, oldp, oldlenp,
451 		    newp, newlen));
452 #endif
453 	case KERN_TIMECOUNTER:
454 		return (sysctl_tc(name, namelen, oldp, oldlenp, newp, newlen));
455 	case KERN_CPTIME2:
456 		return (sysctl_cptime2(name, namelen, oldp, oldlenp,
457 		    newp, newlen));
458 #ifdef WITNESS
459 	case KERN_WITNESSWATCH:
460 		return witness_sysctl_watch(oldp, oldlenp, newp, newlen);
461 	case KERN_WITNESS:
462 		return witness_sysctl(name, namelen, oldp, oldlenp,
463 		    newp, newlen);
464 #endif
465 #if NVIDEO > 0
466 	case KERN_VIDEO:
467 		return (sysctl_video(name, namelen, oldp, oldlenp,
468 		    newp, newlen));
469 #endif
470 	case KERN_CPUSTATS:
471 		return (sysctl_cpustats(name, namelen, oldp, oldlenp,
472 		    newp, newlen));
473 	case KERN_CLOCKINTR:
474 		return sysctl_clockintr(name, namelen, oldp, oldlenp, newp,
475 		    newlen);
476 	default:
477 		return (ENOTDIR);	/* overloaded */
478 	}
479 }
480 
481 /*
482  * kernel related system variables.
483  */
484 int
485 kern_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
486     size_t newlen, struct proc *p)
487 {
488 	int error;
489 	size_t savelen;
490 
491 	/* dispatch the non-terminal nodes first */
492 	if (namelen != 1) {
493 		switch (name[0]) {
494 #if NAUDIO > 0
495 		case KERN_AUDIO:
496 			return (sysctl_audio(name + 1, namelen - 1,
497 			    oldp, oldlenp, newp, newlen));
498 #endif
499 		default:
500 			break;
501 		}
502 
503 		savelen = *oldlenp;
504 		if ((error = sysctl_vslock(oldp, savelen)))
505 			return (error);
506 		error = kern_sysctl_dirs(name[0], name + 1, namelen - 1,
507 		    oldp, oldlenp, newp, newlen, p);
508 		sysctl_vsunlock(oldp, savelen);
509 		return (error);
510 	}
511 
512 	switch (name[0]) {
513 	case KERN_ALLOWKMEM:
514 		return (sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
515 		    &allowkmem));
516 	case KERN_OSTYPE:
517 		return (sysctl_rdstring(oldp, oldlenp, newp, ostype));
518 	case KERN_OSRELEASE:
519 		return (sysctl_rdstring(oldp, oldlenp, newp, osrelease));
520 	case KERN_OSVERSION:
521 		return (sysctl_rdstring(oldp, oldlenp, newp, osversion));
522 	case KERN_VERSION:
523 		return (sysctl_rdstring(oldp, oldlenp, newp, version));
524 	case KERN_NUMVNODES:  /* XXX numvnodes is a long */
525 		return (sysctl_rdint(oldp, oldlenp, newp, numvnodes));
526 #if NDT > 0
527 	case KERN_ALLOWDT:
528 		return (sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
529 		    &allowdt));
530 #endif
531 	case KERN_HOSTID:
532 		return (sysctl_int(oldp, oldlenp, newp, newlen, &hostid));
533 	case KERN_CLOCKRATE:
534 		return (sysctl_clockrate(oldp, oldlenp, newp));
535 	case KERN_BOOTTIME: {
536 		struct timeval bt;
537 		memset(&bt, 0, sizeof bt);
538 		microboottime(&bt);
539 		return (sysctl_rdstruct(oldp, oldlenp, newp, &bt, sizeof bt));
540 	}
541 	case KERN_MBSTAT: {
542 		uint64_t counters[MBSTAT_COUNT];
543 		struct mbstat mbs;
544 		unsigned int i;
545 
546 		memset(&mbs, 0, sizeof(mbs));
547 		counters_read(mbstat, counters, MBSTAT_COUNT, NULL);
548 		for (i = 0; i < MBSTAT_TYPES; i++)
549 			mbs.m_mtypes[i] = counters[i];
550 
551 		mbs.m_drops = counters[MBSTAT_DROPS];
552 		mbs.m_wait = counters[MBSTAT_WAIT];
553 		mbs.m_drain = counters[MBSTAT_DRAIN];
554 		mbs.m_defrag_alloc = counters[MBSTAT_DEFRAG_ALLOC];
555 		mbs.m_prepend_alloc = counters[MBSTAT_PREPEND_ALLOC];
556 		mbs.m_pullup_alloc = counters[MBSTAT_PULLUP_ALLOC];
557 		mbs.m_pullup_copy = counters[MBSTAT_PULLUP_COPY];
558 		mbs.m_pulldown_alloc = counters[MBSTAT_PULLDOWN_ALLOC];
559 		mbs.m_pulldown_copy = counters[MBSTAT_PULLDOWN_COPY];
560 
561 		return (sysctl_rdstruct(oldp, oldlenp, newp,
562 		    &mbs, sizeof(mbs)));
563 	}
564 	case KERN_MSGBUFSIZE:
565 	case KERN_CONSBUFSIZE: {
566 		struct msgbuf *mp;
567 		mp = (name[0] == KERN_MSGBUFSIZE) ? msgbufp : consbufp;
568 		/*
569 		 * deal with cases where the message buffer has
570 		 * become corrupted.
571 		 */
572 		if (!mp || mp->msg_magic != MSG_MAGIC)
573 			return (ENXIO);
574 		return (sysctl_rdint(oldp, oldlenp, newp, mp->msg_bufs));
575 	}
576 	case KERN_TIMEOUT_STATS:
577 		return (timeout_sysctl(oldp, oldlenp, newp, newlen));
578 	case KERN_OSREV:
579 	case KERN_MAXPROC:
580 	case KERN_MAXFILES:
581 	case KERN_NFILES:
582 	case KERN_TTYCOUNT:
583 	case KERN_ARGMAX:
584 	case KERN_POSIX1:
585 	case KERN_NGROUPS:
586 	case KERN_JOB_CONTROL:
587 	case KERN_SAVED_IDS:
588 	case KERN_MAXPARTITIONS:
589 	case KERN_RAWPARTITION:
590 	case KERN_MAXTHREAD:
591 	case KERN_NTHREADS:
592 	case KERN_SOMAXCONN:
593 	case KERN_SOMINCONN:
594 	case KERN_FSYNC:
595 	case KERN_SYSVMSG:
596 	case KERN_SYSVSEM:
597 	case KERN_SYSVSHM:
598 	case KERN_FSCALE:
599 	case KERN_CCPU:
600 	case KERN_NPROCS:
601 	case KERN_NETLIVELOCKS:
602 	case KERN_AUTOCONF_SERIAL:
603 		return (sysctl_bounded_arr(kern_vars, nitems(kern_vars), name,
604 		    namelen, oldp, oldlenp, newp, newlen));
605 	}
606 
607 	savelen = *oldlenp;
608 	if ((error = sysctl_vslock(oldp, savelen)))
609 		return (error);
610 	error = kern_sysctl_locked(name, namelen, oldp, oldlenp,
611 	    newp, newlen, p);
612 	sysctl_vsunlock(oldp, savelen);
613 
614 	return (error);
615 }
616 
/*
 * Handle the terminal kern.* names that kern_sysctl() does not answer
 * itself.  kern_sysctl() calls this between sysctl_vslock() and
 * sysctl_vsunlock().  Identifiers without a case of their own are
 * looked up in kern_vars[].
 */
int
kern_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen, struct proc *p)
{
	int error, stackgap;
	dev_t dev;
	extern int pool_debug;

	switch (name[0]) {
	case KERN_SECURELVL:
		return (sysctl_securelevel(oldp, oldlenp, newp, newlen, p));
	case KERN_HOSTNAME:
		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
		    hostname, sizeof(hostname));
		/* Track the length of a successfully stored name. */
		if (newp && !error)
			hostnamelen = newlen;
		return (error);
	case KERN_DOMAINNAME:
		/*
		 * At securelevel 1 or higher a domain name that is
		 * already set can no longer be changed.
		 */
		if (securelevel >= 1 && domainnamelen && newp)
			error = EPERM;
		else
			error = sysctl_tstring(oldp, oldlenp, newp, newlen,
			    domainname, sizeof(domainname));
		if (newp && !error)
			domainnamelen = newlen;
		return (error);
	case KERN_CONSBUF:
		/* The console buffer additionally requires suser(). */
		if ((error = suser(p)))
			return (error);
		/* FALLTHROUGH */
	case KERN_MSGBUF: {
		struct msgbuf *mp;
		mp = (name[0] == KERN_MSGBUF) ? msgbufp : consbufp;
		/*
		 * deal with cases where the message buffer has
		 * become corrupted.
		 */
		if (!mp || mp->msg_magic != MSG_MAGIC)
			return (ENXIO);
		/* Export the header up to msg_bufc plus msg_bufs bytes. */
		return (sysctl_rdstruct(oldp, oldlenp, newp, mp,
		    mp->msg_bufs + offsetof(struct msgbuf, msg_bufc)));
	}
	case KERN_CPTIME:
	{
		CPU_INFO_ITERATOR cii;
		struct cpu_info *ci;
		long cp_time[CPUSTATES];
		int i, n = 0;

		memset(cp_time, 0, sizeof(cp_time));

		/* Sum the per-state counters over the online CPUs... */
		CPU_INFO_FOREACH(cii, ci) {
			if (!cpu_is_online(ci))
				continue;
			n++;
			for (i = 0; i < CPUSTATES; i++)
				cp_time[i] += ci->ci_schedstate.spc_cp_time[i];
		}

		/* ...and report the average per online CPU. */
		for (i = 0; i < CPUSTATES; i++)
			cp_time[i] /= n;

		return (sysctl_rdstruct(oldp, oldlenp, newp, &cp_time,
		    sizeof(cp_time)));
	}
	case KERN_NCHSTATS:
		return (sysctl_rdstruct(oldp, oldlenp, newp, &nchstats,
		    sizeof(struct nchstats)));
	case KERN_FORKSTAT:
		return (sysctl_rdstruct(oldp, oldlenp, newp, &forkstat,
		    sizeof(struct forkstat)));
	case KERN_STACKGAPRANDOM:
		/* Work on a copy so an invalid value is never stored. */
		stackgap = stackgap_random;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &stackgap);
		if (error)
			return (error);
		/*
		 * Safety harness.
		 */
		if ((stackgap < ALIGNBYTES && stackgap != 0) ||
		    !powerof2(stackgap) || stackgap >= MAXSSIZ)
			return (EINVAL);
		stackgap_random = stackgap;
		return (0);
	case KERN_MAXCLUSTERS: {
		int val = nmbclust;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		/* Only call nmbclust_update() when the value changed. */
		if (error == 0 && val != nmbclust)
			error = nmbclust_update(val);
		return (error);
	}
	case KERN_CACHEPCT: {
		u_int64_t dmapages;
		int opct, pgs;
		/*
		 * sysctl_int() is handed bufcachepercent itself, so keep
		 * the previous value to restore it if the new one is
		 * outside the accepted 5..90 range.
		 */
		opct = bufcachepercent;
		error = sysctl_int(oldp, oldlenp, newp, newlen,
		    &bufcachepercent);
		if (error)
			return(error);
		if (bufcachepercent > 90 || bufcachepercent < 5) {
			bufcachepercent = opct;
			return (EINVAL);
		}
		dmapages = uvm_pagecount(&dma_constraint);
		if (bufcachepercent != opct) {
			pgs = bufcachepercent * dmapages / 100;
			bufadjust(pgs); /* adjust bufpages */
			bufhighpages = bufpages; /* set high water mark */
		}
		return(0);
	}
	case KERN_CONSDEV:
		/* NODEV is reported when there is no console table entry. */
		if (cn_tab != NULL)
			dev = cn_tab->cn_dev;
		else
			dev = NODEV;
		return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev));
	case KERN_POOL_DEBUG: {
		int old_pool_debug = pool_debug;

		error = sysctl_int(oldp, oldlenp, newp, newlen,
		    &pool_debug);
		/* Call pool_reclaim_all() whenever the setting changed. */
		if (error == 0 && pool_debug != old_pool_debug)
			pool_reclaim_all();
		return (error);
	}
#if NPF > 0
	case KERN_PFSTATUS:
		return (pf_sysctl(oldp, oldlenp, newp, newlen));
#endif
	case KERN_UTC_OFFSET:
		return (sysctl_utc_offset(oldp, oldlenp, newp, newlen));
	default:
		/* Remaining integer variables come from the kern_vars table. */
		return (sysctl_bounded_arr(kern_vars, nitems(kern_vars), name,
		    namelen, oldp, oldlenp, newp, newlen));
	}
	/* NOTREACHED */
}
755 
/*
 * hardware related system variables.
 *
 * The hw_* strings are reported by hw_sysctl(); while one of them is
 * NULL the matching name returns EOPNOTSUPP.
 * NOTE(review): nothing in this part of the file assigns them;
 * presumably machine-dependent or firmware code does -- confirm.
 */
char *hw_vendor, *hw_prod, *hw_uuid, *hw_serial, *hw_ver;
/* Exported as HW_ALLOWPOWERDOWN through sysctl_securelevel_int(). */
int allowpowerdown = 1;
/* Exported read-only as HW_POWER through hw_vars[]. */
int hw_power = 1;

/* morally const values reported by sysctl_bounded_arr */
static int byte_order = BYTE_ORDER;

/*
 * Read-only integer hw.* variables served through sysctl_bounded_arr()
 * from the default case of hw_sysctl().
 */
const struct sysctl_bounded_args hw_vars[] = {
	{HW_NCPU, &ncpus, SYSCTL_INT_READONLY},
	{HW_NCPUFOUND, &ncpusfound, SYSCTL_INT_READONLY},
	{HW_BYTEORDER, &byte_order, SYSCTL_INT_READONLY},
	{HW_PAGESIZE, &uvmexp.pagesize, SYSCTL_INT_READONLY},
	{HW_DISKCOUNT, &disk_count, SYSCTL_INT_READONLY},
	{HW_POWER, &hw_power, SYSCTL_INT_READONLY},
};
774 
775 int
776 hw_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
777     size_t newlen, struct proc *p)
778 {
779 	extern char machine[], cpu_model[];
780 	int err;
781 
782 	/*
783 	 * all sysctl names at this level except sensors and battery
784 	 * are terminal
785 	 */
786 	if (name[0] != HW_SENSORS && name[0] != HW_BATTERY && namelen != 1)
787 		return (ENOTDIR);		/* overloaded */
788 
789 	switch (name[0]) {
790 	case HW_MACHINE:
791 		return (sysctl_rdstring(oldp, oldlenp, newp, machine));
792 	case HW_MODEL:
793 		return (sysctl_rdstring(oldp, oldlenp, newp, cpu_model));
794 	case HW_NCPUONLINE:
795 		return (sysctl_rdint(oldp, oldlenp, newp,
796 		    sysctl_hwncpuonline()));
797 	case HW_PHYSMEM:
798 		return (sysctl_rdint(oldp, oldlenp, newp, ptoa(physmem)));
799 	case HW_USERMEM:
800 		return (sysctl_rdint(oldp, oldlenp, newp,
801 		    ptoa(physmem - uvmexp.wired)));
802 	case HW_DISKNAMES:
803 	case HW_DISKSTATS:
804 	case HW_CPUSPEED:
805 #ifndef	SMALL_KERNEL
806 	case HW_SENSORS:
807 	case HW_SETPERF:
808 	case HW_PERFPOLICY:
809 	case HW_BATTERY:
810 #endif /* !SMALL_KERNEL */
811 	case HW_ALLOWPOWERDOWN:
812 	case HW_UCOMNAMES:
813 #ifdef __HAVE_CPU_TOPOLOGY
814 	case HW_SMT:
815 #endif
816 	{
817 		size_t savelen = *oldlenp;
818 		if ((err = sysctl_vslock(oldp, savelen)))
819 			return (err);
820 		err = hw_sysctl_locked(name, namelen, oldp, oldlenp,
821 		    newp, newlen, p);
822 		sysctl_vsunlock(oldp, savelen);
823 		return (err);
824 	}
825 	case HW_VENDOR:
826 		if (hw_vendor)
827 			return (sysctl_rdstring(oldp, oldlenp, newp,
828 			    hw_vendor));
829 		else
830 			return (EOPNOTSUPP);
831 	case HW_PRODUCT:
832 		if (hw_prod)
833 			return (sysctl_rdstring(oldp, oldlenp, newp, hw_prod));
834 		else
835 			return (EOPNOTSUPP);
836 	case HW_VERSION:
837 		if (hw_ver)
838 			return (sysctl_rdstring(oldp, oldlenp, newp, hw_ver));
839 		else
840 			return (EOPNOTSUPP);
841 	case HW_SERIALNO:
842 		if (hw_serial)
843 			return (sysctl_rdstring(oldp, oldlenp, newp,
844 			    hw_serial));
845 		else
846 			return (EOPNOTSUPP);
847 	case HW_UUID:
848 		if (hw_uuid)
849 			return (sysctl_rdstring(oldp, oldlenp, newp, hw_uuid));
850 		else
851 			return (EOPNOTSUPP);
852 	case HW_PHYSMEM64:
853 		return (sysctl_rdquad(oldp, oldlenp, newp,
854 		    ptoa((psize_t)physmem)));
855 	case HW_USERMEM64:
856 		return (sysctl_rdquad(oldp, oldlenp, newp,
857 		    ptoa((psize_t)physmem - uvmexp.wired)));
858 	default:
859 		return sysctl_bounded_arr(hw_vars, nitems(hw_vars), name,
860 		    namelen, oldp, oldlenp, newp, newlen);
861 	}
862 	/* NOTREACHED */
863 }
864 
865 int
866 hw_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
867     void *newp, size_t newlen, struct proc *p)
868 {
869 	int err, cpuspeed;
870 
871 	switch (name[0]) {
872 	case HW_DISKNAMES:
873 		err = sysctl_diskinit(0, p);
874 		if (err)
875 			return err;
876 		if (disknames)
877 			return (sysctl_rdstring(oldp, oldlenp, newp,
878 			    disknames));
879 		else
880 			return (sysctl_rdstring(oldp, oldlenp, newp, ""));
881 	case HW_DISKSTATS:
882 		err = sysctl_diskinit(1, p);
883 		if (err)
884 			return err;
885 		return (sysctl_rdstruct(oldp, oldlenp, newp, diskstats,
886 		    disk_count * sizeof(struct diskstats)));
887 	case HW_CPUSPEED:
888 		if (!cpu_cpuspeed)
889 			return (EOPNOTSUPP);
890 		err = cpu_cpuspeed(&cpuspeed);
891 		if (err)
892 			return err;
893 		return (sysctl_rdint(oldp, oldlenp, newp, cpuspeed));
894 #ifndef SMALL_KERNEL
895 	case HW_SENSORS:
896 		return (sysctl_sensors(name + 1, namelen - 1, oldp, oldlenp,
897 		    newp, newlen));
898 	case HW_SETPERF:
899 		return (sysctl_hwsetperf(oldp, oldlenp, newp, newlen));
900 	case HW_PERFPOLICY:
901 		return (sysctl_hwperfpolicy(oldp, oldlenp, newp, newlen));
902 #endif /* !SMALL_KERNEL */
903 	case HW_ALLOWPOWERDOWN:
904 		return (sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
905 		    &allowpowerdown));
906 	case HW_UCOMNAMES: {
907 		const char *str = "";
908 #if NUCOM > 0
909 		str = sysctl_ucominit();
910 #endif	/* NUCOM > 0 */
911 		return (sysctl_rdstring(oldp, oldlenp, newp, str));
912 	}
913 #ifdef __HAVE_CPU_TOPOLOGY
914 	case HW_SMT:
915 		return (sysctl_hwsmt(oldp, oldlenp, newp, newlen));
916 #endif
917 #ifndef SMALL_KERNEL
918 	case HW_BATTERY:
919 		return (sysctl_hwbattery(name + 1, namelen - 1, oldp, oldlenp,
920 		    newp, newlen));
921 #endif
922 	default:
923 		return (EOPNOTSUPP);
924 	}
925 	/* NOTREACHED */
926 }
927 
928 #ifndef SMALL_KERNEL
929 
/*
 * Battery charge control state reported by the hw.battery handlers
 * below, and the hooks they call to apply a newly written value.
 * A handler returns EOPNOTSUPP while its hook is NULL.
 * NOTE(review): nothing in this part of the file assigns these;
 * presumably a battery driver does -- confirm.
 */
int hw_battery_chargemode;
int hw_battery_chargestart;
int hw_battery_chargestop;
int (*hw_battery_setchargemode)(int);
int (*hw_battery_setchargestart)(int);
int (*hw_battery_setchargestop)(int);
936 
937 int
938 sysctl_hwchargemode(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
939 {
940 	int mode = hw_battery_chargemode;
941 	int error;
942 
943 	if (!hw_battery_setchargemode)
944 		return EOPNOTSUPP;
945 
946 	error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
947 	    &mode, -1, 1);
948 	if (error)
949 		return error;
950 
951 	if (newp != NULL)
952 		error = hw_battery_setchargemode(mode);
953 
954 	return error;
955 }
956 
957 int
958 sysctl_hwchargestart(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
959 {
960 	int start = hw_battery_chargestart;
961 	int error;
962 
963 	if (!hw_battery_setchargestart)
964 		return EOPNOTSUPP;
965 
966 	error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
967 	    &start, 0, 100);
968 	if (error)
969 		return error;
970 
971 	if (newp != NULL)
972 		error = hw_battery_setchargestart(start);
973 
974 	return error;
975 }
976 
977 int
978 sysctl_hwchargestop(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
979 {
980 	int stop = hw_battery_chargestop;
981 	int error;
982 
983 	if (!hw_battery_setchargestop)
984 		return EOPNOTSUPP;
985 
986 	error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
987 	    &stop, 0, 100);
988 	if (error)
989 		return error;
990 
991 	if (newp != NULL)
992 		error = hw_battery_setchargestop(stop);
993 
994 	return error;
995 }
996 
997 int
998 sysctl_hwbattery(int *name, u_int namelen, void *oldp, size_t *oldlenp,
999     void *newp, size_t newlen)
1000 {
1001 	if (namelen != 1)
1002 		return (ENOTDIR);
1003 
1004 	switch (name[0]) {
1005 	case HW_BATTERY_CHARGEMODE:
1006 		return (sysctl_hwchargemode(oldp, oldlenp, newp, newlen));
1007 	case HW_BATTERY_CHARGESTART:
1008 		return (sysctl_hwchargestart(oldp, oldlenp, newp, newlen));
1009 	case HW_BATTERY_CHARGESTOP:
1010 		return (sysctl_hwchargestop(oldp, oldlenp, newp, newlen));
1011 	default:
1012 		return (EOPNOTSUPP);
1013 	}
1014 	/* NOTREACHED */
1015 }
1016 
1017 #endif
1018 
1019 #ifdef DEBUG_SYSCTL
/*
 * Debugging related system variables.
 *
 * debugvars[] maps the first name component of a debug.* request to
 * its control block; debug_sysctl() treats a slot whose debugname is
 * unset as unsupported.
 */
extern struct ctldebug debug_vfs_busyprt;
struct ctldebug debug1, debug2, debug3, debug4;
struct ctldebug debug5, debug6, debug7, debug8, debug9;
struct ctldebug debug10, debug11, debug12, debug13, debug14;
struct ctldebug debug15, debug16, debug17, debug18, debug19;
static struct ctldebug *debugvars[CTL_DEBUG_MAXID] = {
	&debug_vfs_busyprt,
	&debug1, &debug2, &debug3, &debug4,
	&debug5, &debug6, &debug7, &debug8, &debug9,
	&debug10, &debug11, &debug12, &debug13, &debug14,
	&debug15, &debug16, &debug17, &debug18, &debug19,
};
1035 int
1036 debug_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
1037     size_t newlen, struct proc *p)
1038 {
1039 	struct ctldebug *cdp;
1040 
1041 	/* all sysctl names at this level are name and field */
1042 	if (namelen != 2)
1043 		return (ENOTDIR);		/* overloaded */
1044 	if (name[0] < 0 || name[0] >= nitems(debugvars))
1045 		return (EOPNOTSUPP);
1046 	cdp = debugvars[name[0]];
1047 	if (cdp->debugname == 0)
1048 		return (EOPNOTSUPP);
1049 	switch (name[1]) {
1050 	case CTL_DEBUG_NAME:
1051 		return (sysctl_rdstring(oldp, oldlenp, newp, cdp->debugname));
1052 	case CTL_DEBUG_VALUE:
1053 		return (sysctl_int(oldp, oldlenp, newp, newlen, cdp->debugvar));
1054 	default:
1055 		return (EOPNOTSUPP);
1056 	}
1057 	/* NOTREACHED */
1058 }
1059 #endif /* DEBUG_SYSCTL */
1060 
/*
 * Integer sysctl handler that permits reads, and writes only when they
 * do not raise the value.  Old and new values are compared as unsigned
 * integers; an attempt to raise the value fails with EPERM.  The update
 * is done with a compare-and-swap loop so a concurrent change restarts
 * the check.
 */
int
sysctl_int_lower(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    int *valp)
{
	unsigned int cur, want;
	int error;

	if (oldp != NULL && *oldlenp < sizeof(int))
		return (ENOMEM);
	if (newp != NULL && newlen != sizeof(int))
		return (EINVAL);
	*oldlenp = sizeof(int);

	/* pure read (or pure size query) */
	if (newp == NULL) {
		if (oldp == NULL)
			return (0);
		cur = atomic_load_int(valp);
		return (copyout(&cur, oldp, sizeof(int)));
	}

	error = copyin(newp, &want, sizeof(int));
	if (error)
		return (error);

	for (;;) {
		cur = atomic_load_int(valp);
		if (cur < want)
			return (EPERM);	/* do not allow raising */
		if (atomic_cas_uint(valp, cur, want) == cur)
			break;
	}

	if (oldp != NULL) {
		/* new value has been set although user gets error */
		error = copyout(&cur, oldp, sizeof(int));
		if (error)
			return (error);
	}

	return (0);
}
1100 
/*
 * Integer-valued sysctl handler without a range restriction: validates
 * the parameters, returns the old value and/or stores a new one by
 * delegating to sysctl_int_bounded() with the full int range as limits.
 */
int
sysctl_int(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int *valp)
{
	int error;

	error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, valp,
	    INT_MIN, INT_MAX);
	return (error);
}
1111 
/*
 * Read-only integer sysctl handler.  Copies val out to oldp when a
 * buffer is supplied and always reports sizeof(int) through oldlenp.
 * Any attempt to set a new value fails with EPERM; a too-small old
 * buffer fails with ENOMEM.
 */
int
sysctl_rdint(void *oldp, size_t *oldlenp, void *newp, int val)
{
	if (oldp != NULL && *oldlenp < sizeof(int))
		return (ENOMEM);
	if (newp != NULL)
		return (EPERM);

	*oldlenp = sizeof(int);
	if (oldp == NULL)
		return (0);

	return (copyout(&val, oldp, sizeof(int)));
}
1129 
1130 int
1131 sysctl_securelevel(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
1132     struct proc *p)
1133 {
1134 	int oldval, newval;
1135 	int error;
1136 
1137 	if (oldp && *oldlenp < sizeof(int))
1138 		return (ENOMEM);
1139 	if (newp && newlen != sizeof(int))
1140 		return (EINVAL);
1141 	*oldlenp = sizeof(int);
1142 
1143 	if (newp) {
1144 		if ((error = copyin(newp, &newval, sizeof(int))))
1145 			return (error);
1146 		do {
1147 			oldval = atomic_load_int(&securelevel);
1148 			if ((oldval > 0 || newval < -1) && newval < oldval &&
1149 			    p->p_p->ps_pid != 1)
1150 				return (EPERM);
1151 		} while (atomic_cas_uint(&securelevel, oldval, newval) !=
1152 		    oldval);
1153 
1154 		if (oldp) {
1155 			/* new value has been set although user gets error */
1156 			if ((error = copyout(&oldval, oldp, sizeof(int))))
1157 				return (error);
1158 		}
1159 	} else if (oldp) {
1160 		oldval = atomic_load_int(&securelevel);
1161 
1162 		if ((error = copyout(&oldval, oldp, sizeof(int))))
1163 			return (error);
1164 	}
1165 
1166 	return (0);
1167 }
1168 
1169 /*
1170  * Selects between sysctl_rdint and sysctl_int according to securelevel.
1171  */
1172 int
1173 sysctl_securelevel_int(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
1174     int *valp)
1175 {
1176 	if (atomic_load_int(&securelevel) > 0)
1177 		return (sysctl_rdint(oldp, oldlenp, newp, *valp));
1178 	return (sysctl_int(oldp, oldlenp, newp, newlen, valp));
1179 }
1180 
/*
 * Read-only or bounded integer sysctl handler.
 *
 * A new value is accepted only if minimum <= value <= maximum,
 * otherwise EINVAL is returned.  Passing minimum > maximum makes the
 * variable read-only: any write attempt fails with EPERM.  The value is
 * accessed with atomic operations.
 */
int
sysctl_int_bounded(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    int *valp, int minimum, int maximum)
{
	int prev, next;
	int error;

	/* an empty range marks the variable read only */
	if (newp != NULL && minimum > maximum)
		return (EPERM);

	if (oldp != NULL && *oldlenp < sizeof(int))
		return (ENOMEM);
	if (newp != NULL && newlen != sizeof(int))
		return (EINVAL);
	*oldlenp = sizeof(int);

	/* copyin() may sleep, call it first */
	if (newp != NULL) {
		error = copyin(newp, &next, sizeof(int));
		if (error)
			return (error);
		/* outside limits */
		if (next < minimum || next > maximum)
			return (EINVAL);
	}

	/* caller does not want the old value back */
	if (oldp == NULL) {
		if (newp != NULL)
			atomic_store_int(valp, next);
		return (0);
	}

	if (newp != NULL)
		prev = atomic_swap_uint(valp, next);
	else
		prev = atomic_load_int(valp);

	/* new value has been set although user gets error */
	return (copyout(&prev, oldp, sizeof(int)));
}
1223 
1224 /*
1225  * Array of read-only or bounded integer values.
1226  */
1227 int
1228 sysctl_bounded_arr(const struct sysctl_bounded_args *valpp, u_int valplen,
1229     int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
1230     size_t newlen)
1231 {
1232 	u_int i;
1233 	if (namelen != 1)
1234 		return (ENOTDIR);
1235 	for (i = 0; i < valplen; ++i) {
1236 		if (valpp[i].mib == name[0]) {
1237 			return (sysctl_int_bounded(oldp, oldlenp, newp, newlen,
1238 			    valpp[i].var, valpp[i].minimum, valpp[i].maximum));
1239 		}
1240 	}
1241 	return (EOPNOTSUPP);
1242 }
1243 
/*
 * Validate parameters and get old / set new parameters
 * for a 64-bit integer-valued sysctl function.
 *
 * The old value is copied out first; the new value is copied in
 * directly over *valp only if that succeeded.
 */
int
sysctl_quad(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    int64_t *valp)
{
	int error;

	if (oldp != NULL && *oldlenp < sizeof(int64_t))
		return (ENOMEM);
	if (newp != NULL && newlen != sizeof(int64_t))
		return (EINVAL);
	*oldlenp = sizeof(int64_t);

	if (oldp != NULL) {
		error = copyout(valp, oldp, sizeof(int64_t));
		if (error)
			return (error);
	}
	if (newp == NULL)
		return (0);

	return (copyin(newp, valp, sizeof(int64_t)));
}
1265 
/*
 * Read-only 64-bit integer sysctl handler.  Copies val out to oldp
 * when a buffer is supplied and always reports sizeof(int64_t) through
 * oldlenp.  Any attempt to set a new value fails with EPERM; a
 * too-small old buffer fails with ENOMEM.
 */
int
sysctl_rdquad(void *oldp, size_t *oldlenp, void *newp, int64_t val)
{
	if (oldp != NULL && *oldlenp < sizeof(int64_t))
		return (ENOMEM);
	if (newp != NULL)
		return (EPERM);

	*oldlenp = sizeof(int64_t);
	if (oldp == NULL)
		return (0);

	return (copyout(&val, oldp, sizeof(int64_t)));
}
1283 
/*
 * Validate parameters and get old / set new parameters
 * for a string-valued sysctl function.
 *
 * This is sysctl__string() with truncation disabled: an old buffer
 * that cannot hold the whole string results in ENOMEM.
 */
int
sysctl_string(void *oldp, size_t *oldlenp, void *newp, size_t newlen, char *str,
    size_t maxlen)
{
	int error;

	error = sysctl__string(oldp, oldlenp, newp, newlen, str, maxlen, 0);
	return (error);
}
1294 
/*
 * String-valued sysctl handler that allows truncation: this is
 * sysctl__string() with truncation enabled, so a non-empty old buffer
 * that is too short receives a NUL-terminated prefix of the string.
 */
int
sysctl_tstring(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    char *str, size_t maxlen)
{
	int error;

	error = sysctl__string(oldp, oldlenp, newp, newlen, str, maxlen, 1);
	return (error);
}
1301 
/*
 * Common implementation for string-valued sysctl handlers.
 *
 * str is the NUL-terminated kernel string, maxlen the size of its
 * buffer.  A new string must be shorter than maxlen (EINVAL otherwise)
 * so there is room for the terminator.  When trunc is non-zero and the
 * old buffer is too small but not empty, a truncated, NUL-terminated
 * copy is returned instead of failing with ENOMEM.  *oldlenp is set to
 * the number of bytes (including the NUL) reported to the caller.
 */
int
sysctl__string(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    char *str, size_t maxlen, int trunc)
{
	size_t need, avail = 0;
	int error = 0;

	need = strlen(str) + 1;
	if (oldp != NULL) {
		avail = *oldlenp;
		/* too small, and truncation is not possible or not allowed */
		if (avail < need && (!trunc || avail == 0))
			return (ENOMEM);
	}
	if (newp != NULL && newlen >= maxlen)
		return (EINVAL);

	if (oldp != NULL) {
		if (trunc && avail < need) {
			/* copy what fits and terminate it ourselves */
			need = avail;
			error = copyout(str, oldp, need - 1);
			if (error == 0)
				error = copyout("", (char *)oldp + need - 1, 1);
		} else {
			error = copyout(str, oldp, need);
		}
	}
	*oldlenp = need;

	if (error != 0 || newp == NULL)
		return (error);

	/* the string is terminated at newlen whatever copyin() returns */
	error = copyin(newp, str, newlen);
	str[newlen] = 0;
	return (error);
}
1333 
/*
 * Read-only string sysctl handler.  Copies str, including its
 * terminating NUL, out to oldp when a buffer is supplied and reports
 * the required length through oldlenp.  Any attempt to set a new value
 * fails with EPERM; a too-small old buffer fails with ENOMEM.
 */
int
sysctl_rdstring(void *oldp, size_t *oldlenp, void *newp, const char *str)
{
	size_t need;

	need = strlen(str) + 1;
	if (oldp != NULL && *oldlenp < need)
		return (ENOMEM);
	if (newp != NULL)
		return (EPERM);

	*oldlenp = need;
	if (oldp == NULL)
		return (0);

	return (copyout(str, oldp, need));
}
1353 
/*
 * Validate parameters and get old / set new parameters
 * for a structure oriented sysctl function.
 *
 * sp points at the kernel structure and len is its size.  Unlike the
 * integer handlers, *oldlenp is only updated when an old buffer is
 * supplied.
 *
 * NOTE(review): a newlen smaller than len is accepted, yet len bytes
 * are still copied in from newp -- confirm callers always pass
 * newlen == len.
 */
int
sysctl_struct(void *oldp, size_t *oldlenp, void *newp, size_t newlen, void *sp,
    size_t len)
{
	int error;

	if (oldp != NULL && *oldlenp < len)
		return (ENOMEM);
	if (newp != NULL && newlen > len)
		return (EINVAL);

	if (oldp != NULL) {
		*oldlenp = len;
		error = copyout(sp, oldp, len);
		if (error)
			return (error);
	}
	if (newp == NULL)
		return (0);

	return (copyin(newp, sp, len));
}
1376 
/*
 * Read-only handler for a structure oriented sysctl function.  Copies
 * len bytes from sp out to oldp when a buffer is supplied and always
 * reports len through oldlenp.  Any attempt to set a new value fails
 * with EPERM; a too-small old buffer fails with ENOMEM.
 */
int
sysctl_rdstruct(void *oldp, size_t *oldlenp, void *newp, const void *sp,
    size_t len)
{
	if (oldp != NULL && *oldlenp < len)
		return (ENOMEM);
	if (newp != NULL)
		return (EPERM);

	*oldlenp = len;
	if (oldp == NULL)
		return (0);

	return (copyout(sp, oldp, len));
}
1396 
1397 #ifndef SMALL_KERNEL
1398 void
1399 fill_file(struct kinfo_file *kf, struct file *fp, struct filedesc *fdp,
1400 	  int fd, struct vnode *vp, struct process *pr, struct proc *p,
1401 	  struct socket *so, int show_pointers)
1402 {
1403 	struct vattr va;
1404 
1405 	memset(kf, 0, sizeof(*kf));
1406 
1407 	kf->fd_fd = fd;		/* might not really be an fd */
1408 
1409 	if (fp != NULL) {
1410 		if (show_pointers)
1411 			kf->f_fileaddr = PTRTOINT64(fp);
1412 		kf->f_flag = fp->f_flag;
1413 		kf->f_iflags = fp->f_iflags;
1414 		kf->f_type = fp->f_type;
1415 		kf->f_count = fp->f_count;
1416 		if (show_pointers)
1417 			kf->f_ucred = PTRTOINT64(fp->f_cred);
1418 		kf->f_uid = fp->f_cred->cr_uid;
1419 		kf->f_gid = fp->f_cred->cr_gid;
1420 		if (show_pointers)
1421 			kf->f_ops = PTRTOINT64(fp->f_ops);
1422 		if (show_pointers)
1423 			kf->f_data = PTRTOINT64(fp->f_data);
1424 		kf->f_usecount = 0;
1425 
1426 		if (suser(p) == 0 || p->p_ucred->cr_uid == fp->f_cred->cr_uid) {
1427 			mtx_enter(&fp->f_mtx);
1428 			kf->f_offset = fp->f_offset;
1429 			kf->f_rxfer = fp->f_rxfer;
1430 			kf->f_rwfer = fp->f_wxfer;
1431 			kf->f_seek = fp->f_seek;
1432 			kf->f_rbytes = fp->f_rbytes;
1433 			kf->f_wbytes = fp->f_wbytes;
1434 			mtx_leave(&fp->f_mtx);
1435 		} else
1436 			kf->f_offset = -1;
1437 	} else if (vp != NULL) {
1438 		/* fake it */
1439 		kf->f_type = DTYPE_VNODE;
1440 		kf->f_flag = FREAD;
1441 		if (fd == KERN_FILE_TRACE)
1442 			kf->f_flag |= FWRITE;
1443 	} else if (so != NULL) {
1444 		/* fake it */
1445 		kf->f_type = DTYPE_SOCKET;
1446 	}
1447 
1448 	/* information about the object associated with this file */
1449 	switch (kf->f_type) {
1450 	case DTYPE_VNODE:
1451 		if (fp != NULL)
1452 			vp = (struct vnode *)fp->f_data;
1453 
1454 		if (show_pointers)
1455 			kf->v_un = PTRTOINT64(vp->v_un.vu_socket);
1456 		kf->v_type = vp->v_type;
1457 		kf->v_tag = vp->v_tag;
1458 		kf->v_flag = vp->v_flag;
1459 		if (show_pointers)
1460 			kf->v_data = PTRTOINT64(vp->v_data);
1461 		if (show_pointers)
1462 			kf->v_mount = PTRTOINT64(vp->v_mount);
1463 		if (vp->v_mount)
1464 			strlcpy(kf->f_mntonname,
1465 			    vp->v_mount->mnt_stat.f_mntonname,
1466 			    sizeof(kf->f_mntonname));
1467 
1468 		if (VOP_GETATTR(vp, &va, p->p_ucred, p) == 0) {
1469 			kf->va_fileid = va.va_fileid;
1470 			kf->va_mode = MAKEIMODE(va.va_type, va.va_mode);
1471 			kf->va_size = va.va_size;
1472 			kf->va_rdev = va.va_rdev;
1473 			kf->va_fsid = va.va_fsid & 0xffffffff;
1474 			kf->va_nlink = va.va_nlink;
1475 		}
1476 		break;
1477 
1478 	case DTYPE_SOCKET: {
1479 		int locked = 0;
1480 
1481 		if (so == NULL) {
1482 			so = (struct socket *)fp->f_data;
1483 			/* if so is passed as parameter it is already locked */
1484 			solock(so);
1485 			locked = 1;
1486 		}
1487 
1488 		kf->so_type = so->so_type;
1489 		kf->so_state = so->so_state | so->so_snd.sb_state |
1490 		    so->so_rcv.sb_state;
1491 		if (show_pointers)
1492 			kf->so_pcb = PTRTOINT64(so->so_pcb);
1493 		else
1494 			kf->so_pcb = -1;
1495 		kf->so_protocol = so->so_proto->pr_protocol;
1496 		kf->so_family = so->so_proto->pr_domain->dom_family;
1497 		kf->so_rcv_cc = so->so_rcv.sb_cc;
1498 		kf->so_snd_cc = so->so_snd.sb_cc;
1499 		if (isspliced(so)) {
1500 			if (show_pointers)
1501 				kf->so_splice =
1502 				    PTRTOINT64(so->so_sp->ssp_socket);
1503 			kf->so_splicelen = so->so_sp->ssp_len;
1504 		} else if (issplicedback(so))
1505 			kf->so_splicelen = -1;
1506 		if (so->so_pcb == NULL) {
1507 			if (locked)
1508 				sounlock(so);
1509 			break;
1510 		}
1511 		switch (kf->so_family) {
1512 		case AF_INET: {
1513 			struct inpcb *inpcb = so->so_pcb;
1514 
1515 			soassertlocked(so);
1516 			if (show_pointers)
1517 				kf->inp_ppcb = PTRTOINT64(inpcb->inp_ppcb);
1518 			kf->inp_lport = inpcb->inp_lport;
1519 			kf->inp_laddru[0] = inpcb->inp_laddr.s_addr;
1520 			kf->inp_fport = inpcb->inp_fport;
1521 			kf->inp_faddru[0] = inpcb->inp_faddr.s_addr;
1522 			kf->inp_rtableid = inpcb->inp_rtableid;
1523 			if (so->so_type == SOCK_RAW)
1524 				kf->inp_proto = inpcb->inp_ip.ip_p;
1525 			if (so->so_proto->pr_protocol == IPPROTO_TCP) {
1526 				struct tcpcb *tcpcb = (void *)inpcb->inp_ppcb;
1527 				kf->t_rcv_wnd = tcpcb->rcv_wnd;
1528 				kf->t_snd_wnd = tcpcb->snd_wnd;
1529 				kf->t_snd_cwnd = tcpcb->snd_cwnd;
1530 				kf->t_state = tcpcb->t_state;
1531 			}
1532 			break;
1533 		    }
1534 		case AF_INET6: {
1535 			struct inpcb *inpcb = so->so_pcb;
1536 
1537 			soassertlocked(so);
1538 			if (show_pointers)
1539 				kf->inp_ppcb = PTRTOINT64(inpcb->inp_ppcb);
1540 			kf->inp_lport = inpcb->inp_lport;
1541 			kf->inp_laddru[0] = inpcb->inp_laddr6.s6_addr32[0];
1542 			kf->inp_laddru[1] = inpcb->inp_laddr6.s6_addr32[1];
1543 			kf->inp_laddru[2] = inpcb->inp_laddr6.s6_addr32[2];
1544 			kf->inp_laddru[3] = inpcb->inp_laddr6.s6_addr32[3];
1545 			kf->inp_fport = inpcb->inp_fport;
1546 			kf->inp_faddru[0] = inpcb->inp_faddr6.s6_addr32[0];
1547 			kf->inp_faddru[1] = inpcb->inp_faddr6.s6_addr32[1];
1548 			kf->inp_faddru[2] = inpcb->inp_faddr6.s6_addr32[2];
1549 			kf->inp_faddru[3] = inpcb->inp_faddr6.s6_addr32[3];
1550 			kf->inp_rtableid = inpcb->inp_rtableid;
1551 			if (so->so_type == SOCK_RAW)
1552 				kf->inp_proto = inpcb->inp_ipv6.ip6_nxt;
1553 			if (so->so_proto->pr_protocol == IPPROTO_TCP) {
1554 				struct tcpcb *tcpcb = (void *)inpcb->inp_ppcb;
1555 				kf->t_rcv_wnd = tcpcb->rcv_wnd;
1556 				kf->t_snd_wnd = tcpcb->snd_wnd;
1557 				kf->t_state = tcpcb->t_state;
1558 			}
1559 			break;
1560 		    }
1561 		case AF_UNIX: {
1562 			struct unpcb *unpcb = so->so_pcb;
1563 
1564 			kf->f_msgcount = unpcb->unp_msgcount;
1565 			if (show_pointers) {
1566 				kf->unp_conn	= PTRTOINT64(unpcb->unp_conn);
1567 				kf->unp_refs	= PTRTOINT64(
1568 				    SLIST_FIRST(&unpcb->unp_refs));
1569 				kf->unp_nextref	= PTRTOINT64(
1570 				    SLIST_NEXT(unpcb, unp_nextref));
1571 				kf->v_un	= PTRTOINT64(unpcb->unp_vnode);
1572 				kf->unp_addr	= PTRTOINT64(unpcb->unp_addr);
1573 			}
1574 			if (unpcb->unp_addr != NULL) {
1575 				struct sockaddr_un *un = mtod(unpcb->unp_addr,
1576 				    struct sockaddr_un *);
1577 				memcpy(kf->unp_path, un->sun_path, un->sun_len
1578 				    - offsetof(struct sockaddr_un,sun_path));
1579 			}
1580 			break;
1581 		    }
1582 		}
1583 		if (locked)
1584 			sounlock(so);
1585 		break;
1586 	    }
1587 
1588 	case DTYPE_PIPE: {
1589 		struct pipe *pipe = (struct pipe *)fp->f_data;
1590 
1591 		if (show_pointers)
1592 			kf->pipe_peer = PTRTOINT64(pipe->pipe_peer);
1593 		kf->pipe_state = pipe->pipe_state;
1594 		break;
1595 	    }
1596 
1597 	case DTYPE_KQUEUE: {
1598 		struct kqueue *kqi = (struct kqueue *)fp->f_data;
1599 
1600 		kf->kq_count = kqi->kq_count;
1601 		kf->kq_state = kqi->kq_state;
1602 		break;
1603 	    }
1604 	}
1605 
1606 	/* per-process information for KERN_FILE_BY[PU]ID */
1607 	if (pr != NULL) {
1608 		kf->p_pid = pr->ps_pid;
1609 		kf->p_uid = pr->ps_ucred->cr_uid;
1610 		kf->p_gid = pr->ps_ucred->cr_gid;
1611 		kf->p_tid = -1;
1612 		strlcpy(kf->p_comm, pr->ps_comm, sizeof(kf->p_comm));
1613 	}
1614 	if (fdp != NULL) {
1615 		fdplock(fdp);
1616 		kf->fd_ofileflags = fdp->fd_ofileflags[fd];
1617 		fdpunlock(fdp);
1618 	}
1619 }
1620 
1621 /*
1622  * Get file structures.
1623  */
1624 int
1625 sysctl_file(int *name, u_int namelen, char *where, size_t *sizep,
1626     struct proc *p)
1627 {
1628 	struct kinfo_file *kf;
1629 	struct filedesc *fdp;
1630 	struct file *fp;
1631 	struct process *pr;
1632 	size_t buflen, elem_size, elem_count, outsize;
1633 	char *dp = where;
1634 	int arg, i, error = 0, needed = 0, matched;
1635 	u_int op;
1636 	int show_pointers;
1637 
1638 	if (namelen > 4)
1639 		return (ENOTDIR);
1640 	if (namelen < 4 || name[2] > sizeof(*kf))
1641 		return (EINVAL);
1642 
1643 	buflen = where != NULL ? *sizep : 0;
1644 	op = name[0];
1645 	arg = name[1];
1646 	elem_size = name[2];
1647 	elem_count = name[3];
1648 	outsize = MIN(sizeof(*kf), elem_size);
1649 
1650 	if (elem_size < 1)
1651 		return (EINVAL);
1652 
1653 	show_pointers = suser(curproc) == 0;
1654 
1655 	kf = malloc(sizeof(*kf), M_TEMP, M_WAITOK);
1656 
1657 #define FILLIT2(fp, fdp, i, vp, pr, so) do {				\
1658 	if (buflen >= elem_size && elem_count > 0) {			\
1659 		fill_file(kf, fp, fdp, i, vp, pr, p, so, show_pointers);\
1660 		error = copyout(kf, dp, outsize);			\
1661 		if (error)						\
1662 			break;						\
1663 		dp += elem_size;					\
1664 		buflen -= elem_size;					\
1665 		elem_count--;						\
1666 	}								\
1667 	needed += elem_size;						\
1668 } while (0)
1669 #define FILLIT(fp, fdp, i, vp, pr) \
1670 	FILLIT2(fp, fdp, i, vp, pr, NULL)
1671 #define FILLSO(so) \
1672 	FILLIT2(NULL, NULL, 0, NULL, NULL, so)
1673 
1674 	switch (op) {
1675 	case KERN_FILE_BYFILE:
1676 		/* use the inp-tables to pick up closed connections, too */
1677 		if (arg == DTYPE_SOCKET) {
1678 			struct inpcb *inp;
1679 
1680 			NET_LOCK();
1681 			mtx_enter(&tcbtable.inpt_mtx);
1682 			TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue)
1683 				FILLSO(inp->inp_socket);
1684 			mtx_leave(&tcbtable.inpt_mtx);
1685 #ifdef INET6
1686 			mtx_enter(&tcb6table.inpt_mtx);
1687 			TAILQ_FOREACH(inp, &tcb6table.inpt_queue, inp_queue)
1688 				FILLSO(inp->inp_socket);
1689 			mtx_leave(&tcb6table.inpt_mtx);
1690 #endif
1691 			mtx_enter(&udbtable.inpt_mtx);
1692 			TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) {
1693 				if (in_pcb_is_iterator(inp))
1694 					continue;
1695 				FILLSO(inp->inp_socket);
1696 			}
1697 			mtx_leave(&udbtable.inpt_mtx);
1698 #ifdef INET6
1699 			mtx_enter(&udb6table.inpt_mtx);
1700 			TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue) {
1701 				if (in_pcb_is_iterator(inp))
1702 					continue;
1703 				FILLSO(inp->inp_socket);
1704 			}
1705 			mtx_leave(&udb6table.inpt_mtx);
1706 #endif
1707 			mtx_enter(&rawcbtable.inpt_mtx);
1708 			TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue)
1709 				FILLSO(inp->inp_socket);
1710 			mtx_leave(&rawcbtable.inpt_mtx);
1711 #ifdef INET6
1712 			mtx_enter(&rawin6pcbtable.inpt_mtx);
1713 			TAILQ_FOREACH(inp, &rawin6pcbtable.inpt_queue,
1714 			    inp_queue)
1715 				FILLSO(inp->inp_socket);
1716 			mtx_leave(&rawin6pcbtable.inpt_mtx);
1717 #endif
1718 			NET_UNLOCK();
1719 		}
1720 		fp = NULL;
1721 		while ((fp = fd_iterfile(fp, p)) != NULL) {
1722 			if ((arg == 0 || fp->f_type == arg)) {
1723 				int af, skip = 0;
1724 				if (arg == DTYPE_SOCKET && fp->f_type == arg) {
1725 					af = ((struct socket *)fp->f_data)->
1726 					    so_proto->pr_domain->dom_family;
1727 					if (af == AF_INET || af == AF_INET6)
1728 						skip = 1;
1729 				}
1730 				if (!skip)
1731 					FILLIT(fp, NULL, 0, NULL, NULL);
1732 			}
1733 		}
1734 		break;
1735 	case KERN_FILE_BYPID:
1736 		/* A arg of -1 indicates all processes */
1737 		if (arg < -1) {
1738 			error = EINVAL;
1739 			break;
1740 		}
1741 		matched = 0;
1742 		LIST_FOREACH(pr, &allprocess, ps_list) {
1743 			/*
1744 			 * skip system, exiting, embryonic and undead
1745 			 * processes
1746 			 */
1747 			if (pr->ps_flags & (PS_SYSTEM | PS_EMBRYO | PS_EXITING))
1748 				continue;
1749 			if (arg >= 0 && pr->ps_pid != (pid_t)arg) {
1750 				/* not the pid we are looking for */
1751 				continue;
1752 			}
1753 
1754 			refcnt_take(&pr->ps_refcnt);
1755 
1756 			matched = 1;
1757 			fdp = pr->ps_fd;
1758 			if (pr->ps_textvp)
1759 				FILLIT(NULL, NULL, KERN_FILE_TEXT, pr->ps_textvp, pr);
1760 			if (fdp->fd_cdir)
1761 				FILLIT(NULL, NULL, KERN_FILE_CDIR, fdp->fd_cdir, pr);
1762 			if (fdp->fd_rdir)
1763 				FILLIT(NULL, NULL, KERN_FILE_RDIR, fdp->fd_rdir, pr);
1764 			if (pr->ps_tracevp)
1765 				FILLIT(NULL, NULL, KERN_FILE_TRACE, pr->ps_tracevp, pr);
1766 			for (i = 0; i < fdp->fd_nfiles; i++) {
1767 				if ((fp = fd_getfile(fdp, i)) == NULL)
1768 					continue;
1769 				FILLIT(fp, fdp, i, NULL, pr);
1770 				FRELE(fp, p);
1771 			}
1772 
1773 			refcnt_rele_wake(&pr->ps_refcnt);
1774 
1775 			/* pid is unique, stop searching */
1776 			if (arg >= 0)
1777 				break;
1778 		}
1779 		if (!matched)
1780 			error = ESRCH;
1781 		break;
1782 	case KERN_FILE_BYUID:
1783 		LIST_FOREACH(pr, &allprocess, ps_list) {
1784 			/*
1785 			 * skip system, exiting, embryonic and undead
1786 			 * processes
1787 			 */
1788 			if (pr->ps_flags & (PS_SYSTEM | PS_EMBRYO | PS_EXITING))
1789 				continue;
1790 			if (arg >= 0 && pr->ps_ucred->cr_uid != (uid_t)arg) {
1791 				/* not the uid we are looking for */
1792 				continue;
1793 			}
1794 
1795 			refcnt_take(&pr->ps_refcnt);
1796 
1797 			fdp = pr->ps_fd;
1798 			if (fdp->fd_cdir)
1799 				FILLIT(NULL, NULL, KERN_FILE_CDIR, fdp->fd_cdir, pr);
1800 			if (fdp->fd_rdir)
1801 				FILLIT(NULL, NULL, KERN_FILE_RDIR, fdp->fd_rdir, pr);
1802 			if (pr->ps_tracevp)
1803 				FILLIT(NULL, NULL, KERN_FILE_TRACE, pr->ps_tracevp, pr);
1804 			for (i = 0; i < fdp->fd_nfiles; i++) {
1805 				if ((fp = fd_getfile(fdp, i)) == NULL)
1806 					continue;
1807 				FILLIT(fp, fdp, i, NULL, pr);
1808 				FRELE(fp, p);
1809 			}
1810 
1811 			refcnt_rele_wake(&pr->ps_refcnt);
1812 		}
1813 		break;
1814 	default:
1815 		error = EINVAL;
1816 		break;
1817 	}
1818 	free(kf, M_TEMP, sizeof(*kf));
1819 
1820 	if (!error) {
1821 		if (where == NULL)
1822 			needed += KERN_FILESLOP * elem_size;
1823 		else if (*sizep < needed)
1824 			error = ENOMEM;
1825 		*sizep = needed;
1826 	}
1827 
1828 	return (error);
1829 }
1830 
1831 /*
1832  * try over estimating by 5 procs
1833  */
1834 #define KERN_PROCSLOP	5
1835 
1836 int
1837 sysctl_doproc(int *name, u_int namelen, char *where, size_t *sizep)
1838 {
1839 	struct kinfo_proc *kproc = NULL;
1840 	struct proc *p;
1841 	struct process *pr;
1842 	char *dp;
1843 	int arg, buflen, doingzomb, elem_size, elem_count;
1844 	int error, needed, op;
1845 	int dothreads = 0;
1846 	int show_pointers;
1847 
1848 	dp = where;
1849 	buflen = where != NULL ? *sizep : 0;
1850 	needed = error = 0;
1851 
1852 	if (namelen != 4 || name[2] <= 0 || name[3] < 0 ||
1853 	    name[2] > sizeof(*kproc))
1854 		return (EINVAL);
1855 	op = name[0];
1856 	arg = name[1];
1857 	elem_size = name[2];
1858 	elem_count = name[3];
1859 
1860 	dothreads = op & KERN_PROC_SHOW_THREADS;
1861 	op &= ~KERN_PROC_SHOW_THREADS;
1862 
1863 	show_pointers = suser(curproc) == 0;
1864 
1865 	if (where != NULL)
1866 		kproc = malloc(sizeof(*kproc), M_TEMP, M_WAITOK);
1867 
1868 	pr = LIST_FIRST(&allprocess);
1869 	doingzomb = 0;
1870 again:
1871 	for (; pr != NULL; pr = LIST_NEXT(pr, ps_list)) {
1872 		/* XXX skip processes in the middle of being zapped */
1873 		if (pr->ps_pgrp == NULL)
1874 			continue;
1875 
1876 		/*
1877 		 * Skip embryonic processes.
1878 		 */
1879 		if (pr->ps_flags & PS_EMBRYO)
1880 			continue;
1881 
1882 		/*
1883 		 * TODO - make more efficient (see notes below).
1884 		 */
1885 		switch (op) {
1886 
1887 		case KERN_PROC_PID:
1888 			/* could do this with just a lookup */
1889 			if (pr->ps_pid != (pid_t)arg)
1890 				continue;
1891 			break;
1892 
1893 		case KERN_PROC_PGRP:
1894 			/* could do this by traversing pgrp */
1895 			if (pr->ps_pgrp->pg_id != (pid_t)arg)
1896 				continue;
1897 			break;
1898 
1899 		case KERN_PROC_SESSION:
1900 			if (pr->ps_session->s_leader == NULL ||
1901 			    pr->ps_session->s_leader->ps_pid != (pid_t)arg)
1902 				continue;
1903 			break;
1904 
1905 		case KERN_PROC_TTY:
1906 			if ((pr->ps_flags & PS_CONTROLT) == 0 ||
1907 			    pr->ps_session->s_ttyp == NULL ||
1908 			    pr->ps_session->s_ttyp->t_dev != (dev_t)arg)
1909 				continue;
1910 			break;
1911 
1912 		case KERN_PROC_UID:
1913 			if (pr->ps_ucred->cr_uid != (uid_t)arg)
1914 				continue;
1915 			break;
1916 
1917 		case KERN_PROC_RUID:
1918 			if (pr->ps_ucred->cr_ruid != (uid_t)arg)
1919 				continue;
1920 			break;
1921 
1922 		case KERN_PROC_ALL:
1923 			if (pr->ps_flags & PS_SYSTEM)
1924 				continue;
1925 			break;
1926 
1927 		case KERN_PROC_KTHREAD:
1928 			/* no filtering */
1929 			break;
1930 
1931 		default:
1932 			error = EINVAL;
1933 			goto err;
1934 		}
1935 
1936 		if (buflen >= elem_size && elem_count > 0) {
1937 			fill_kproc(pr, kproc, NULL, show_pointers);
1938 			error = copyout(kproc, dp, elem_size);
1939 			if (error)
1940 				goto err;
1941 			dp += elem_size;
1942 			buflen -= elem_size;
1943 			elem_count--;
1944 		}
1945 		needed += elem_size;
1946 
1947 		/* Skip per-thread entries if not required by op */
1948 		if (!dothreads)
1949 			continue;
1950 
1951 		TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
1952 			if (buflen >= elem_size && elem_count > 0) {
1953 				fill_kproc(pr, kproc, p, show_pointers);
1954 				error = copyout(kproc, dp, elem_size);
1955 				if (error)
1956 					goto err;
1957 				dp += elem_size;
1958 				buflen -= elem_size;
1959 				elem_count--;
1960 			}
1961 			needed += elem_size;
1962 		}
1963 	}
1964 	if (doingzomb == 0) {
1965 		pr = LIST_FIRST(&zombprocess);
1966 		doingzomb++;
1967 		goto again;
1968 	}
1969 	if (where != NULL) {
1970 		*sizep = dp - where;
1971 		if (needed > *sizep) {
1972 			error = ENOMEM;
1973 			goto err;
1974 		}
1975 	} else {
1976 		needed += KERN_PROCSLOP * elem_size;
1977 		*sizep = needed;
1978 	}
1979 err:
1980 	if (kproc)
1981 		free(kproc, M_TEMP, sizeof(*kproc));
1982 	return (error);
1983 }
1984 
1985 /*
1986  * Fill in a kproc structure for the specified process.
1987  */
1988 void
1989 fill_kproc(struct process *pr, struct kinfo_proc *ki, struct proc *p,
1990     int show_pointers)
1991 {
1992 	struct session *s = pr->ps_session;
1993 	struct tty *tp;
1994 	struct vmspace *vm = pr->ps_vmspace;
1995 	struct timespec booted, st, ut, utc;
1996 	struct tusage tu;
1997 	int isthread;
1998 
1999 	isthread = p != NULL;
2000 	if (!isthread) {
2001 		p = pr->ps_mainproc;		/* XXX */
2002 		tuagg_get_process(&tu, pr);
2003 	} else
2004 		tuagg_get_proc(&tu, p);
2005 
2006 	FILL_KPROC(ki, strlcpy, p, pr, pr->ps_ucred, pr->ps_pgrp,
2007 	    p, pr, s, vm, pr->ps_limit, pr->ps_sigacts, &tu, isthread,
2008 	    show_pointers);
2009 
2010 	/* stuff that's too painful to generalize into the macros */
2011 	if (s->s_leader)
2012 		ki->p_sid = s->s_leader->ps_pid;
2013 
2014 	if ((pr->ps_flags & PS_CONTROLT) && (tp = s->s_ttyp)) {
2015 		ki->p_tdev = tp->t_dev;
2016 		ki->p_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : -1;
2017 		if (show_pointers)
2018 			ki->p_tsess = PTRTOINT64(tp->t_session);
2019 	} else {
2020 		ki->p_tdev = NODEV;
2021 		ki->p_tpgid = -1;
2022 	}
2023 
2024 	/* fixups that can only be done in the kernel */
2025 	if ((pr->ps_flags & PS_ZOMBIE) == 0) {
2026 		if ((pr->ps_flags & PS_EMBRYO) == 0 && vm != NULL)
2027 			ki->p_vm_rssize = vm_resident_count(vm);
2028 		calctsru(&tu, &ut, &st, NULL);
2029 		ki->p_uutime_sec = ut.tv_sec;
2030 		ki->p_uutime_usec = ut.tv_nsec/1000;
2031 		ki->p_ustime_sec = st.tv_sec;
2032 		ki->p_ustime_usec = st.tv_nsec/1000;
2033 
2034 		/* Convert starting uptime to a starting UTC time. */
2035 		nanoboottime(&booted);
2036 		timespecadd(&booted, &pr->ps_start, &utc);
2037 		ki->p_ustart_sec = utc.tv_sec;
2038 		ki->p_ustart_usec = utc.tv_nsec / 1000;
2039 
2040 #ifdef MULTIPROCESSOR
2041 		if (p->p_cpu != NULL)
2042 			ki->p_cpuid = CPU_INFO_UNIT(p->p_cpu);
2043 #endif
2044 	}
2045 
2046 	/* get %cpu and schedule state: just one thread or sum of all? */
2047 	if (isthread) {
2048 		ki->p_pctcpu = p->p_pctcpu;
2049 		ki->p_stat   = p->p_stat;
2050 	} else {
2051 		ki->p_pctcpu = 0;
2052 		ki->p_stat = (pr->ps_flags & PS_ZOMBIE) ? SDEAD : SIDL;
2053 		TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
2054 			ki->p_pctcpu += p->p_pctcpu;
2055 			/* find best state: ONPROC > RUN > STOP > SLEEP > .. */
2056 			if (p->p_stat == SONPROC || ki->p_stat == SONPROC)
2057 				ki->p_stat = SONPROC;
2058 			else if (p->p_stat == SRUN || ki->p_stat == SRUN)
2059 				ki->p_stat = SRUN;
2060 			else if (p->p_stat == SSTOP || ki->p_stat == SSTOP)
2061 				ki->p_stat = SSTOP;
2062 			else if (p->p_stat == SSLEEP)
2063 				ki->p_stat = SSLEEP;
2064 		}
2065 	}
2066 }
2067 
2068 int
2069 sysctl_proc_args(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2070     struct proc *cp)
2071 {
2072 	struct process *vpr;
2073 	pid_t pid;
2074 	struct ps_strings pss;
2075 	struct iovec iov;
2076 	struct uio uio;
2077 	int error, cnt, op;
2078 	size_t limit;
2079 	char **rargv, **vargv;		/* reader vs. victim */
2080 	char *rarg, *varg, *buf;
2081 	struct vmspace *vm;
2082 	vaddr_t ps_strings;
2083 
2084 	if (namelen > 2)
2085 		return (ENOTDIR);
2086 	if (namelen < 2)
2087 		return (EINVAL);
2088 
2089 	pid = name[0];
2090 	op = name[1];
2091 
2092 	switch (op) {
2093 	case KERN_PROC_ARGV:
2094 	case KERN_PROC_NARGV:
2095 	case KERN_PROC_ENV:
2096 	case KERN_PROC_NENV:
2097 		break;
2098 	default:
2099 		return (EOPNOTSUPP);
2100 	}
2101 
2102 	if ((vpr = prfind(pid)) == NULL)
2103 		return (ESRCH);
2104 
2105 	if (oldp == NULL) {
2106 		if (op == KERN_PROC_NARGV || op == KERN_PROC_NENV)
2107 			*oldlenp = sizeof(int);
2108 		else
2109 			*oldlenp = ARG_MAX;	/* XXX XXX XXX */
2110 		return (0);
2111 	}
2112 
2113 	/* Either system process or exiting/zombie */
2114 	if (vpr->ps_flags & (PS_SYSTEM | PS_EXITING))
2115 		return (EINVAL);
2116 
2117 	/* Execing - danger. */
2118 	if ((vpr->ps_flags & PS_INEXEC))
2119 		return (EBUSY);
2120 
2121 	/* Only owner or root can get env */
2122 	if ((op == KERN_PROC_NENV || op == KERN_PROC_ENV) &&
2123 	    (vpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
2124 	    (error = suser(cp)) != 0))
2125 		return (error);
2126 
2127 	ps_strings = vpr->ps_strings;
2128 	vm = vpr->ps_vmspace;
2129 	uvmspace_addref(vm);
2130 	vpr = NULL;
2131 
2132 	buf = malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
2133 
2134 	iov.iov_base = &pss;
2135 	iov.iov_len = sizeof(pss);
2136 	uio.uio_iov = &iov;
2137 	uio.uio_iovcnt = 1;
2138 	uio.uio_offset = (off_t)ps_strings;
2139 	uio.uio_resid = sizeof(pss);
2140 	uio.uio_segflg = UIO_SYSSPACE;
2141 	uio.uio_rw = UIO_READ;
2142 	uio.uio_procp = cp;
2143 
2144 	if ((error = uvm_io(&vm->vm_map, &uio, 0)) != 0)
2145 		goto out;
2146 
2147 	if (op == KERN_PROC_NARGV) {
2148 		error = sysctl_rdint(oldp, oldlenp, NULL, pss.ps_nargvstr);
2149 		goto out;
2150 	}
2151 	if (op == KERN_PROC_NENV) {
2152 		error = sysctl_rdint(oldp, oldlenp, NULL, pss.ps_nenvstr);
2153 		goto out;
2154 	}
2155 
2156 	if (op == KERN_PROC_ARGV) {
2157 		cnt = pss.ps_nargvstr;
2158 		vargv = pss.ps_argvstr;
2159 	} else {
2160 		cnt = pss.ps_nenvstr;
2161 		vargv = pss.ps_envstr;
2162 	}
2163 
2164 	/* -1 to have space for a terminating NUL */
2165 	limit = *oldlenp - 1;
2166 	*oldlenp = 0;
2167 
2168 	rargv = oldp;
2169 
2170 	/*
2171 	 * *oldlenp - number of bytes copied out into readers buffer.
2172 	 * limit - maximal number of bytes allowed into readers buffer.
2173 	 * rarg - pointer into readers buffer where next arg will be stored.
2174 	 * rargv - pointer into readers buffer where the next rarg pointer
2175 	 *  will be stored.
2176 	 * vargv - pointer into victim address space where the next argument
2177 	 *  will be read.
2178 	 */
2179 
2180 	/* space for cnt pointers and a NULL */
2181 	rarg = (char *)(rargv + cnt + 1);
2182 	*oldlenp += (cnt + 1) * sizeof(char **);
2183 
2184 	while (cnt > 0 && *oldlenp < limit) {
2185 		size_t len, vstrlen;
2186 
2187 		/* Write to readers argv */
2188 		if ((error = copyout(&rarg, rargv, sizeof(rarg))) != 0)
2189 			goto out;
2190 
2191 		/* read the victim argv */
2192 		iov.iov_base = &varg;
2193 		iov.iov_len = sizeof(varg);
2194 		uio.uio_iov = &iov;
2195 		uio.uio_iovcnt = 1;
2196 		uio.uio_offset = (off_t)(vaddr_t)vargv;
2197 		uio.uio_resid = sizeof(varg);
2198 		uio.uio_segflg = UIO_SYSSPACE;
2199 		uio.uio_rw = UIO_READ;
2200 		uio.uio_procp = cp;
2201 		if ((error = uvm_io(&vm->vm_map, &uio, 0)) != 0)
2202 			goto out;
2203 
2204 		if (varg == NULL)
2205 			break;
2206 
2207 		/*
2208 		 * read the victim arg. We must jump through hoops to avoid
2209 		 * crossing a page boundary too much and returning an error.
2210 		 */
2211 more:
2212 		len = PAGE_SIZE - (((vaddr_t)varg) & PAGE_MASK);
2213 		/* leave space for the terminating NUL */
2214 		iov.iov_base = buf;
2215 		iov.iov_len = len;
2216 		uio.uio_iov = &iov;
2217 		uio.uio_iovcnt = 1;
2218 		uio.uio_offset = (off_t)(vaddr_t)varg;
2219 		uio.uio_resid = len;
2220 		uio.uio_segflg = UIO_SYSSPACE;
2221 		uio.uio_rw = UIO_READ;
2222 		uio.uio_procp = cp;
2223 		if ((error = uvm_io(&vm->vm_map, &uio, 0)) != 0)
2224 			goto out;
2225 
2226 		for (vstrlen = 0; vstrlen < len; vstrlen++) {
2227 			if (buf[vstrlen] == '\0')
2228 				break;
2229 		}
2230 
2231 		/* Don't overflow readers buffer. */
2232 		if (*oldlenp + vstrlen + 1 >= limit) {
2233 			error = ENOMEM;
2234 			goto out;
2235 		}
2236 
2237 		if ((error = copyout(buf, rarg, vstrlen)) != 0)
2238 			goto out;
2239 
2240 		*oldlenp += vstrlen;
2241 		rarg += vstrlen;
2242 
2243 		/* The string didn't end in this page? */
2244 		if (vstrlen == len) {
2245 			varg += vstrlen;
2246 			goto more;
2247 		}
2248 
2249 		/* End of string. Terminate it with a NUL */
2250 		buf[0] = '\0';
2251 		if ((error = copyout(buf, rarg, 1)) != 0)
2252 			goto out;
2253 		*oldlenp += 1;
2254 		rarg += 1;
2255 
2256 		vargv++;
2257 		rargv++;
2258 		cnt--;
2259 	}
2260 
2261 	if (*oldlenp >= limit) {
2262 		error = ENOMEM;
2263 		goto out;
2264 	}
2265 
2266 	/* Write the terminating null */
2267 	rarg = NULL;
2268 	error = copyout(&rarg, rargv, sizeof(rarg));
2269 
2270 out:
2271 	uvmspace_free(vm);
2272 	free(buf, M_TEMP, PAGE_SIZE);
2273 	return (error);
2274 }
2275 
2276 int
2277 sysctl_proc_cwd(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2278     struct proc *cp)
2279 {
2280 	struct process *findpr;
2281 	struct vnode *vp;
2282 	pid_t pid;
2283 	int error;
2284 	size_t lenused, len;
2285 	char *path, *bp, *bend;
2286 
2287 	if (namelen > 1)
2288 		return (ENOTDIR);
2289 	if (namelen < 1)
2290 		return (EINVAL);
2291 
2292 	pid = name[0];
2293 	if ((findpr = prfind(pid)) == NULL)
2294 		return (ESRCH);
2295 
2296 	if (oldp == NULL) {
2297 		*oldlenp = MAXPATHLEN * 4;
2298 		return (0);
2299 	}
2300 
2301 	/* Either system process or exiting/zombie */
2302 	if (findpr->ps_flags & (PS_SYSTEM | PS_EXITING))
2303 		return (EINVAL);
2304 
2305 	/* Only owner or root can get cwd */
2306 	if (findpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
2307 	    (error = suser(cp)) != 0)
2308 		return (error);
2309 
2310 	len = *oldlenp;
2311 	if (len > MAXPATHLEN * 4)
2312 		len = MAXPATHLEN * 4;
2313 	else if (len < 2)
2314 		return (ERANGE);
2315 	*oldlenp = 0;
2316 
2317 	/* snag a reference to the vnode before we can sleep */
2318 	vp = findpr->ps_fd->fd_cdir;
2319 	vref(vp);
2320 
2321 	path = malloc(len, M_TEMP, M_WAITOK);
2322 
2323 	bp = &path[len];
2324 	bend = bp;
2325 	*(--bp) = '\0';
2326 
2327 	/* Same as sys__getcwd */
2328 	error = vfs_getcwd_common(vp, NULL,
2329 	    &bp, path, len / 2, GETCWD_CHECK_ACCESS, cp);
2330 	if (error == 0) {
2331 		*oldlenp = lenused = bend - bp;
2332 		error = copyout(bp, oldp, lenused);
2333 	}
2334 
2335 	vrele(vp);
2336 	free(path, M_TEMP, len);
2337 
2338 	return (error);
2339 }
2340 
2341 int
2342 sysctl_proc_nobroadcastkill(int *name, u_int namelen, void *newp, size_t newlen,
2343     void *oldp, size_t *oldlenp, struct proc *cp)
2344 {
2345 	struct process *findpr;
2346 	pid_t pid;
2347 	int error, flag;
2348 
2349 	if (namelen > 1)
2350 		return (ENOTDIR);
2351 	if (namelen < 1)
2352 		return (EINVAL);
2353 
2354 	pid = name[0];
2355 	if ((findpr = prfind(pid)) == NULL)
2356 		return (ESRCH);
2357 
2358 	/* Either system process or exiting/zombie */
2359 	if (findpr->ps_flags & (PS_SYSTEM | PS_EXITING))
2360 		return (EINVAL);
2361 
2362 	/* Only root can change PS_NOBROADCASTKILL */
2363 	if (newp != NULL && (error = suser(cp)) != 0)
2364 		return (error);
2365 
2366 	/* get the PS_NOBROADCASTKILL flag */
2367 	flag = findpr->ps_flags & PS_NOBROADCASTKILL ? 1 : 0;
2368 
2369 	error = sysctl_int(oldp, oldlenp, newp, newlen, &flag);
2370 	if (error == 0 && newp) {
2371 		if (flag)
2372 			atomic_setbits_int(&findpr->ps_flags,
2373 			    PS_NOBROADCASTKILL);
2374 		else
2375 			atomic_clearbits_int(&findpr->ps_flags,
2376 			    PS_NOBROADCASTKILL);
2377 	}
2378 
2379 	return (error);
2380 }
2381 
2382 /* Arbitrary but reasonable limit for one iteration. */
2383 #define	VMMAP_MAXLEN	MAXPHYS
2384 
2385 int
2386 sysctl_proc_vmmap(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2387     struct proc *cp)
2388 {
2389 	struct process *findpr;
2390 	pid_t pid;
2391 	int error;
2392 	size_t oldlen, len;
2393 	struct kinfo_vmentry *kve, *ukve;
2394 	u_long *ustart, start;
2395 
2396 	if (namelen > 1)
2397 		return (ENOTDIR);
2398 	if (namelen < 1)
2399 		return (EINVAL);
2400 
2401 	/* Provide max buffer length as hint. */
2402 	if (oldp == NULL) {
2403 		if (oldlenp == NULL)
2404 			return (EINVAL);
2405 		else {
2406 			*oldlenp = VMMAP_MAXLEN;
2407 			return (0);
2408 		}
2409 	}
2410 
2411 	pid = name[0];
2412 	if (pid == cp->p_p->ps_pid) {
2413 		/* Self process mapping. */
2414 		findpr = cp->p_p;
2415 	} else if (pid > 0) {
2416 		if ((findpr = prfind(pid)) == NULL)
2417 			return (ESRCH);
2418 
2419 		/* Either system process or exiting/zombie */
2420 		if (findpr->ps_flags & (PS_SYSTEM | PS_EXITING))
2421 			return (EINVAL);
2422 
2423 #if 1
2424 		/* XXX Allow only root for now */
2425 		if ((error = suser(cp)) != 0)
2426 			return (error);
2427 #else
2428 		/* Only owner or root can get vmmap */
2429 		if (findpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
2430 		    (error = suser(cp)) != 0)
2431 			return (error);
2432 #endif
2433 	} else {
2434 		/* Only root can get kernel_map */
2435 		if ((error = suser(cp)) != 0)
2436 			return (error);
2437 		findpr = NULL;
2438 	}
2439 
2440 	/* Check the given size. */
2441 	oldlen = *oldlenp;
2442 	if (oldlen == 0 || oldlen % sizeof(*kve) != 0)
2443 		return (EINVAL);
2444 
2445 	/* Deny huge allocation. */
2446 	if (oldlen > VMMAP_MAXLEN)
2447 		return (EINVAL);
2448 
2449 	/*
2450 	 * Iterate from the given address passed as the first element's
2451 	 * kve_start via oldp.
2452 	 */
2453 	ukve = (struct kinfo_vmentry *)oldp;
2454 	ustart = &ukve->kve_start;
2455 	error = copyin(ustart, &start, sizeof(start));
2456 	if (error != 0)
2457 		return (error);
2458 
2459 	/* Allocate wired memory to not block. */
2460 	kve = malloc(oldlen, M_TEMP, M_WAITOK);
2461 
2462 	/* Set the base address and read entries. */
2463 	kve[0].kve_start = start;
2464 	len = oldlen;
2465 	error = fill_vmmap(findpr, kve, &len);
2466 	if (error != 0 && error != ENOMEM)
2467 		goto done;
2468 	if (len == 0)
2469 		goto done;
2470 
2471 	KASSERT(len <= oldlen);
2472 	KASSERT((len % sizeof(struct kinfo_vmentry)) == 0);
2473 
2474 	error = copyout(kve, oldp, len);
2475 
2476 done:
2477 	*oldlenp = len;
2478 
2479 	free(kve, M_TEMP, oldlen);
2480 
2481 	return (error);
2482 }
2483 #endif
2484 
2485 /*
2486  * Initialize disknames/diskstats for export by sysctl. If update is set,
2487  * then we simply update the disk statistics information.
2488  */
2489 int
2490 sysctl_diskinit(int update, struct proc *p)
2491 {
2492 	struct diskstats *sdk;
2493 	struct disk *dk;
2494 	const char *duid;
2495 	int error, changed = 0;
2496 
2497 	KERNEL_ASSERT_LOCKED();
2498 
2499 	if ((error = rw_enter(&sysctl_disklock, RW_WRITE|RW_INTR)) != 0)
2500 		return error;
2501 
2502 	/* Run in a loop, disks may change while malloc sleeps. */
2503 	while (disk_change) {
2504 		int tlen, count;
2505 
2506 		disk_change = 0;
2507 
2508 		tlen = 0;
2509 		TAILQ_FOREACH(dk, &disklist, dk_link) {
2510 			if (dk->dk_name)
2511 				tlen += strlen(dk->dk_name);
2512 			tlen += 18;	/* label uid + separators */
2513 		}
2514 		tlen++;
2515 		/* disk_count may change when malloc sleeps */
2516 		count = disk_count;
2517 
2518 		/*
2519 		 * The sysctl_disklock ensures that no other process can
2520 		 * allocate disknames and diskstats while our malloc sleeps.
2521 		 */
2522 		free(disknames, M_SYSCTL, disknameslen);
2523 		free(diskstats, M_SYSCTL, diskstatslen);
2524 		diskstats = NULL;
2525 		disknames = NULL;
2526 		diskstats = mallocarray(count, sizeof(struct diskstats),
2527 		    M_SYSCTL, M_WAITOK|M_ZERO);
2528 		diskstatslen = count * sizeof(struct diskstats);
2529 		disknames = malloc(tlen, M_SYSCTL, M_WAITOK|M_ZERO);
2530 		disknameslen = tlen;
2531 		disknames[0] = '\0';
2532 		changed = 1;
2533 	}
2534 
2535 	if (changed) {
2536 		int l;
2537 
2538 		l = 0;
2539 		sdk = diskstats;
2540 		TAILQ_FOREACH(dk, &disklist, dk_link) {
2541 			duid = NULL;
2542 			if (dk->dk_label && !duid_iszero(dk->dk_label->d_uid))
2543 				duid = duid_format(dk->dk_label->d_uid);
2544 			snprintf(disknames + l, disknameslen - l, "%s:%s,",
2545 			    dk->dk_name ? dk->dk_name : "",
2546 			    duid ? duid : "");
2547 			l += strlen(disknames + l);
2548 			strlcpy(sdk->ds_name, dk->dk_name,
2549 			    sizeof(sdk->ds_name));
2550 			mtx_enter(&dk->dk_mtx);
2551 			sdk->ds_busy = dk->dk_busy;
2552 			sdk->ds_rxfer = dk->dk_rxfer;
2553 			sdk->ds_wxfer = dk->dk_wxfer;
2554 			sdk->ds_seek = dk->dk_seek;
2555 			sdk->ds_rbytes = dk->dk_rbytes;
2556 			sdk->ds_wbytes = dk->dk_wbytes;
2557 			sdk->ds_attachtime = dk->dk_attachtime;
2558 			sdk->ds_timestamp = dk->dk_timestamp;
2559 			sdk->ds_time = dk->dk_time;
2560 			mtx_leave(&dk->dk_mtx);
2561 			sdk++;
2562 		}
2563 
2564 		/* Eliminate trailing comma */
2565 		if (l != 0)
2566 			disknames[l - 1] = '\0';
2567 	} else if (update) {
2568 		/* Just update, number of drives hasn't changed */
2569 		sdk = diskstats;
2570 		TAILQ_FOREACH(dk, &disklist, dk_link) {
2571 			strlcpy(sdk->ds_name, dk->dk_name,
2572 			    sizeof(sdk->ds_name));
2573 			mtx_enter(&dk->dk_mtx);
2574 			sdk->ds_busy = dk->dk_busy;
2575 			sdk->ds_rxfer = dk->dk_rxfer;
2576 			sdk->ds_wxfer = dk->dk_wxfer;
2577 			sdk->ds_seek = dk->dk_seek;
2578 			sdk->ds_rbytes = dk->dk_rbytes;
2579 			sdk->ds_wbytes = dk->dk_wbytes;
2580 			sdk->ds_attachtime = dk->dk_attachtime;
2581 			sdk->ds_timestamp = dk->dk_timestamp;
2582 			sdk->ds_time = dk->dk_time;
2583 			mtx_leave(&dk->dk_mtx);
2584 			sdk++;
2585 		}
2586 	}
2587 	rw_exit_write(&sysctl_disklock);
2588 	return 0;
2589 }
2590 
#if defined(SYSVMSG) || defined(SYSVSEM) || defined(SYSVSHM)
/*
 * Export System V IPC state selected by name[0].
 *
 * KERN_SYSVIPC_MSG_INFO is delegated to sysctl_sysvmsg().  For
 * KERN_SYSVIPC_SEM_INFO and KERN_SYSVIPC_SHM_INFO the output is the
 * global info structure followed by up to semmni/shmmni identifier
 * structures; unused slots are zero-filled.  Sub-systems not compiled in
 * yield EOPNOTSUPP, unknown names and namelen != 1 yield EINVAL.
 *
 * With where == NULL only the total size is stored in *sizep.  A buffer
 * too small for the info structure yields ENOMEM with *sizep = 0.  A
 * buffer that holds the info structure but not every identifier is
 * filled with as many whole identifiers as fit and ENOMEM is returned,
 * unless copyout() itself fails, in which case its error wins.  *sizep
 * is reduced to the number of bytes actually produced.
 */
int
sysctl_sysvipc(int *name, u_int namelen, void *where, size_t *sizep)
{
#ifdef SYSVSEM
	struct sem_sysctl_info *semsi;
#endif
#ifdef SYSVSHM
	struct shm_sysctl_info *shmsi;
#endif
	size_t infosize, dssize, tsize, buflen, bufsiz;
	int i, nds, error, ret;
	void *buf;

	if (namelen != 1)
		return (EINVAL);

	buflen = *sizep;

	switch (*name) {
	case KERN_SYSVIPC_MSG_INFO:
#ifdef SYSVMSG
		return (sysctl_sysvmsg(name, namelen, where, sizep));
#else
		return (EOPNOTSUPP);
#endif
	case KERN_SYSVIPC_SEM_INFO:
#ifdef SYSVSEM
		infosize = sizeof(semsi->seminfo);
		nds = seminfo.semmni;
		dssize = sizeof(semsi->semids[0]);
		break;
#else
		return (EOPNOTSUPP);
#endif
	case KERN_SYSVIPC_SHM_INFO:
#ifdef SYSVSHM
		infosize = sizeof(shmsi->shminfo);
		nds = shminfo.shmmni;
		dssize = sizeof(shmsi->shmids[0]);
		break;
#else
		return (EOPNOTSUPP);
#endif
	default:
		return (EINVAL);
	}
	tsize = infosize + (nds * dssize);

	/* Return just the total size required. */
	if (where == NULL) {
		*sizep = tsize;
		return (0);
	}

	/* Not enough room for even the info struct. */
	if (buflen < infosize) {
		*sizep = 0;
		return (ENOMEM);
	}

	/*
	 * Size the staging buffer with a size_t comparison.  The kernel's
	 * min() takes u_int arguments, so on LP64 a caller-supplied length
	 * above UINT_MAX would be truncated and could yield a buffer
	 * smaller than the data written into it below.
	 */
	bufsiz = (tsize < buflen) ? tsize : buflen;
	buf = malloc(bufsiz, M_TEMP, M_WAITOK|M_ZERO);

	switch (*name) {
#ifdef SYSVSEM
	case KERN_SYSVIPC_SEM_INFO:
		semsi = (struct sem_sysctl_info *)buf;
		semsi->seminfo = seminfo;
		break;
#endif
#ifdef SYSVSHM
	case KERN_SYSVIPC_SHM_INFO:
		shmsi = (struct shm_sysctl_info *)buf;
		shmsi->shminfo = shminfo;
		break;
#endif
	}
	/* From here on buflen counts the caller's space still unused. */
	buflen -= infosize;

	ret = 0;
	if (buflen > 0) {
		/* Fill in the IPC data structures.  */
		for (i = 0; i < nds; i++) {
			if (buflen < dssize) {
				ret = ENOMEM;
				break;
			}
			switch (*name) {
#ifdef SYSVSEM
			case KERN_SYSVIPC_SEM_INFO:
				if (sema[i] != NULL)
					memcpy(&semsi->semids[i], sema[i],
					    dssize);
				else
					memset(&semsi->semids[i], 0, dssize);
				break;
#endif
#ifdef SYSVSHM
			case KERN_SYSVIPC_SHM_INFO:
				if (shmsegs[i] != NULL)
					memcpy(&shmsi->shmids[i], shmsegs[i],
					    dssize);
				else
					memset(&shmsi->shmids[i], 0, dssize);
				break;
#endif
			}
			buflen -= dssize;
		}
	}
	/* Bytes produced = caller's length minus what was left over. */
	*sizep -= buflen;
	error = copyout(buf, where, *sizep);
	free(buf, M_TEMP, bufsiz);
	/* If copyout succeeded, use return code set earlier. */
	return (error ? error : ret);
}
#endif /* SYSVMSG || SYSVSEM || SYSVSHM */
2708 
2709 #ifndef	SMALL_KERNEL
2710 
2711 int
2712 sysctl_intrcnt(int *name, u_int namelen, void *oldp, size_t *oldlenp)
2713 {
2714 	return (evcount_sysctl(name, namelen, oldp, oldlenp, NULL, 0));
2715 }
2716 
2717 
2718 int
2719 sysctl_sensors(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2720     void *newp, size_t newlen)
2721 {
2722 	struct ksensor *ks;
2723 	struct sensor *us;
2724 	struct ksensordev *ksd;
2725 	struct sensordev *usd;
2726 	int dev, numt, ret;
2727 	enum sensor_type type;
2728 
2729 	if (namelen != 1 && namelen != 3)
2730 		return (ENOTDIR);
2731 
2732 	dev = name[0];
2733 	if (namelen == 1) {
2734 		ret = sensordev_get(dev, &ksd);
2735 		if (ret)
2736 			return (ret);
2737 
2738 		/* Grab a copy, to clear the kernel pointers */
2739 		usd = malloc(sizeof(*usd), M_TEMP, M_WAITOK|M_ZERO);
2740 		usd->num = ksd->num;
2741 		strlcpy(usd->xname, ksd->xname, sizeof(usd->xname));
2742 		memcpy(usd->maxnumt, ksd->maxnumt, sizeof(usd->maxnumt));
2743 		usd->sensors_count = ksd->sensors_count;
2744 
2745 		ret = sysctl_rdstruct(oldp, oldlenp, newp, usd,
2746 		    sizeof(struct sensordev));
2747 
2748 		free(usd, M_TEMP, sizeof(*usd));
2749 		return (ret);
2750 	}
2751 
2752 	type = name[1];
2753 	numt = name[2];
2754 
2755 	ret = sensor_find(dev, type, numt, &ks);
2756 	if (ret)
2757 		return (ret);
2758 
2759 	/* Grab a copy, to clear the kernel pointers */
2760 	us = malloc(sizeof(*us), M_TEMP, M_WAITOK|M_ZERO);
2761 	memcpy(us->desc, ks->desc, sizeof(us->desc));
2762 	us->tv = ks->tv;
2763 	us->value = ks->value;
2764 	us->type = ks->type;
2765 	us->status = ks->status;
2766 	us->numt = ks->numt;
2767 	us->flags = ks->flags;
2768 
2769 	ret = sysctl_rdstruct(oldp, oldlenp, newp, us,
2770 	    sizeof(struct sensor));
2771 	free(us, M_TEMP, sizeof(*us));
2772 	return (ret);
2773 }
2774 #endif	/* SMALL_KERNEL */
2775 
2776 int
2777 sysctl_cptime2(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2778     void *newp, size_t newlen)
2779 {
2780 	CPU_INFO_ITERATOR cii;
2781 	struct cpu_info *ci;
2782 	int found = 0;
2783 
2784 	if (namelen != 1)
2785 		return (ENOTDIR);
2786 
2787 	CPU_INFO_FOREACH(cii, ci) {
2788 		if (name[0] == CPU_INFO_UNIT(ci)) {
2789 			found = 1;
2790 			break;
2791 		}
2792 	}
2793 	if (!found)
2794 		return (ENOENT);
2795 
2796 	return (sysctl_rdstruct(oldp, oldlenp, newp,
2797 	    &ci->ci_schedstate.spc_cp_time,
2798 	    sizeof(ci->ci_schedstate.spc_cp_time)));
2799 }
2800 
#if NAUDIO > 0
/*
 * Audio sysctl node.  The only supported leaf is KERN_AUDIO_RECORD,
 * which reads or writes the audio_record_enable integer via
 * sysctl_int().  Returns ENOTDIR unless namelen is 1 and ENOENT for any
 * other leaf.
 */
int
sysctl_audio(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	if (namelen != 1)
		return (ENOTDIR);

	if (name[0] == KERN_AUDIO_RECORD)
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &audio_record_enable));

	return (ENOENT);
}
#endif
2815 
#if NVIDEO > 0
/*
 * Video sysctl node.  The only supported leaf is KERN_VIDEO_RECORD,
 * which reads or writes the video_record_enable integer via
 * sysctl_int().  Returns ENOTDIR unless namelen is 1 and ENOENT for any
 * other leaf.
 */
int
sysctl_video(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	if (namelen != 1)
		return (ENOTDIR);

	if (name[0] == KERN_VIDEO_RECORD)
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &video_record_enable));

	return (ENOENT);
}
#endif
2830 
2831 int
2832 sysctl_cpustats(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2833     void *newp, size_t newlen)
2834 {
2835 	CPU_INFO_ITERATOR cii;
2836 	struct cpustats cs;
2837 	struct cpu_info *ci;
2838 	int found = 0;
2839 
2840 	if (namelen != 1)
2841 		return (ENOTDIR);
2842 
2843 	CPU_INFO_FOREACH(cii, ci) {
2844 		if (name[0] == CPU_INFO_UNIT(ci)) {
2845 			found = 1;
2846 			break;
2847 		}
2848 	}
2849 	if (!found)
2850 		return (ENOENT);
2851 
2852 	memset(&cs, 0, sizeof cs);
2853 	memcpy(&cs.cs_time, &ci->ci_schedstate.spc_cp_time, sizeof(cs.cs_time));
2854 	cs.cs_flags = 0;
2855 	if (cpu_is_online(ci))
2856 		cs.cs_flags |= CPUSTATS_ONLINE;
2857 
2858 	return (sysctl_rdstruct(oldp, oldlenp, newp, &cs, sizeof(cs)));
2859 }
2860 
2861 int
2862 sysctl_utc_offset(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
2863 {
2864 	struct timespec adjusted, now;
2865 	int adjustment_seconds, error, new_offset_minutes, old_offset_minutes;
2866 
2867 	old_offset_minutes = utc_offset / 60;	/* seconds -> minutes */
2868 	new_offset_minutes = old_offset_minutes;
2869 	error = sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
2870 	     &new_offset_minutes);
2871 	if (error)
2872 		return error;
2873 	if (new_offset_minutes < -24 * 60 || new_offset_minutes > 24 * 60)
2874 		return EINVAL;
2875 	if (new_offset_minutes == old_offset_minutes)
2876 		return 0;
2877 
2878 	utc_offset = new_offset_minutes * 60;	/* minutes -> seconds */
2879 	adjustment_seconds = (new_offset_minutes - old_offset_minutes) * 60;
2880 
2881 	nanotime(&now);
2882 	adjusted = now;
2883 	adjusted.tv_sec -= adjustment_seconds;
2884 	tc_setrealtimeclock(&adjusted);
2885 	resettodr();
2886 
2887 	return 0;
2888 }
2889