xref: /openbsd-src/sys/kern/kern_sysctl.c (revision 2d79d4b5a10f3e117aa79e926f1f059309bd52b2)
1 /*	$OpenBSD: kern_sysctl.c,v 1.441 2024/08/20 07:48:23 mvs Exp $	*/
2 /*	$NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $	*/
3 
4 /*-
5  * Copyright (c) 1982, 1986, 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Mike Karels at Berkeley Software Design, Inc.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)kern_sysctl.c	8.4 (Berkeley) 4/14/94
36  */
37 
38 /*
39  * sysctl system call.
40  */
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/atomic.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/pool.h>
48 #include <sys/proc.h>
49 #include <sys/resourcevar.h>
50 #include <sys/signalvar.h>
51 #include <sys/fcntl.h>
52 #include <sys/file.h>
53 #include <sys/filedesc.h>
54 #include <sys/vnode.h>
55 #include <sys/unistd.h>
56 #include <sys/buf.h>
57 #include <sys/clockintr.h>
58 #include <sys/tty.h>
59 #include <sys/disklabel.h>
60 #include <sys/disk.h>
61 #include <sys/sysctl.h>
62 #include <sys/msgbuf.h>
63 #include <sys/vmmeter.h>
64 #include <sys/namei.h>
65 #include <sys/exec.h>
66 #include <sys/mbuf.h>
67 #include <sys/percpu.h>
68 #include <sys/sensors.h>
69 #include <sys/pipe.h>
70 #include <sys/eventvar.h>
71 #include <sys/socketvar.h>
72 #include <sys/socket.h>
73 #include <sys/domain.h>
74 #include <sys/protosw.h>
75 #include <sys/pledge.h>
76 #include <sys/timetc.h>
77 #include <sys/evcount.h>
78 #include <sys/un.h>
79 #include <sys/unpcb.h>
80 #include <sys/sched.h>
81 #include <sys/mount.h>
82 #include <sys/syscallargs.h>
83 #include <sys/wait.h>
84 #include <sys/witness.h>
85 
86 #include <uvm/uvm_extern.h>
87 
88 #include <dev/cons.h>
89 
90 #include <dev/usb/ucomvar.h>
91 
92 #include <net/route.h>
93 #include <netinet/in.h>
94 #include <netinet/ip.h>
95 #include <netinet/ip_var.h>
96 #include <netinet/in_pcb.h>
97 #include <netinet/ip6.h>
98 #include <netinet/tcp.h>
99 #include <netinet/tcp_timer.h>
100 #include <netinet/tcp_var.h>
101 #include <netinet/udp.h>
102 #include <netinet/udp_var.h>
103 #include <netinet6/ip6_var.h>
104 
105 #ifdef DDB
106 #include <ddb/db_var.h>
107 #endif
108 
109 #ifdef SYSVMSG
110 #include <sys/msg.h>
111 #endif
112 #ifdef SYSVSEM
113 #include <sys/sem.h>
114 #endif
115 #ifdef SYSVSHM
116 #include <sys/shm.h>
117 #endif
118 
119 #include "audio.h"
120 #include "dt.h"
121 #include "pf.h"
122 #include "ucom.h"
123 #include "video.h"
124 
125 extern struct forkstat forkstat;
126 extern struct nchstats nchstats;
127 extern int fscale;
128 extern fixpt_t ccpu;
129 extern long numvnodes;
130 extern int allowdt;
131 extern int audio_record_enable;
132 extern int video_record_enable;
133 extern int autoconf_serial;
134 
135 int allowkmem;
136 
137 int sysctl_diskinit(int, struct proc *);
138 int sysctl_proc_args(int *, u_int, void *, size_t *, struct proc *);
139 int sysctl_proc_cwd(int *, u_int, void *, size_t *, struct proc *);
140 int sysctl_proc_nobroadcastkill(int *, u_int, void *, size_t, void *, size_t *,
141 	struct proc *);
142 int sysctl_proc_vmmap(int *, u_int, void *, size_t *, struct proc *);
143 int sysctl_intrcnt(int *, u_int, void *, size_t *);
144 int sysctl_sensors(int *, u_int, void *, size_t *, void *, size_t);
145 int sysctl_cptime2(int *, u_int, void *, size_t *, void *, size_t);
146 int sysctl_audio(int *, u_int, void *, size_t *, void *, size_t);
147 int sysctl_video(int *, u_int, void *, size_t *, void *, size_t);
148 int sysctl_cpustats(int *, u_int, void *, size_t *, void *, size_t);
149 int sysctl_utc_offset(void *, size_t *, void *, size_t);
150 int sysctl_hwbattery(int *, u_int, void *, size_t *, void *, size_t);
151 
152 void fill_file(struct kinfo_file *, struct file *, struct filedesc *, int,
153     struct vnode *, struct process *, struct proc *, struct socket *, int);
154 void fill_kproc(struct process *, struct kinfo_proc *, struct proc *, int);
155 
156 int kern_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t,
157 	struct proc *);
158 int hw_sysctl_locked(int *, u_int, void *, size_t *,void *, size_t,
159 	struct proc *);
160 
161 int (*cpu_cpuspeed)(int *);
162 
163 /*
164  * Lock to avoid too many processes vslocking a large amount of memory
165  * at the same time.
166  */
167 struct rwlock sysctl_lock = RWLOCK_INITIALIZER("sysctllk");
168 struct rwlock sysctl_disklock = RWLOCK_INITIALIZER("sysctldlk");
169 
170 int
171 sysctl_vslock(void *addr, size_t len)
172 {
173 	int error;
174 
175 	error = rw_enter(&sysctl_lock, RW_WRITE|RW_INTR);
176 	if (error)
177 		return (error);
178 	KERNEL_LOCK();
179 
180 	if (addr) {
181 		if (atop(len) > uvmexp.wiredmax - uvmexp.wired) {
182 			error = ENOMEM;
183 			goto out;
184 		}
185 		error = uvm_vslock(curproc, addr, len, PROT_READ | PROT_WRITE);
186 		if (error)
187 			goto out;
188 	}
189 
190 	return (0);
191 out:
192 	KERNEL_UNLOCK();
193 	rw_exit_write(&sysctl_lock);
194 	return (error);
195 }
196 
197 void
198 sysctl_vsunlock(void *addr, size_t len)
199 {
200 	KERNEL_ASSERT_LOCKED();
201 
202 	if (addr)
203 		uvm_vsunlock(curproc, addr, len);
204 	KERNEL_UNLOCK();
205 	rw_exit_write(&sysctl_lock);
206 }
207 
208 int
209 sys_sysctl(struct proc *p, void *v, register_t *retval)
210 {
211 	struct sys_sysctl_args /* {
212 		syscallarg(const int *) name;
213 		syscallarg(u_int) namelen;
214 		syscallarg(void *) old;
215 		syscallarg(size_t *) oldlenp;
216 		syscallarg(void *) new;
217 		syscallarg(size_t) newlen;
218 	} */ *uap = v;
219 	int error, dolock = 1;
220 	size_t savelen = 0, oldlen = 0;
221 	sysctlfn *fn;
222 	int name[CTL_MAXNAME];
223 
224 	if (SCARG(uap, new) != NULL &&
225 	    (error = suser(p)))
226 		return (error);
227 	/*
228 	 * all top-level sysctl names are non-terminal
229 	 */
230 	if (SCARG(uap, namelen) > CTL_MAXNAME || SCARG(uap, namelen) < 2)
231 		return (EINVAL);
232 	error = copyin(SCARG(uap, name), name,
233 		       SCARG(uap, namelen) * sizeof(int));
234 	if (error)
235 		return (error);
236 
237 	error = pledge_sysctl(p, SCARG(uap, namelen),
238 	    name, SCARG(uap, new));
239 	if (error)
240 		return (error);
241 
242 	switch (name[0]) {
243 	case CTL_KERN:
244 		dolock = 0;
245 		fn = kern_sysctl;
246 		break;
247 	case CTL_HW:
248 		dolock = 0;
249 		fn = hw_sysctl;
250 		break;
251 	case CTL_VM:
252 		fn = uvm_sysctl;
253 		break;
254 	case CTL_NET:
255 		dolock = 0;
256 		fn = net_sysctl;
257 		break;
258 	case CTL_FS:
259 		fn = fs_sysctl;
260 		break;
261 	case CTL_VFS:
262 		fn = vfs_sysctl;
263 		break;
264 	case CTL_MACHDEP:
265 		fn = cpu_sysctl;
266 		break;
267 #ifdef DEBUG_SYSCTL
268 	case CTL_DEBUG:
269 		fn = debug_sysctl;
270 		break;
271 #endif
272 #ifdef DDB
273 	case CTL_DDB:
274 		fn = ddb_sysctl;
275 		break;
276 #endif
277 	default:
278 		return (EOPNOTSUPP);
279 	}
280 
281 	if (SCARG(uap, oldlenp) &&
282 	    (error = copyin(SCARG(uap, oldlenp), &oldlen, sizeof(oldlen))))
283 		return (error);
284 
285 	if (dolock) {
286 		error = sysctl_vslock(SCARG(uap, old), oldlen);
287 		if (error)
288 			return (error);
289 		savelen = oldlen;
290 	}
291 	error = (*fn)(&name[1], SCARG(uap, namelen) - 1, SCARG(uap, old),
292 	    &oldlen, SCARG(uap, new), SCARG(uap, newlen), p);
293 	if (dolock)
294 		sysctl_vsunlock(SCARG(uap, old), savelen);
295 
296 	if (error)
297 		return (error);
298 	if (SCARG(uap, oldlenp))
299 		error = copyout(&oldlen, SCARG(uap, oldlenp), sizeof(oldlen));
300 	return (error);
301 }
302 
303 /*
304  * Attributes stored in the kernel.
305  */
306 char hostname[MAXHOSTNAMELEN];
307 int hostnamelen;
308 char domainname[MAXHOSTNAMELEN];
309 int domainnamelen;
310 int hostid;
311 char *disknames = NULL;
312 size_t disknameslen;
313 struct diskstats *diskstats = NULL;
314 size_t diskstatslen;
315 int securelevel;
316 
317 /* morally const values reported by sysctl_bounded_arr */
318 static int arg_max = ARG_MAX;
319 static int openbsd = OpenBSD;
320 static int posix_version = _POSIX_VERSION;
321 static int ngroups_max = NGROUPS_MAX;
322 static int int_zero = 0;
323 static int int_one = 1;
324 static int maxpartitions = MAXPARTITIONS;
325 static int raw_part = RAW_PART;
326 
327 extern int somaxconn, sominconn;
328 extern int nosuidcoredump;
329 extern int maxlocksperuid;
330 extern int uvm_wxabort;
331 extern int global_ptrace;
332 
333 const struct sysctl_bounded_args kern_vars[] = {
334 	{KERN_OSREV, &openbsd, SYSCTL_INT_READONLY},
335 	{KERN_MAXVNODES, &maxvnodes, 0, INT_MAX},
336 	{KERN_MAXPROC, &maxprocess, 0, INT_MAX},
337 	{KERN_MAXFILES, &maxfiles, 0, INT_MAX},
338 	{KERN_NFILES, &numfiles, SYSCTL_INT_READONLY},
339 	{KERN_TTYCOUNT, &tty_count, SYSCTL_INT_READONLY},
340 	{KERN_ARGMAX, &arg_max, SYSCTL_INT_READONLY},
341 	{KERN_POSIX1, &posix_version, SYSCTL_INT_READONLY},
342 	{KERN_NGROUPS, &ngroups_max, SYSCTL_INT_READONLY},
343 	{KERN_JOB_CONTROL, &int_one, SYSCTL_INT_READONLY},
344 	{KERN_SAVED_IDS, &int_one, SYSCTL_INT_READONLY},
345 	{KERN_MAXPARTITIONS, &maxpartitions, SYSCTL_INT_READONLY},
346 	{KERN_RAWPARTITION, &raw_part, SYSCTL_INT_READONLY},
347 	{KERN_MAXTHREAD, &maxthread, 0, INT_MAX},
348 	{KERN_NTHREADS, &nthreads, SYSCTL_INT_READONLY},
349 	{KERN_SOMAXCONN, &somaxconn, 0, SHRT_MAX},
350 	{KERN_SOMINCONN, &sominconn, 0, SHRT_MAX},
351 	{KERN_NOSUIDCOREDUMP, &nosuidcoredump, 0, 3},
352 	{KERN_FSYNC, &int_one, SYSCTL_INT_READONLY},
353 	{KERN_SYSVMSG,
354 #ifdef SYSVMSG
355 	 &int_one,
356 #else
357 	 &int_zero,
358 #endif
359 	 SYSCTL_INT_READONLY},
360 	{KERN_SYSVSEM,
361 #ifdef SYSVSEM
362 	 &int_one,
363 #else
364 	 &int_zero,
365 #endif
366 	 SYSCTL_INT_READONLY},
367 	{KERN_SYSVSHM,
368 #ifdef SYSVSHM
369 	 &int_one,
370 #else
371 	 &int_zero,
372 #endif
373 	 SYSCTL_INT_READONLY},
374 	{KERN_FSCALE, &fscale, SYSCTL_INT_READONLY},
375 	{KERN_CCPU, &ccpu, SYSCTL_INT_READONLY},
376 	{KERN_NPROCS, &nprocesses, SYSCTL_INT_READONLY},
377 	{KERN_SPLASSERT, &splassert_ctl, 0, 3},
378 	{KERN_MAXLOCKSPERUID, &maxlocksperuid, 0, INT_MAX},
379 	{KERN_WXABORT, &uvm_wxabort, 0, 1},
380 	{KERN_NETLIVELOCKS, &int_zero, SYSCTL_INT_READONLY},
381 #ifdef PTRACE
382 	{KERN_GLOBAL_PTRACE, &global_ptrace, 0, 1},
383 #endif
384 	{KERN_AUTOCONF_SERIAL, &autoconf_serial, SYSCTL_INT_READONLY},
385 };
386 
387 int
388 kern_sysctl_dirs(int top_name, int *name, u_int namelen,
389     void *oldp, size_t *oldlenp, void *newp, size_t newlen, struct proc *p)
390 {
391 	switch (top_name) {
392 #ifndef SMALL_KERNEL
393 	case KERN_PROC:
394 		return (sysctl_doproc(name, namelen, oldp, oldlenp));
395 	case KERN_PROC_ARGS:
396 		return (sysctl_proc_args(name, namelen, oldp, oldlenp, p));
397 	case KERN_PROC_CWD:
398 		return (sysctl_proc_cwd(name, namelen, oldp, oldlenp, p));
399 	case KERN_PROC_NOBROADCASTKILL:
400 		return (sysctl_proc_nobroadcastkill(name, namelen,
401 		     newp, newlen, oldp, oldlenp, p));
402 	case KERN_PROC_VMMAP:
403 		return (sysctl_proc_vmmap(name, namelen, oldp, oldlenp, p));
404 	case KERN_FILE:
405 		return (sysctl_file(name, namelen, oldp, oldlenp, p));
406 #endif
407 #if defined(GPROF) || defined(DDBPROF)
408 	case KERN_PROF:
409 		return (sysctl_doprof(name, namelen, oldp, oldlenp,
410 		    newp, newlen));
411 #endif
412 	case KERN_MALLOCSTATS:
413 		return (sysctl_malloc(name, namelen, oldp, oldlenp,
414 		    newp, newlen, p));
415 	case KERN_TTY:
416 		return (sysctl_tty(name, namelen, oldp, oldlenp,
417 		    newp, newlen));
418 	case KERN_POOL:
419 		return (sysctl_dopool(name, namelen, oldp, oldlenp));
420 #if defined(SYSVMSG) || defined(SYSVSEM) || defined(SYSVSHM)
421 	case KERN_SYSVIPC_INFO:
422 		return (sysctl_sysvipc(name, namelen, oldp, oldlenp));
423 #endif
424 #ifdef SYSVSEM
425 	case KERN_SEMINFO:
426 		return (sysctl_sysvsem(name, namelen, oldp, oldlenp,
427 		    newp, newlen));
428 #endif
429 #ifdef SYSVSHM
430 	case KERN_SHMINFO:
431 		return (sysctl_sysvshm(name, namelen, oldp, oldlenp,
432 		    newp, newlen));
433 #endif
434 #ifndef SMALL_KERNEL
435 	case KERN_INTRCNT:
436 		return (sysctl_intrcnt(name, namelen, oldp, oldlenp));
437 	case KERN_WATCHDOG:
438 		return (sysctl_wdog(name, namelen, oldp, oldlenp,
439 		    newp, newlen));
440 #endif
441 #ifndef SMALL_KERNEL
442 	case KERN_EVCOUNT:
443 		return (evcount_sysctl(name, namelen, oldp, oldlenp,
444 		    newp, newlen));
445 #endif
446 	case KERN_TIMECOUNTER:
447 		return (sysctl_tc(name, namelen, oldp, oldlenp, newp, newlen));
448 	case KERN_CPTIME2:
449 		return (sysctl_cptime2(name, namelen, oldp, oldlenp,
450 		    newp, newlen));
451 #ifdef WITNESS
452 	case KERN_WITNESSWATCH:
453 		return witness_sysctl_watch(oldp, oldlenp, newp, newlen);
454 	case KERN_WITNESS:
455 		return witness_sysctl(name, namelen, oldp, oldlenp,
456 		    newp, newlen);
457 #endif
458 #if NVIDEO > 0
459 	case KERN_VIDEO:
460 		return (sysctl_video(name, namelen, oldp, oldlenp,
461 		    newp, newlen));
462 #endif
463 	case KERN_CPUSTATS:
464 		return (sysctl_cpustats(name, namelen, oldp, oldlenp,
465 		    newp, newlen));
466 	case KERN_CLOCKINTR:
467 		return sysctl_clockintr(name, namelen, oldp, oldlenp, newp,
468 		    newlen);
469 	default:
470 		return (ENOTDIR);	/* overloaded */
471 	}
472 }
473 
474 /*
475  * kernel related system variables.
476  */
477 int
478 kern_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
479     size_t newlen, struct proc *p)
480 {
481 	int error;
482 	size_t savelen;
483 
484 	/* dispatch the non-terminal nodes first */
485 	if (namelen != 1) {
486 		switch (name[0]) {
487 #if NAUDIO > 0
488 		case KERN_AUDIO:
489 			return (sysctl_audio(name, namelen, oldp, oldlenp,
490 			    newp, newlen));
491 #endif
492 		default:
493 			break;
494 		}
495 
496 		savelen = *oldlenp;
497 		if ((error = sysctl_vslock(oldp, savelen)))
498 			return (error);
499 		error = kern_sysctl_dirs(name[0], name + 1, namelen - 1,
500 		    oldp, oldlenp, newp, newlen, p);
501 		sysctl_vsunlock(oldp, savelen);
502 		return (error);
503 	}
504 
505 	switch (name[0]) {
506 	case KERN_OSTYPE:
507 		return (sysctl_rdstring(oldp, oldlenp, newp, ostype));
508 	case KERN_OSRELEASE:
509 		return (sysctl_rdstring(oldp, oldlenp, newp, osrelease));
510 	case KERN_OSVERSION:
511 		return (sysctl_rdstring(oldp, oldlenp, newp, osversion));
512 	case KERN_VERSION:
513 		return (sysctl_rdstring(oldp, oldlenp, newp, version));
514 	case KERN_NUMVNODES:  /* XXX numvnodes is a long */
515 		return (sysctl_rdint(oldp, oldlenp, newp, numvnodes));
516 	case KERN_HOSTID:
517 		return (sysctl_int(oldp, oldlenp, newp, newlen, &hostid));
518 	case KERN_CLOCKRATE:
519 		return (sysctl_clockrate(oldp, oldlenp, newp));
520 	case KERN_BOOTTIME: {
521 		struct timeval bt;
522 		memset(&bt, 0, sizeof bt);
523 		microboottime(&bt);
524 		return (sysctl_rdstruct(oldp, oldlenp, newp, &bt, sizeof bt));
525 	}
526 	case KERN_MBSTAT: {
527 		extern struct cpumem *mbstat;
528 		uint64_t counters[MBSTAT_COUNT];
529 		struct mbstat mbs;
530 		unsigned int i;
531 
532 		memset(&mbs, 0, sizeof(mbs));
533 		counters_read(mbstat, counters, MBSTAT_COUNT, NULL);
534 		for (i = 0; i < MBSTAT_TYPES; i++)
535 			mbs.m_mtypes[i] = counters[i];
536 
537 		mbs.m_drops = counters[MBSTAT_DROPS];
538 		mbs.m_wait = counters[MBSTAT_WAIT];
539 		mbs.m_drain = counters[MBSTAT_DRAIN];
540 
541 		return (sysctl_rdstruct(oldp, oldlenp, newp,
542 		    &mbs, sizeof(mbs)));
543 	}
544 	case KERN_MSGBUFSIZE:
545 	case KERN_CONSBUFSIZE: {
546 		struct msgbuf *mp;
547 		mp = (name[0] == KERN_MSGBUFSIZE) ? msgbufp : consbufp;
548 		/*
549 		 * deal with cases where the message buffer has
550 		 * become corrupted.
551 		 */
552 		if (!mp || mp->msg_magic != MSG_MAGIC)
553 			return (ENXIO);
554 		return (sysctl_rdint(oldp, oldlenp, newp, mp->msg_bufs));
555 	}
556 	case KERN_OSREV:
557 	case KERN_MAXPROC:
558 	case KERN_NFILES:
559 	case KERN_TTYCOUNT:
560 	case KERN_ARGMAX:
561 	case KERN_POSIX1:
562 	case KERN_NGROUPS:
563 	case KERN_JOB_CONTROL:
564 	case KERN_SAVED_IDS:
565 	case KERN_MAXPARTITIONS:
566 	case KERN_RAWPARTITION:
567 	case KERN_MAXTHREAD:
568 	case KERN_NTHREADS:
569 	case KERN_SOMAXCONN:
570 	case KERN_SOMINCONN:
571 	case KERN_FSYNC:
572 	case KERN_SYSVMSG:
573 	case KERN_SYSVSEM:
574 	case KERN_SYSVSHM:
575 	case KERN_FSCALE:
576 	case KERN_CCPU:
577 	case KERN_NPROCS:
578 	case KERN_NETLIVELOCKS:
579 	case KERN_AUTOCONF_SERIAL:
580 		return (sysctl_bounded_arr(kern_vars, nitems(kern_vars), name,
581 		    namelen, oldp, oldlenp, newp, newlen));
582 	}
583 
584 	savelen = *oldlenp;
585 	if ((error = sysctl_vslock(oldp, savelen)))
586 		return (error);
587 	error = kern_sysctl_locked(name, namelen, oldp, oldlenp,
588 	    newp, newlen, p);
589 	sysctl_vsunlock(oldp, savelen);
590 
591 	return (error);
592 }
593 
594 int
595 kern_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
596     void *newp, size_t newlen, struct proc *p)
597 {
598 	int error, level, stackgap;
599 	dev_t dev;
600 	extern int pool_debug;
601 
602 	switch (name[0]) {
603 	case KERN_SECURELVL:
604 		level = securelevel;
605 		if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &level)) ||
606 		    newp == NULL)
607 			return (error);
608 		if ((securelevel > 0 || level < -1) &&
609 		    level < securelevel && p->p_p->ps_pid != 1)
610 			return (EPERM);
611 		securelevel = level;
612 		return (0);
613 #if NDT > 0
614 	case KERN_ALLOWDT:
615 		return (sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
616 		    &allowdt));
617 #endif
618 	case KERN_ALLOWKMEM:
619 		return (sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
620 		    &allowkmem));
621 	case KERN_HOSTNAME:
622 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
623 		    hostname, sizeof(hostname));
624 		if (newp && !error)
625 			hostnamelen = newlen;
626 		return (error);
627 	case KERN_DOMAINNAME:
628 		if (securelevel >= 1 && domainnamelen && newp)
629 			error = EPERM;
630 		else
631 			error = sysctl_tstring(oldp, oldlenp, newp, newlen,
632 			    domainname, sizeof(domainname));
633 		if (newp && !error)
634 			domainnamelen = newlen;
635 		return (error);
636 	case KERN_CONSBUF:
637 		if ((error = suser(p)))
638 			return (error);
639 		/* FALLTHROUGH */
640 	case KERN_MSGBUF: {
641 		struct msgbuf *mp;
642 		mp = (name[0] == KERN_MSGBUF) ? msgbufp : consbufp;
643 		/*
644 		 * deal with cases where the message buffer has
645 		 * become corrupted.
646 		 */
647 		if (!mp || mp->msg_magic != MSG_MAGIC)
648 			return (ENXIO);
649 		return (sysctl_rdstruct(oldp, oldlenp, newp, mp,
650 		    mp->msg_bufs + offsetof(struct msgbuf, msg_bufc)));
651 	}
652 	case KERN_CPTIME:
653 	{
654 		CPU_INFO_ITERATOR cii;
655 		struct cpu_info *ci;
656 		long cp_time[CPUSTATES];
657 		int i, n = 0;
658 
659 		memset(cp_time, 0, sizeof(cp_time));
660 
661 		CPU_INFO_FOREACH(cii, ci) {
662 			if (!cpu_is_online(ci))
663 				continue;
664 			n++;
665 			for (i = 0; i < CPUSTATES; i++)
666 				cp_time[i] += ci->ci_schedstate.spc_cp_time[i];
667 		}
668 
669 		for (i = 0; i < CPUSTATES; i++)
670 			cp_time[i] /= n;
671 
672 		return (sysctl_rdstruct(oldp, oldlenp, newp, &cp_time,
673 		    sizeof(cp_time)));
674 	}
675 	case KERN_NCHSTATS:
676 		return (sysctl_rdstruct(oldp, oldlenp, newp, &nchstats,
677 		    sizeof(struct nchstats)));
678 	case KERN_FORKSTAT:
679 		return (sysctl_rdstruct(oldp, oldlenp, newp, &forkstat,
680 		    sizeof(struct forkstat)));
681 	case KERN_STACKGAPRANDOM:
682 		stackgap = stackgap_random;
683 		error = sysctl_int(oldp, oldlenp, newp, newlen, &stackgap);
684 		if (error)
685 			return (error);
686 		/*
687 		 * Safety harness.
688 		 */
689 		if ((stackgap < ALIGNBYTES && stackgap != 0) ||
690 		    !powerof2(stackgap) || stackgap >= MAXSSIZ)
691 			return (EINVAL);
692 		stackgap_random = stackgap;
693 		return (0);
694 	case KERN_MAXCLUSTERS: {
695 		int val = nmbclust;
696 		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
697 		if (error == 0 && val != nmbclust)
698 			error = nmbclust_update(val);
699 		return (error);
700 	}
701 	case KERN_CACHEPCT: {
702 		u_int64_t dmapages;
703 		int opct, pgs;
704 		opct = bufcachepercent;
705 		error = sysctl_int(oldp, oldlenp, newp, newlen,
706 		    &bufcachepercent);
707 		if (error)
708 			return(error);
709 		if (bufcachepercent > 90 || bufcachepercent < 5) {
710 			bufcachepercent = opct;
711 			return (EINVAL);
712 		}
713 		dmapages = uvm_pagecount(&dma_constraint);
714 		if (bufcachepercent != opct) {
715 			pgs = bufcachepercent * dmapages / 100;
716 			bufadjust(pgs); /* adjust bufpages */
717 			bufhighpages = bufpages; /* set high water mark */
718 		}
719 		return(0);
720 	}
721 	case KERN_CONSDEV:
722 		if (cn_tab != NULL)
723 			dev = cn_tab->cn_dev;
724 		else
725 			dev = NODEV;
726 		return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev));
727 	case KERN_POOL_DEBUG: {
728 		int old_pool_debug = pool_debug;
729 
730 		error = sysctl_int(oldp, oldlenp, newp, newlen,
731 		    &pool_debug);
732 		if (error == 0 && pool_debug != old_pool_debug)
733 			pool_reclaim_all();
734 		return (error);
735 	}
736 #if NPF > 0
737 	case KERN_PFSTATUS:
738 		return (pf_sysctl(oldp, oldlenp, newp, newlen));
739 #endif
740 	case KERN_TIMEOUT_STATS:
741 		return (timeout_sysctl(oldp, oldlenp, newp, newlen));
742 	case KERN_UTC_OFFSET:
743 		return (sysctl_utc_offset(oldp, oldlenp, newp, newlen));
744 	default:
745 		return (sysctl_bounded_arr(kern_vars, nitems(kern_vars), name,
746 		    namelen, oldp, oldlenp, newp, newlen));
747 	}
748 	/* NOTREACHED */
749 }
750 
751 /*
752  * hardware related system variables.
753  */
754 char *hw_vendor, *hw_prod, *hw_uuid, *hw_serial, *hw_ver;
755 int allowpowerdown = 1;
756 int hw_power = 1;
757 
758 /* morally const values reported by sysctl_bounded_arr */
759 static int byte_order = BYTE_ORDER;
760 
761 const struct sysctl_bounded_args hw_vars[] = {
762 	{HW_NCPU, &ncpus, SYSCTL_INT_READONLY},
763 	{HW_NCPUFOUND, &ncpusfound, SYSCTL_INT_READONLY},
764 	{HW_BYTEORDER, &byte_order, SYSCTL_INT_READONLY},
765 	{HW_PAGESIZE, &uvmexp.pagesize, SYSCTL_INT_READONLY},
766 	{HW_DISKCOUNT, &disk_count, SYSCTL_INT_READONLY},
767 	{HW_POWER, &hw_power, SYSCTL_INT_READONLY},
768 };
769 
770 int
771 hw_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
772     size_t newlen, struct proc *p)
773 {
774 	extern char machine[], cpu_model[];
775 	int err;
776 
777 	/*
778 	 * all sysctl names at this level except sensors and battery
779 	 * are terminal
780 	 */
781 	if (name[0] != HW_SENSORS && name[0] != HW_BATTERY && namelen != 1)
782 		return (ENOTDIR);		/* overloaded */
783 
784 	switch (name[0]) {
785 	case HW_MACHINE:
786 		return (sysctl_rdstring(oldp, oldlenp, newp, machine));
787 	case HW_MODEL:
788 		return (sysctl_rdstring(oldp, oldlenp, newp, cpu_model));
789 	case HW_NCPUONLINE:
790 		return (sysctl_rdint(oldp, oldlenp, newp,
791 		    sysctl_hwncpuonline()));
792 	case HW_PHYSMEM:
793 		return (sysctl_rdint(oldp, oldlenp, newp, ptoa(physmem)));
794 	case HW_USERMEM:
795 		return (sysctl_rdint(oldp, oldlenp, newp,
796 		    ptoa(physmem - uvmexp.wired)));
797 	case HW_DISKNAMES:
798 	case HW_DISKSTATS:
799 	case HW_CPUSPEED:
800 #ifndef	SMALL_KERNEL
801 	case HW_SENSORS:
802 	case HW_SETPERF:
803 	case HW_PERFPOLICY:
804 	case HW_BATTERY:
805 #endif /* !SMALL_KERNEL */
806 	case HW_ALLOWPOWERDOWN:
807 	case HW_UCOMNAMES:
808 #ifdef __HAVE_CPU_TOPOLOGY
809 	case HW_SMT:
810 #endif
811 	{
812 		size_t savelen = *oldlenp;
813 		if ((err = sysctl_vslock(oldp, savelen)))
814 			return (err);
815 		err = hw_sysctl_locked(name, namelen, oldp, oldlenp,
816 		    newp, newlen, p);
817 		sysctl_vsunlock(oldp, savelen);
818 		return (err);
819 	}
820 	case HW_VENDOR:
821 		if (hw_vendor)
822 			return (sysctl_rdstring(oldp, oldlenp, newp,
823 			    hw_vendor));
824 		else
825 			return (EOPNOTSUPP);
826 	case HW_PRODUCT:
827 		if (hw_prod)
828 			return (sysctl_rdstring(oldp, oldlenp, newp, hw_prod));
829 		else
830 			return (EOPNOTSUPP);
831 	case HW_VERSION:
832 		if (hw_ver)
833 			return (sysctl_rdstring(oldp, oldlenp, newp, hw_ver));
834 		else
835 			return (EOPNOTSUPP);
836 	case HW_SERIALNO:
837 		if (hw_serial)
838 			return (sysctl_rdstring(oldp, oldlenp, newp,
839 			    hw_serial));
840 		else
841 			return (EOPNOTSUPP);
842 	case HW_UUID:
843 		if (hw_uuid)
844 			return (sysctl_rdstring(oldp, oldlenp, newp, hw_uuid));
845 		else
846 			return (EOPNOTSUPP);
847 	case HW_PHYSMEM64:
848 		return (sysctl_rdquad(oldp, oldlenp, newp,
849 		    ptoa((psize_t)physmem)));
850 	case HW_USERMEM64:
851 		return (sysctl_rdquad(oldp, oldlenp, newp,
852 		    ptoa((psize_t)physmem - uvmexp.wired)));
853 	default:
854 		return sysctl_bounded_arr(hw_vars, nitems(hw_vars), name,
855 		    namelen, oldp, oldlenp, newp, newlen);
856 	}
857 	/* NOTREACHED */
858 }
859 
860 int
861 hw_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
862     void *newp, size_t newlen, struct proc *p)
863 {
864 	int err, cpuspeed;
865 
866 	switch (name[0]) {
867 	case HW_DISKNAMES:
868 		err = sysctl_diskinit(0, p);
869 		if (err)
870 			return err;
871 		if (disknames)
872 			return (sysctl_rdstring(oldp, oldlenp, newp,
873 			    disknames));
874 		else
875 			return (sysctl_rdstring(oldp, oldlenp, newp, ""));
876 	case HW_DISKSTATS:
877 		err = sysctl_diskinit(1, p);
878 		if (err)
879 			return err;
880 		return (sysctl_rdstruct(oldp, oldlenp, newp, diskstats,
881 		    disk_count * sizeof(struct diskstats)));
882 	case HW_CPUSPEED:
883 		if (!cpu_cpuspeed)
884 			return (EOPNOTSUPP);
885 		err = cpu_cpuspeed(&cpuspeed);
886 		if (err)
887 			return err;
888 		return (sysctl_rdint(oldp, oldlenp, newp, cpuspeed));
889 #ifndef SMALL_KERNEL
890 	case HW_SENSORS:
891 		return (sysctl_sensors(name + 1, namelen - 1, oldp, oldlenp,
892 		    newp, newlen));
893 	case HW_SETPERF:
894 		return (sysctl_hwsetperf(oldp, oldlenp, newp, newlen));
895 	case HW_PERFPOLICY:
896 		return (sysctl_hwperfpolicy(oldp, oldlenp, newp, newlen));
897 #endif /* !SMALL_KERNEL */
898 	case HW_ALLOWPOWERDOWN:
899 		return (sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
900 		    &allowpowerdown));
901 	case HW_UCOMNAMES: {
902 		const char *str = "";
903 #if NUCOM > 0
904 		str = sysctl_ucominit();
905 #endif	/* NUCOM > 0 */
906 		return (sysctl_rdstring(oldp, oldlenp, newp, str));
907 	}
908 #ifdef __HAVE_CPU_TOPOLOGY
909 	case HW_SMT:
910 		return (sysctl_hwsmt(oldp, oldlenp, newp, newlen));
911 #endif
912 #ifndef SMALL_KERNEL
913 	case HW_BATTERY:
914 		return (sysctl_hwbattery(name + 1, namelen - 1, oldp, oldlenp,
915 		    newp, newlen));
916 #endif
917 	default:
918 		return (EOPNOTSUPP);
919 	}
920 	/* NOTREACHED */
921 }
922 
923 #ifndef SMALL_KERNEL
924 
/*
 * Battery charge control.  The three ints hold the values that are
 * reported on a read; the function pointers are optional hooks that
 * apply a new value.  A node whose hook is NULL reports EOPNOTSUPP.
 * NOTE(review): nothing in this file stores to the three ints, so they
 * are presumably maintained by whoever installs the hooks -- confirm.
 */
int hw_battery_chargemode;
int hw_battery_chargestart;
int hw_battery_chargestop;
int (*hw_battery_setchargemode)(int);
int (*hw_battery_setchargestart)(int);
int (*hw_battery_setchargestop)(int);

/*
 * Common body of the three battery charge nodes.
 *
 * cur is the value reported to the caller, minimum/maximum bound a new
 * value (enforced by sysctl_int_bounded()) and setter is the hook that
 * applies it.  Returns EOPNOTSUPP when no hook is installed, the error
 * of sysctl_int_bounded() if it fails, and otherwise 0 for a pure read
 * or the hook's return value for a write.
 */
static int
sysctl_hwcharge_int(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    int cur, int minimum, int maximum, int (*setter)(int))
{
	int error;

	if (setter == NULL)
		return (EOPNOTSUPP);

	/* cur is a local copy: it ends up holding the validated new value. */
	error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
	    &cur, minimum, maximum);
	if (error)
		return (error);

	if (newp != NULL)
		error = setter(cur);

	return (error);
}

/*
 * hw.battery charge mode node: integer in the range -1..1.
 */
int
sysctl_hwchargemode(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	return (sysctl_hwcharge_int(oldp, oldlenp, newp, newlen,
	    hw_battery_chargemode, -1, 1, hw_battery_setchargemode));
}

/*
 * hw.battery charge start node: integer in the range 0..100.
 */
int
sysctl_hwchargestart(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	return (sysctl_hwcharge_int(oldp, oldlenp, newp, newlen,
	    hw_battery_chargestart, 0, 100, hw_battery_setchargestart));
}

/*
 * hw.battery charge stop node: integer in the range 0..100.
 */
int
sysctl_hwchargestop(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	return (sysctl_hwcharge_int(oldp, oldlenp, newp, newlen,
	    hw_battery_chargestop, 0, 100, hw_battery_setchargestop));
}
991 
992 int
993 sysctl_hwbattery(int *name, u_int namelen, void *oldp, size_t *oldlenp,
994     void *newp, size_t newlen)
995 {
996 	if (namelen != 1)
997 		return (ENOTDIR);
998 
999 	switch (name[0]) {
1000 	case HW_BATTERY_CHARGEMODE:
1001 		return (sysctl_hwchargemode(oldp, oldlenp, newp, newlen));
1002 	case HW_BATTERY_CHARGESTART:
1003 		return (sysctl_hwchargestart(oldp, oldlenp, newp, newlen));
1004 	case HW_BATTERY_CHARGESTOP:
1005 		return (sysctl_hwchargestop(oldp, oldlenp, newp, newlen));
1006 	default:
1007 		return (EOPNOTSUPP);
1008 	}
1009 	/* NOTREACHED */
1010 }
1011 
1012 #endif
1013 
1014 #ifdef DEBUG_SYSCTL
1015 /*
1016  * Debugging related system variables.
1017  */
1018 extern struct ctldebug debug_vfs_busyprt;
1019 struct ctldebug debug1, debug2, debug3, debug4;
1020 struct ctldebug debug5, debug6, debug7, debug8, debug9;
1021 struct ctldebug debug10, debug11, debug12, debug13, debug14;
1022 struct ctldebug debug15, debug16, debug17, debug18, debug19;
1023 static struct ctldebug *debugvars[CTL_DEBUG_MAXID] = {
1024 	&debug_vfs_busyprt,
1025 	&debug1, &debug2, &debug3, &debug4,
1026 	&debug5, &debug6, &debug7, &debug8, &debug9,
1027 	&debug10, &debug11, &debug12, &debug13, &debug14,
1028 	&debug15, &debug16, &debug17, &debug18, &debug19,
1029 };
1030 int
1031 debug_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
1032     size_t newlen, struct proc *p)
1033 {
1034 	struct ctldebug *cdp;
1035 
1036 	/* all sysctl names at this level are name and field */
1037 	if (namelen != 2)
1038 		return (ENOTDIR);		/* overloaded */
1039 	if (name[0] < 0 || name[0] >= nitems(debugvars))
1040 		return (EOPNOTSUPP);
1041 	cdp = debugvars[name[0]];
1042 	if (cdp->debugname == 0)
1043 		return (EOPNOTSUPP);
1044 	switch (name[1]) {
1045 	case CTL_DEBUG_NAME:
1046 		return (sysctl_rdstring(oldp, oldlenp, newp, cdp->debugname));
1047 	case CTL_DEBUG_VALUE:
1048 		return (sysctl_int(oldp, oldlenp, newp, newlen, cdp->debugvar));
1049 	default:
1050 		return (EOPNOTSUPP);
1051 	}
1052 	/* NOTREACHED */
1053 }
1054 #endif /* DEBUG_SYSCTL */
1055 
/*
 * Integer sysctl that can always be read but only ever lowered by a write.
 *
 * The stored value and the requested value are compared as unsigned ints;
 * a request larger than the current value fails with EPERM.  The update is
 * done with a compare-and-swap loop so a concurrent change is never
 * overwritten by a stale comparison.
 *
 * Returns ENOMEM if the old-value buffer is too small, EINVAL if newlen is
 * not sizeof(int), EPERM on an attempt to raise the value, or the error
 * from copyin()/copyout().
 */
int
sysctl_int_lower(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    int *valp)
{
	unsigned int cur, wanted;
	int error;

	if (oldp != NULL && *oldlenp < sizeof(int))
		return (ENOMEM);
	if (newp != NULL && newlen != sizeof(int))
		return (EINVAL);
	*oldlenp = sizeof(int);

	if (newp == NULL) {
		/* nothing to write: plain read, or just a size query */
		if (oldp == NULL)
			return (0);
		cur = atomic_load_int(valp);
		return (copyout(&cur, oldp, sizeof(int)));
	}

	error = copyin(newp, &wanted, sizeof(int));
	if (error)
		return (error);

	/* retry until the swap happens against the value we compared with */
	for (;;) {
		cur = atomic_load_int(valp);
		if (cur < wanted)
			return (EPERM);	/* do not allow raising */
		if (atomic_cas_uint(valp, cur, wanted) == cur)
			break;
	}

	/* new value has been set although user gets error */
	if (oldp != NULL)
		return (copyout(&cur, oldp, sizeof(int)));

	return (0);
}
1095 
/*
 * Get the old and/or set the new value of an integer-valued sysctl.
 *
 * This is sysctl_int_bounded() with the bounds opened up to the whole
 * range of int, so no written value is rejected for being out of range.
 */
int
sysctl_int(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int *valp)
{
	int error;

	error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, valp,
	    INT_MIN, INT_MAX);
	return (error);
}
1106 
1107 /*
1108  * As above, but read-only.
1109  */
1110 int
1111 sysctl_rdint(void *oldp, size_t *oldlenp, void *newp, int val)
1112 {
1113 	int error = 0;
1114 
1115 	if (oldp && *oldlenp < sizeof(int))
1116 		return (ENOMEM);
1117 	if (newp)
1118 		return (EPERM);
1119 	*oldlenp = sizeof(int);
1120 	if (oldp)
1121 		error = copyout((caddr_t)&val, oldp, sizeof(int));
1122 	return (error);
1123 }
1124 
1125 /*
1126  * Selects between sysctl_rdint and sysctl_int according to securelevel.
1127  */
1128 int
1129 sysctl_securelevel_int(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
1130     int *valp)
1131 {
1132 	if (securelevel > 0)
1133 		return (sysctl_rdint(oldp, oldlenp, newp, *valp));
1134 	return (sysctl_int(oldp, oldlenp, newp, newlen, valp));
1135 }
1136 
/*
 * Read-only or bounded integer values.
 *
 * A write is accepted only when minimum <= new value <= maximum.  Passing
 * minimum > maximum marks the variable read-only: any write then fails
 * with EPERM.
 *
 * Returns ENOMEM if the old-value buffer is shorter than an int, EINVAL if
 * newlen is not sizeof(int) or the new value is out of bounds, or the
 * error from copyin()/copyout().
 */
int
sysctl_int_bounded(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    int *valp, int minimum, int maximum)
{
	int previous, requested;
	int error;
	int writing = (newp != NULL);

	/* read only */
	if (writing && minimum > maximum)
		return (EPERM);

	if (oldp != NULL && *oldlenp < sizeof(int))
		return (ENOMEM);
	if (writing && newlen != sizeof(int))
		return (EINVAL);
	*oldlenp = sizeof(int);

	/* copyin() may sleep, call it first */
	if (writing) {
		error = copyin(newp, &requested, sizeof(int));
		if (error)
			return (error);
		/* outside limits */
		if (requested < minimum || requested > maximum)
			return (EINVAL);
	}

	if (oldp == NULL) {
		/* caller does not want the old value back */
		if (writing)
			atomic_store_int(valp, requested);
		return (0);
	}

	/* fetch the old value, installing the new one in the same step */
	if (writing)
		previous = atomic_swap_uint(valp, requested);
	else
		previous = atomic_load_int(valp);

	/* on failure here the new value has been set although user gets error */
	return (copyout(&previous, oldp, sizeof(int)));
}
1179 
1180 /*
1181  * Array of read-only or bounded integer values.
1182  */
1183 int
1184 sysctl_bounded_arr(const struct sysctl_bounded_args *valpp, u_int valplen,
1185     int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
1186     size_t newlen)
1187 {
1188 	u_int i;
1189 	if (namelen != 1)
1190 		return (ENOTDIR);
1191 	for (i = 0; i < valplen; ++i) {
1192 		if (valpp[i].mib == name[0]) {
1193 			return (sysctl_int_bounded(oldp, oldlenp, newp, newlen,
1194 			    valpp[i].var, valpp[i].minimum, valpp[i].maximum));
1195 		}
1196 	}
1197 	return (EOPNOTSUPP);
1198 }
1199 
/*
 * Validate parameters and get old / set new parameters
 * for a 64-bit integer-valued sysctl function.
 *
 * The old value is copied out before the new one is copied in; a failed
 * copyout() leaves the variable untouched.
 *
 * Returns ENOMEM if the old-value buffer is shorter than an int64_t,
 * EINVAL if newlen is not sizeof(int64_t), or the error from
 * copyout()/copyin().
 */
int
sysctl_quad(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    int64_t *valp)
{
	int error;

	if (oldp != NULL && *oldlenp < sizeof(int64_t))
		return (ENOMEM);
	if (newp != NULL && newlen != sizeof(int64_t))
		return (EINVAL);
	*oldlenp = sizeof(int64_t);

	if (oldp != NULL) {
		error = copyout(valp, oldp, sizeof(int64_t));
		if (error)
			return (error);
	}
	if (newp == NULL)
		return (0);

	return (copyin(newp, valp, sizeof(int64_t)));
}
1221 
1222 /*
1223  * As above, but read-only.
1224  */
1225 int
1226 sysctl_rdquad(void *oldp, size_t *oldlenp, void *newp, int64_t val)
1227 {
1228 	int error = 0;
1229 
1230 	if (oldp && *oldlenp < sizeof(int64_t))
1231 		return (ENOMEM);
1232 	if (newp)
1233 		return (EPERM);
1234 	*oldlenp = sizeof(int64_t);
1235 	if (oldp)
1236 		error = copyout((caddr_t)&val, oldp, sizeof(int64_t));
1237 	return (error);
1238 }
1239 
/*
 * Validate parameters and get old / set new parameters
 * for a string-valued sysctl function.
 *
 * Non-truncating flavour of sysctl__string(): an old-value buffer that
 * cannot hold the whole string yields ENOMEM.
 */
int
sysctl_string(void *oldp, size_t *oldlenp, void *newp, size_t newlen, char *str,
    size_t maxlen)
{
	const int truncate = 0;

	return sysctl__string(oldp, oldlenp, newp, newlen, str, maxlen,
	    truncate);
}
1250 
/*
 * Truncating flavour of sysctl__string(): when the old-value buffer is
 * too short (but not empty) the string is cut to fit instead of failing.
 */
int
sysctl_tstring(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    char *str, size_t maxlen)
{
	const int truncate = 1;

	return sysctl__string(oldp, oldlenp, newp, newlen, str, maxlen,
	    truncate);
}
1257 
/*
 * Common implementation of sysctl_string() and sysctl_tstring().
 *
 * The current string (including its NUL) is copied out to oldp and, if
 * newp is given, replaced by the newlen bytes at newp plus a terminating
 * NUL.  `maxlen' is the size of the buffer behind `str'; a new value needs
 * newlen < maxlen.
 *
 * When the old-value buffer is too short:
 *   - trunc == 0, or the buffer is empty: fail with ENOMEM;
 *   - otherwise: copy out as much as fits, always NUL-terminated, and
 *     report the truncated length in *oldlenp.
 *
 * Returns ENOMEM / EINVAL as described, or the copyout()/copyin() error.
 */
int
sysctl__string(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    char *str, size_t maxlen, int trunc)
{
	size_t need;
	int error = 0;
	int short_buf;

	need = strlen(str) + 1;
	short_buf = (oldp != NULL && *oldlenp < need);

	if (short_buf && (!trunc || *oldlenp == 0))
		return (ENOMEM);
	if (newp != NULL && newlen >= maxlen)
		return (EINVAL);

	if (short_buf) {
		/* truncation was requested and at least one byte is available */
		need = *oldlenp;
		error = copyout(str, oldp, need - 1);
		if (error == 0)
			error = copyout("", (char *)oldp + need - 1, 1);
	} else if (oldp != NULL) {
		error = copyout(str, oldp, need);
	}
	*oldlenp = need;

	if (error != 0 || newp == NULL)
		return (error);

	error = copyin(newp, str, newlen);
	/* terminate even if copyin() failed part-way */
	str[newlen] = 0;
	return (error);
}
1289 
/*
 * Read-only string sysctl: report `str' (with its NUL), refuse any write.
 *
 * Returns ENOMEM if the old-value buffer cannot hold the whole string,
 * EPERM if a new value is supplied, otherwise the result of copyout()
 * (0 when only the size was asked for).
 */
int
sysctl_rdstring(void *oldp, size_t *oldlenp, void *newp, const char *str)
{
	size_t need = strlen(str) + 1;

	if (oldp != NULL && *oldlenp < need)
		return (ENOMEM);
	if (newp != NULL)
		return (EPERM);

	*oldlenp = need;
	if (oldp == NULL)
		return (0);
	return (copyout(str, oldp, need));
}
1309 
/*
 * Validate parameters and get old / set new parameters
 * for a structure oriented sysctl function.
 *
 * `sp' points at the kernel structure of `len' bytes.  The old contents
 * are copied out first; only if that succeeds is the new value copied in.
 *
 * *oldlenp is always set to `len' once the arguments validate, so a size
 * query (oldp == NULL) reports the structure size, as sysctl_rdstruct()
 * does.
 *
 * A new value may be shorter than the structure (newlen <= len); only the
 * newlen bytes the caller actually supplied are read from newp, so the
 * kernel never reads past the end of the caller's buffer.
 *
 * Returns ENOMEM if the old-value buffer is shorter than `len', EINVAL if
 * the new value is longer than `len', or the copyout()/copyin() error.
 */
int
sysctl_struct(void *oldp, size_t *oldlenp, void *newp, size_t newlen, void *sp,
    size_t len)
{
	int error = 0;

	if (oldp && *oldlenp < len)
		return (ENOMEM);
	if (newp && newlen > len)
		return (EINVAL);
	*oldlenp = len;
	if (oldp)
		error = copyout(sp, oldp, len);
	if (error == 0 && newp)
		error = copyin(newp, sp, newlen);
	return (error);
}
1332 
/*
 * Read-only structure sysctl: report the `len' bytes at `sp', refuse any
 * write.
 *
 * Returns ENOMEM if the old-value buffer is shorter than `len', EPERM if
 * a new value is supplied, otherwise the result of copyout() (0 when only
 * the size was asked for).
 */
int
sysctl_rdstruct(void *oldp, size_t *oldlenp, void *newp, const void *sp,
    size_t len)
{
	if (oldp != NULL && *oldlenp < len)
		return (ENOMEM);
	if (newp != NULL)
		return (EPERM);

	*oldlenp = len;
	if (oldp == NULL)
		return (0);
	return (copyout(sp, oldp, len));
}
1352 
1353 #ifndef SMALL_KERNEL
1354 void
1355 fill_file(struct kinfo_file *kf, struct file *fp, struct filedesc *fdp,
1356 	  int fd, struct vnode *vp, struct process *pr, struct proc *p,
1357 	  struct socket *so, int show_pointers)
1358 {
1359 	struct vattr va;
1360 
1361 	memset(kf, 0, sizeof(*kf));
1362 
1363 	kf->fd_fd = fd;		/* might not really be an fd */
1364 
1365 	if (fp != NULL) {
1366 		if (show_pointers)
1367 			kf->f_fileaddr = PTRTOINT64(fp);
1368 		kf->f_flag = fp->f_flag;
1369 		kf->f_iflags = fp->f_iflags;
1370 		kf->f_type = fp->f_type;
1371 		kf->f_count = fp->f_count;
1372 		if (show_pointers)
1373 			kf->f_ucred = PTRTOINT64(fp->f_cred);
1374 		kf->f_uid = fp->f_cred->cr_uid;
1375 		kf->f_gid = fp->f_cred->cr_gid;
1376 		if (show_pointers)
1377 			kf->f_ops = PTRTOINT64(fp->f_ops);
1378 		if (show_pointers)
1379 			kf->f_data = PTRTOINT64(fp->f_data);
1380 		kf->f_usecount = 0;
1381 
1382 		if (suser(p) == 0 || p->p_ucred->cr_uid == fp->f_cred->cr_uid) {
1383 			mtx_enter(&fp->f_mtx);
1384 			kf->f_offset = fp->f_offset;
1385 			kf->f_rxfer = fp->f_rxfer;
1386 			kf->f_rwfer = fp->f_wxfer;
1387 			kf->f_seek = fp->f_seek;
1388 			kf->f_rbytes = fp->f_rbytes;
1389 			kf->f_wbytes = fp->f_wbytes;
1390 			mtx_leave(&fp->f_mtx);
1391 		} else
1392 			kf->f_offset = -1;
1393 	} else if (vp != NULL) {
1394 		/* fake it */
1395 		kf->f_type = DTYPE_VNODE;
1396 		kf->f_flag = FREAD;
1397 		if (fd == KERN_FILE_TRACE)
1398 			kf->f_flag |= FWRITE;
1399 	} else if (so != NULL) {
1400 		/* fake it */
1401 		kf->f_type = DTYPE_SOCKET;
1402 	}
1403 
1404 	/* information about the object associated with this file */
1405 	switch (kf->f_type) {
1406 	case DTYPE_VNODE:
1407 		if (fp != NULL)
1408 			vp = (struct vnode *)fp->f_data;
1409 
1410 		if (show_pointers)
1411 			kf->v_un = PTRTOINT64(vp->v_un.vu_socket);
1412 		kf->v_type = vp->v_type;
1413 		kf->v_tag = vp->v_tag;
1414 		kf->v_flag = vp->v_flag;
1415 		if (show_pointers)
1416 			kf->v_data = PTRTOINT64(vp->v_data);
1417 		if (show_pointers)
1418 			kf->v_mount = PTRTOINT64(vp->v_mount);
1419 		if (vp->v_mount)
1420 			strlcpy(kf->f_mntonname,
1421 			    vp->v_mount->mnt_stat.f_mntonname,
1422 			    sizeof(kf->f_mntonname));
1423 
1424 		if (VOP_GETATTR(vp, &va, p->p_ucred, p) == 0) {
1425 			kf->va_fileid = va.va_fileid;
1426 			kf->va_mode = MAKEIMODE(va.va_type, va.va_mode);
1427 			kf->va_size = va.va_size;
1428 			kf->va_rdev = va.va_rdev;
1429 			kf->va_fsid = va.va_fsid & 0xffffffff;
1430 			kf->va_nlink = va.va_nlink;
1431 		}
1432 		break;
1433 
1434 	case DTYPE_SOCKET: {
1435 		int locked = 0;
1436 
1437 		if (so == NULL) {
1438 			so = (struct socket *)fp->f_data;
1439 			/* if so is passed as parameter it is already locked */
1440 			solock(so);
1441 			locked = 1;
1442 		}
1443 
1444 		kf->so_type = so->so_type;
1445 		kf->so_state = so->so_state | so->so_snd.sb_state |
1446 		    so->so_rcv.sb_state;
1447 		if (show_pointers)
1448 			kf->so_pcb = PTRTOINT64(so->so_pcb);
1449 		else
1450 			kf->so_pcb = -1;
1451 		kf->so_protocol = so->so_proto->pr_protocol;
1452 		kf->so_family = so->so_proto->pr_domain->dom_family;
1453 		kf->so_rcv_cc = so->so_rcv.sb_cc;
1454 		kf->so_snd_cc = so->so_snd.sb_cc;
1455 		if (isspliced(so)) {
1456 			if (show_pointers)
1457 				kf->so_splice =
1458 				    PTRTOINT64(so->so_sp->ssp_socket);
1459 			kf->so_splicelen = so->so_sp->ssp_len;
1460 		} else if (issplicedback(so))
1461 			kf->so_splicelen = -1;
1462 		if (so->so_pcb == NULL) {
1463 			if (locked)
1464 				sounlock(so);
1465 			break;
1466 		}
1467 		switch (kf->so_family) {
1468 		case AF_INET: {
1469 			struct inpcb *inpcb = so->so_pcb;
1470 
1471 			soassertlocked(so);
1472 			if (show_pointers)
1473 				kf->inp_ppcb = PTRTOINT64(inpcb->inp_ppcb);
1474 			kf->inp_lport = inpcb->inp_lport;
1475 			kf->inp_laddru[0] = inpcb->inp_laddr.s_addr;
1476 			kf->inp_fport = inpcb->inp_fport;
1477 			kf->inp_faddru[0] = inpcb->inp_faddr.s_addr;
1478 			kf->inp_rtableid = inpcb->inp_rtableid;
1479 			if (so->so_type == SOCK_RAW)
1480 				kf->inp_proto = inpcb->inp_ip.ip_p;
1481 			if (so->so_proto->pr_protocol == IPPROTO_TCP) {
1482 				struct tcpcb *tcpcb = (void *)inpcb->inp_ppcb;
1483 				kf->t_rcv_wnd = tcpcb->rcv_wnd;
1484 				kf->t_snd_wnd = tcpcb->snd_wnd;
1485 				kf->t_snd_cwnd = tcpcb->snd_cwnd;
1486 				kf->t_state = tcpcb->t_state;
1487 			}
1488 			break;
1489 		    }
1490 		case AF_INET6: {
1491 			struct inpcb *inpcb = so->so_pcb;
1492 
1493 			soassertlocked(so);
1494 			if (show_pointers)
1495 				kf->inp_ppcb = PTRTOINT64(inpcb->inp_ppcb);
1496 			kf->inp_lport = inpcb->inp_lport;
1497 			kf->inp_laddru[0] = inpcb->inp_laddr6.s6_addr32[0];
1498 			kf->inp_laddru[1] = inpcb->inp_laddr6.s6_addr32[1];
1499 			kf->inp_laddru[2] = inpcb->inp_laddr6.s6_addr32[2];
1500 			kf->inp_laddru[3] = inpcb->inp_laddr6.s6_addr32[3];
1501 			kf->inp_fport = inpcb->inp_fport;
1502 			kf->inp_faddru[0] = inpcb->inp_faddr6.s6_addr32[0];
1503 			kf->inp_faddru[1] = inpcb->inp_faddr6.s6_addr32[1];
1504 			kf->inp_faddru[2] = inpcb->inp_faddr6.s6_addr32[2];
1505 			kf->inp_faddru[3] = inpcb->inp_faddr6.s6_addr32[3];
1506 			kf->inp_rtableid = inpcb->inp_rtableid;
1507 			if (so->so_type == SOCK_RAW)
1508 				kf->inp_proto = inpcb->inp_ipv6.ip6_nxt;
1509 			if (so->so_proto->pr_protocol == IPPROTO_TCP) {
1510 				struct tcpcb *tcpcb = (void *)inpcb->inp_ppcb;
1511 				kf->t_rcv_wnd = tcpcb->rcv_wnd;
1512 				kf->t_snd_wnd = tcpcb->snd_wnd;
1513 				kf->t_state = tcpcb->t_state;
1514 			}
1515 			break;
1516 		    }
1517 		case AF_UNIX: {
1518 			struct unpcb *unpcb = so->so_pcb;
1519 
1520 			kf->f_msgcount = unpcb->unp_msgcount;
1521 			if (show_pointers) {
1522 				kf->unp_conn	= PTRTOINT64(unpcb->unp_conn);
1523 				kf->unp_refs	= PTRTOINT64(
1524 				    SLIST_FIRST(&unpcb->unp_refs));
1525 				kf->unp_nextref	= PTRTOINT64(
1526 				    SLIST_NEXT(unpcb, unp_nextref));
1527 				kf->v_un	= PTRTOINT64(unpcb->unp_vnode);
1528 				kf->unp_addr	= PTRTOINT64(unpcb->unp_addr);
1529 			}
1530 			if (unpcb->unp_addr != NULL) {
1531 				struct sockaddr_un *un = mtod(unpcb->unp_addr,
1532 				    struct sockaddr_un *);
1533 				memcpy(kf->unp_path, un->sun_path, un->sun_len
1534 				    - offsetof(struct sockaddr_un,sun_path));
1535 			}
1536 			break;
1537 		    }
1538 		}
1539 		if (locked)
1540 			sounlock(so);
1541 		break;
1542 	    }
1543 
1544 	case DTYPE_PIPE: {
1545 		struct pipe *pipe = (struct pipe *)fp->f_data;
1546 
1547 		if (show_pointers)
1548 			kf->pipe_peer = PTRTOINT64(pipe->pipe_peer);
1549 		kf->pipe_state = pipe->pipe_state;
1550 		break;
1551 	    }
1552 
1553 	case DTYPE_KQUEUE: {
1554 		struct kqueue *kqi = (struct kqueue *)fp->f_data;
1555 
1556 		kf->kq_count = kqi->kq_count;
1557 		kf->kq_state = kqi->kq_state;
1558 		break;
1559 	    }
1560 	}
1561 
1562 	/* per-process information for KERN_FILE_BY[PU]ID */
1563 	if (pr != NULL) {
1564 		kf->p_pid = pr->ps_pid;
1565 		kf->p_uid = pr->ps_ucred->cr_uid;
1566 		kf->p_gid = pr->ps_ucred->cr_gid;
1567 		kf->p_tid = -1;
1568 		strlcpy(kf->p_comm, pr->ps_comm, sizeof(kf->p_comm));
1569 	}
1570 	if (fdp != NULL) {
1571 		fdplock(fdp);
1572 		kf->fd_ofileflags = fdp->fd_ofileflags[fd];
1573 		fdpunlock(fdp);
1574 	}
1575 }
1576 
/*
 * Get file structures.
 *
 * The name has exactly four components:
 *   name[0]  operation: KERN_FILE_BYFILE, KERN_FILE_BYPID or
 *            KERN_FILE_BYUID
 *   name[1]  argument for the operation (file type, pid or uid)
 *   name[2]  size of one element as seen by the caller; must be between 1
 *            and sizeof(struct kinfo_file)
 *   name[3]  maximum number of elements to copy out
 *
 * `where' is the user buffer (NULL for a size query) and `*sizep' its
 * length on entry.  On success `*sizep' is set to the number of bytes
 * needed for all matching records; for a size query KERN_FILESLOP extra
 * elements are added on top.
 *
 * Returns ENOTDIR / EINVAL for a malformed name, ESRCH when
 * KERN_FILE_BYPID matches no process, ENOMEM when the buffer was too
 * small for everything, or a copyout() error.
 */
int
sysctl_file(int *name, u_int namelen, char *where, size_t *sizep,
    struct proc *p)
{
	struct kinfo_file *kf;
	struct filedesc *fdp;
	struct file *fp;
	struct process *pr;
	size_t buflen, elem_size, elem_count, outsize;
	char *dp = where;
	int arg, i, error = 0, needed = 0, matched;
	u_int op;
	int show_pointers;

	if (namelen > 4)
		return (ENOTDIR);
	if (namelen < 4 || name[2] > sizeof(*kf))
		return (EINVAL);

	buflen = where != NULL ? *sizep : 0;
	op = name[0];
	arg = name[1];
	elem_size = name[2];
	elem_count = name[3];
	/* never copy out more than a full record, whatever the stride is */
	outsize = MIN(sizeof(*kf), elem_size);

	if (elem_size < 1)
		return (EINVAL);

	/* kernel addresses are only exposed to the superuser */
	show_pointers = suser(curproc) == 0;

	kf = malloc(sizeof(*kf), M_TEMP, M_WAITOK);

/*
 * FILLIT2 emits one record: if there is still room in the user buffer and
 * the element budget is not used up, the record is built in `kf' and
 * copied out; `needed' grows by one element whether or not a copy took
 * place, unless the copyout failed.
 *
 * NOTE(review): the `break' inside FILLIT2 only leaves the macro's own
 * do/while (0); it does not terminate an enclosing loop at the call site.
 */
#define FILLIT2(fp, fdp, i, vp, pr, so) do {				\
	if (buflen >= elem_size && elem_count > 0) {			\
		fill_file(kf, fp, fdp, i, vp, pr, p, so, show_pointers);\
		error = copyout(kf, dp, outsize);			\
		if (error)						\
			break;						\
		dp += elem_size;					\
		buflen -= elem_size;					\
		elem_count--;						\
	}								\
	needed += elem_size;						\
} while (0)
#define FILLIT(fp, fdp, i, vp, pr) \
	FILLIT2(fp, fdp, i, vp, pr, NULL)
#define FILLSO(so) \
	FILLIT2(NULL, NULL, 0, NULL, NULL, so)

	switch (op) {
	case KERN_FILE_BYFILE:
		/* use the inp-tables to pick up closed connections, too */
		if (arg == DTYPE_SOCKET) {
			struct inpcb *inp;

			/* walk every inpcb table under the net lock */
			NET_LOCK();
			mtx_enter(&tcbtable.inpt_mtx);
			TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue)
				FILLSO(inp->inp_socket);
			mtx_leave(&tcbtable.inpt_mtx);
#ifdef INET6
			mtx_enter(&tcb6table.inpt_mtx);
			TAILQ_FOREACH(inp, &tcb6table.inpt_queue, inp_queue)
				FILLSO(inp->inp_socket);
			mtx_leave(&tcb6table.inpt_mtx);
#endif
			mtx_enter(&udbtable.inpt_mtx);
			TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue)
				FILLSO(inp->inp_socket);
			mtx_leave(&udbtable.inpt_mtx);
#ifdef INET6
			mtx_enter(&udb6table.inpt_mtx);
			TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue)
				FILLSO(inp->inp_socket);
			mtx_leave(&udb6table.inpt_mtx);
#endif
			mtx_enter(&rawcbtable.inpt_mtx);
			TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue)
				FILLSO(inp->inp_socket);
			mtx_leave(&rawcbtable.inpt_mtx);
#ifdef INET6
			mtx_enter(&rawin6pcbtable.inpt_mtx);
			TAILQ_FOREACH(inp, &rawin6pcbtable.inpt_queue,
			    inp_queue)
				FILLSO(inp->inp_socket);
			mtx_leave(&rawin6pcbtable.inpt_mtx);
#endif
			NET_UNLOCK();
		}
		fp = NULL;
		while ((fp = fd_iterfile(fp, p)) != NULL) {
			if ((arg == 0 || fp->f_type == arg)) {
				int af, skip = 0;
				/*
				 * AF_INET/AF_INET6 sockets were already
				 * reported from the inpcb tables above.
				 */
				if (arg == DTYPE_SOCKET && fp->f_type == arg) {
					af = ((struct socket *)fp->f_data)->
					    so_proto->pr_domain->dom_family;
					if (af == AF_INET || af == AF_INET6)
						skip = 1;
				}
				if (!skip)
					FILLIT(fp, NULL, 0, NULL, NULL);
			}
		}
		break;
	case KERN_FILE_BYPID:
		/* A arg of -1 indicates all processes */
		if (arg < -1) {
			error = EINVAL;
			break;
		}
		matched = 0;
		LIST_FOREACH(pr, &allprocess, ps_list) {
			/*
			 * skip system, exiting, embryonic and undead
			 * processes
			 */
			if (pr->ps_flags & (PS_SYSTEM | PS_EMBRYO | PS_EXITING))
				continue;
			if (arg >= 0 && pr->ps_pid != (pid_t)arg) {
				/* not the pid we are looking for */
				continue;
			}

			/* hold a reference on the process while it is examined */
			refcnt_take(&pr->ps_refcnt);

			matched = 1;
			fdp = pr->ps_fd;
			/* pseudo entries first: text, cwd, root dir, trace file */
			if (pr->ps_textvp)
				FILLIT(NULL, NULL, KERN_FILE_TEXT, pr->ps_textvp, pr);
			if (fdp->fd_cdir)
				FILLIT(NULL, NULL, KERN_FILE_CDIR, fdp->fd_cdir, pr);
			if (fdp->fd_rdir)
				FILLIT(NULL, NULL, KERN_FILE_RDIR, fdp->fd_rdir, pr);
			if (pr->ps_tracevp)
				FILLIT(NULL, NULL, KERN_FILE_TRACE, pr->ps_tracevp, pr);
			/* then every open descriptor */
			for (i = 0; i < fdp->fd_nfiles; i++) {
				if ((fp = fd_getfile(fdp, i)) == NULL)
					continue;
				FILLIT(fp, fdp, i, NULL, pr);
				FRELE(fp, p);
			}

			refcnt_rele_wake(&pr->ps_refcnt);

			/* pid is unique, stop searching */
			if (arg >= 0)
				break;
		}
		if (!matched)
			error = ESRCH;
		break;
	case KERN_FILE_BYUID:
		LIST_FOREACH(pr, &allprocess, ps_list) {
			/*
			 * skip system, exiting, embryonic and undead
			 * processes
			 */
			if (pr->ps_flags & (PS_SYSTEM | PS_EMBRYO | PS_EXITING))
				continue;
			if (arg >= 0 && pr->ps_ucred->cr_uid != (uid_t)arg) {
				/* not the uid we are looking for */
				continue;
			}

			/* hold a reference on the process while it is examined */
			refcnt_take(&pr->ps_refcnt);

			fdp = pr->ps_fd;
			/* unlike BYPID, no KERN_FILE_TEXT entry is emitted here */
			if (fdp->fd_cdir)
				FILLIT(NULL, NULL, KERN_FILE_CDIR, fdp->fd_cdir, pr);
			if (fdp->fd_rdir)
				FILLIT(NULL, NULL, KERN_FILE_RDIR, fdp->fd_rdir, pr);
			if (pr->ps_tracevp)
				FILLIT(NULL, NULL, KERN_FILE_TRACE, pr->ps_tracevp, pr);
			for (i = 0; i < fdp->fd_nfiles; i++) {
				if ((fp = fd_getfile(fdp, i)) == NULL)
					continue;
				FILLIT(fp, fdp, i, NULL, pr);
				FRELE(fp, p);
			}

			refcnt_rele_wake(&pr->ps_refcnt);
		}
		break;
	default:
		error = EINVAL;
		break;
	}
	free(kf, M_TEMP, sizeof(*kf));

	if (!error) {
		/* size query: pad the estimate; otherwise flag a short buffer */
		if (where == NULL)
			needed += KERN_FILESLOP * elem_size;
		else if (*sizep < needed)
			error = ENOMEM;
		*sizep = needed;
	}

	return (error);
}
1780 
1781 /*
1782  * try over estimating by 5 procs
1783  */
1784 #define KERN_PROCSLOP	5
1785 
1786 int
1787 sysctl_doproc(int *name, u_int namelen, char *where, size_t *sizep)
1788 {
1789 	struct kinfo_proc *kproc = NULL;
1790 	struct proc *p;
1791 	struct process *pr;
1792 	char *dp;
1793 	int arg, buflen, doingzomb, elem_size, elem_count;
1794 	int error, needed, op;
1795 	int dothreads = 0;
1796 	int show_pointers;
1797 
1798 	dp = where;
1799 	buflen = where != NULL ? *sizep : 0;
1800 	needed = error = 0;
1801 
1802 	if (namelen != 4 || name[2] <= 0 || name[3] < 0 ||
1803 	    name[2] > sizeof(*kproc))
1804 		return (EINVAL);
1805 	op = name[0];
1806 	arg = name[1];
1807 	elem_size = name[2];
1808 	elem_count = name[3];
1809 
1810 	dothreads = op & KERN_PROC_SHOW_THREADS;
1811 	op &= ~KERN_PROC_SHOW_THREADS;
1812 
1813 	show_pointers = suser(curproc) == 0;
1814 
1815 	if (where != NULL)
1816 		kproc = malloc(sizeof(*kproc), M_TEMP, M_WAITOK);
1817 
1818 	pr = LIST_FIRST(&allprocess);
1819 	doingzomb = 0;
1820 again:
1821 	for (; pr != NULL; pr = LIST_NEXT(pr, ps_list)) {
1822 		/* XXX skip processes in the middle of being zapped */
1823 		if (pr->ps_pgrp == NULL)
1824 			continue;
1825 
1826 		/*
1827 		 * Skip embryonic processes.
1828 		 */
1829 		if (pr->ps_flags & PS_EMBRYO)
1830 			continue;
1831 
1832 		/*
1833 		 * TODO - make more efficient (see notes below).
1834 		 */
1835 		switch (op) {
1836 
1837 		case KERN_PROC_PID:
1838 			/* could do this with just a lookup */
1839 			if (pr->ps_pid != (pid_t)arg)
1840 				continue;
1841 			break;
1842 
1843 		case KERN_PROC_PGRP:
1844 			/* could do this by traversing pgrp */
1845 			if (pr->ps_pgrp->pg_id != (pid_t)arg)
1846 				continue;
1847 			break;
1848 
1849 		case KERN_PROC_SESSION:
1850 			if (pr->ps_session->s_leader == NULL ||
1851 			    pr->ps_session->s_leader->ps_pid != (pid_t)arg)
1852 				continue;
1853 			break;
1854 
1855 		case KERN_PROC_TTY:
1856 			if ((pr->ps_flags & PS_CONTROLT) == 0 ||
1857 			    pr->ps_session->s_ttyp == NULL ||
1858 			    pr->ps_session->s_ttyp->t_dev != (dev_t)arg)
1859 				continue;
1860 			break;
1861 
1862 		case KERN_PROC_UID:
1863 			if (pr->ps_ucred->cr_uid != (uid_t)arg)
1864 				continue;
1865 			break;
1866 
1867 		case KERN_PROC_RUID:
1868 			if (pr->ps_ucred->cr_ruid != (uid_t)arg)
1869 				continue;
1870 			break;
1871 
1872 		case KERN_PROC_ALL:
1873 			if (pr->ps_flags & PS_SYSTEM)
1874 				continue;
1875 			break;
1876 
1877 		case KERN_PROC_KTHREAD:
1878 			/* no filtering */
1879 			break;
1880 
1881 		default:
1882 			error = EINVAL;
1883 			goto err;
1884 		}
1885 
1886 		if (buflen >= elem_size && elem_count > 0) {
1887 			fill_kproc(pr, kproc, NULL, show_pointers);
1888 			error = copyout(kproc, dp, elem_size);
1889 			if (error)
1890 				goto err;
1891 			dp += elem_size;
1892 			buflen -= elem_size;
1893 			elem_count--;
1894 		}
1895 		needed += elem_size;
1896 
1897 		/* Skip per-thread entries if not required by op */
1898 		if (!dothreads)
1899 			continue;
1900 
1901 		TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
1902 			if (buflen >= elem_size && elem_count > 0) {
1903 				fill_kproc(pr, kproc, p, show_pointers);
1904 				error = copyout(kproc, dp, elem_size);
1905 				if (error)
1906 					goto err;
1907 				dp += elem_size;
1908 				buflen -= elem_size;
1909 				elem_count--;
1910 			}
1911 			needed += elem_size;
1912 		}
1913 	}
1914 	if (doingzomb == 0) {
1915 		pr = LIST_FIRST(&zombprocess);
1916 		doingzomb++;
1917 		goto again;
1918 	}
1919 	if (where != NULL) {
1920 		*sizep = dp - where;
1921 		if (needed > *sizep) {
1922 			error = ENOMEM;
1923 			goto err;
1924 		}
1925 	} else {
1926 		needed += KERN_PROCSLOP * elem_size;
1927 		*sizep = needed;
1928 	}
1929 err:
1930 	if (kproc)
1931 		free(kproc, M_TEMP, sizeof(*kproc));
1932 	return (error);
1933 }
1934 
1935 /*
1936  * Fill in a kproc structure for the specified process.
1937  */
1938 void
1939 fill_kproc(struct process *pr, struct kinfo_proc *ki, struct proc *p,
1940     int show_pointers)
1941 {
1942 	struct session *s = pr->ps_session;
1943 	struct tty *tp;
1944 	struct vmspace *vm = pr->ps_vmspace;
1945 	struct timespec booted, st, ut, utc;
1946 	struct tusage tu;
1947 	int isthread;
1948 
1949 	isthread = p != NULL;
1950 	if (!isthread) {
1951 		p = pr->ps_mainproc;		/* XXX */
1952 		tuagg_get_process(&tu, pr);
1953 	} else
1954 		tuagg_get_proc(&tu, p);
1955 
1956 	FILL_KPROC(ki, strlcpy, p, pr, pr->ps_ucred, pr->ps_pgrp,
1957 	    p, pr, s, vm, pr->ps_limit, pr->ps_sigacts, &tu, isthread,
1958 	    show_pointers);
1959 
1960 	/* stuff that's too painful to generalize into the macros */
1961 	if (pr->ps_pptr)
1962 		ki->p_ppid = pr->ps_ppid;
1963 	if (s->s_leader)
1964 		ki->p_sid = s->s_leader->ps_pid;
1965 
1966 	if ((pr->ps_flags & PS_CONTROLT) && (tp = s->s_ttyp)) {
1967 		ki->p_tdev = tp->t_dev;
1968 		ki->p_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : -1;
1969 		if (show_pointers)
1970 			ki->p_tsess = PTRTOINT64(tp->t_session);
1971 	} else {
1972 		ki->p_tdev = NODEV;
1973 		ki->p_tpgid = -1;
1974 	}
1975 
1976 	/* fixups that can only be done in the kernel */
1977 	if ((pr->ps_flags & PS_ZOMBIE) == 0) {
1978 		if ((pr->ps_flags & PS_EMBRYO) == 0 && vm != NULL)
1979 			ki->p_vm_rssize = vm_resident_count(vm);
1980 		calctsru(&tu, &ut, &st, NULL);
1981 		ki->p_uutime_sec = ut.tv_sec;
1982 		ki->p_uutime_usec = ut.tv_nsec/1000;
1983 		ki->p_ustime_sec = st.tv_sec;
1984 		ki->p_ustime_usec = st.tv_nsec/1000;
1985 
1986 		/* Convert starting uptime to a starting UTC time. */
1987 		nanoboottime(&booted);
1988 		timespecadd(&booted, &pr->ps_start, &utc);
1989 		ki->p_ustart_sec = utc.tv_sec;
1990 		ki->p_ustart_usec = utc.tv_nsec / 1000;
1991 
1992 #ifdef MULTIPROCESSOR
1993 		if (p->p_cpu != NULL)
1994 			ki->p_cpuid = CPU_INFO_UNIT(p->p_cpu);
1995 #endif
1996 	}
1997 
1998 	/* get %cpu and schedule state: just one thread or sum of all? */
1999 	if (isthread) {
2000 		ki->p_pctcpu = p->p_pctcpu;
2001 		ki->p_stat   = p->p_stat;
2002 	} else {
2003 		ki->p_pctcpu = 0;
2004 		ki->p_stat = (pr->ps_flags & PS_ZOMBIE) ? SDEAD : SIDL;
2005 		TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
2006 			ki->p_pctcpu += p->p_pctcpu;
2007 			/* find best state: ONPROC > RUN > STOP > SLEEP > .. */
2008 			if (p->p_stat == SONPROC || ki->p_stat == SONPROC)
2009 				ki->p_stat = SONPROC;
2010 			else if (p->p_stat == SRUN || ki->p_stat == SRUN)
2011 				ki->p_stat = SRUN;
2012 			else if (p->p_stat == SSTOP || ki->p_stat == SSTOP)
2013 				ki->p_stat = SSTOP;
2014 			else if (p->p_stat == SSLEEP)
2015 				ki->p_stat = SSLEEP;
2016 		}
2017 	}
2018 }
2019 
2020 int
2021 sysctl_proc_args(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2022     struct proc *cp)
2023 {
2024 	struct process *vpr;
2025 	pid_t pid;
2026 	struct ps_strings pss;
2027 	struct iovec iov;
2028 	struct uio uio;
2029 	int error, cnt, op;
2030 	size_t limit;
2031 	char **rargv, **vargv;		/* reader vs. victim */
2032 	char *rarg, *varg, *buf;
2033 	struct vmspace *vm;
2034 	vaddr_t ps_strings;
2035 
2036 	if (namelen > 2)
2037 		return (ENOTDIR);
2038 	if (namelen < 2)
2039 		return (EINVAL);
2040 
2041 	pid = name[0];
2042 	op = name[1];
2043 
2044 	switch (op) {
2045 	case KERN_PROC_ARGV:
2046 	case KERN_PROC_NARGV:
2047 	case KERN_PROC_ENV:
2048 	case KERN_PROC_NENV:
2049 		break;
2050 	default:
2051 		return (EOPNOTSUPP);
2052 	}
2053 
2054 	if ((vpr = prfind(pid)) == NULL)
2055 		return (ESRCH);
2056 
2057 	if (oldp == NULL) {
2058 		if (op == KERN_PROC_NARGV || op == KERN_PROC_NENV)
2059 			*oldlenp = sizeof(int);
2060 		else
2061 			*oldlenp = ARG_MAX;	/* XXX XXX XXX */
2062 		return (0);
2063 	}
2064 
2065 	/* Either system process or exiting/zombie */
2066 	if (vpr->ps_flags & (PS_SYSTEM | PS_EXITING))
2067 		return (EINVAL);
2068 
2069 	/* Execing - danger. */
2070 	if ((vpr->ps_flags & PS_INEXEC))
2071 		return (EBUSY);
2072 
2073 	/* Only owner or root can get env */
2074 	if ((op == KERN_PROC_NENV || op == KERN_PROC_ENV) &&
2075 	    (vpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
2076 	    (error = suser(cp)) != 0))
2077 		return (error);
2078 
2079 	ps_strings = vpr->ps_strings;
2080 	vm = vpr->ps_vmspace;
2081 	uvmspace_addref(vm);
2082 	vpr = NULL;
2083 
2084 	buf = malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
2085 
2086 	iov.iov_base = &pss;
2087 	iov.iov_len = sizeof(pss);
2088 	uio.uio_iov = &iov;
2089 	uio.uio_iovcnt = 1;
2090 	uio.uio_offset = (off_t)ps_strings;
2091 	uio.uio_resid = sizeof(pss);
2092 	uio.uio_segflg = UIO_SYSSPACE;
2093 	uio.uio_rw = UIO_READ;
2094 	uio.uio_procp = cp;
2095 
2096 	if ((error = uvm_io(&vm->vm_map, &uio, 0)) != 0)
2097 		goto out;
2098 
2099 	if (op == KERN_PROC_NARGV) {
2100 		error = sysctl_rdint(oldp, oldlenp, NULL, pss.ps_nargvstr);
2101 		goto out;
2102 	}
2103 	if (op == KERN_PROC_NENV) {
2104 		error = sysctl_rdint(oldp, oldlenp, NULL, pss.ps_nenvstr);
2105 		goto out;
2106 	}
2107 
2108 	if (op == KERN_PROC_ARGV) {
2109 		cnt = pss.ps_nargvstr;
2110 		vargv = pss.ps_argvstr;
2111 	} else {
2112 		cnt = pss.ps_nenvstr;
2113 		vargv = pss.ps_envstr;
2114 	}
2115 
2116 	/* -1 to have space for a terminating NUL */
2117 	limit = *oldlenp - 1;
2118 	*oldlenp = 0;
2119 
2120 	rargv = oldp;
2121 
2122 	/*
2123 	 * *oldlenp - number of bytes copied out into readers buffer.
2124 	 * limit - maximal number of bytes allowed into readers buffer.
2125 	 * rarg - pointer into readers buffer where next arg will be stored.
2126 	 * rargv - pointer into readers buffer where the next rarg pointer
2127 	 *  will be stored.
2128 	 * vargv - pointer into victim address space where the next argument
2129 	 *  will be read.
2130 	 */
2131 
2132 	/* space for cnt pointers and a NULL */
2133 	rarg = (char *)(rargv + cnt + 1);
2134 	*oldlenp += (cnt + 1) * sizeof(char **);
2135 
2136 	while (cnt > 0 && *oldlenp < limit) {
2137 		size_t len, vstrlen;
2138 
2139 		/* Write to readers argv */
2140 		if ((error = copyout(&rarg, rargv, sizeof(rarg))) != 0)
2141 			goto out;
2142 
2143 		/* read the victim argv */
2144 		iov.iov_base = &varg;
2145 		iov.iov_len = sizeof(varg);
2146 		uio.uio_iov = &iov;
2147 		uio.uio_iovcnt = 1;
2148 		uio.uio_offset = (off_t)(vaddr_t)vargv;
2149 		uio.uio_resid = sizeof(varg);
2150 		uio.uio_segflg = UIO_SYSSPACE;
2151 		uio.uio_rw = UIO_READ;
2152 		uio.uio_procp = cp;
2153 		if ((error = uvm_io(&vm->vm_map, &uio, 0)) != 0)
2154 			goto out;
2155 
2156 		if (varg == NULL)
2157 			break;
2158 
2159 		/*
2160 		 * read the victim arg. We must jump through hoops to avoid
2161 		 * crossing a page boundary too much and returning an error.
2162 		 */
2163 more:
2164 		len = PAGE_SIZE - (((vaddr_t)varg) & PAGE_MASK);
2165 		/* leave space for the terminating NUL */
2166 		iov.iov_base = buf;
2167 		iov.iov_len = len;
2168 		uio.uio_iov = &iov;
2169 		uio.uio_iovcnt = 1;
2170 		uio.uio_offset = (off_t)(vaddr_t)varg;
2171 		uio.uio_resid = len;
2172 		uio.uio_segflg = UIO_SYSSPACE;
2173 		uio.uio_rw = UIO_READ;
2174 		uio.uio_procp = cp;
2175 		if ((error = uvm_io(&vm->vm_map, &uio, 0)) != 0)
2176 			goto out;
2177 
2178 		for (vstrlen = 0; vstrlen < len; vstrlen++) {
2179 			if (buf[vstrlen] == '\0')
2180 				break;
2181 		}
2182 
2183 		/* Don't overflow readers buffer. */
2184 		if (*oldlenp + vstrlen + 1 >= limit) {
2185 			error = ENOMEM;
2186 			goto out;
2187 		}
2188 
2189 		if ((error = copyout(buf, rarg, vstrlen)) != 0)
2190 			goto out;
2191 
2192 		*oldlenp += vstrlen;
2193 		rarg += vstrlen;
2194 
2195 		/* The string didn't end in this page? */
2196 		if (vstrlen == len) {
2197 			varg += vstrlen;
2198 			goto more;
2199 		}
2200 
2201 		/* End of string. Terminate it with a NUL */
2202 		buf[0] = '\0';
2203 		if ((error = copyout(buf, rarg, 1)) != 0)
2204 			goto out;
2205 		*oldlenp += 1;
2206 		rarg += 1;
2207 
2208 		vargv++;
2209 		rargv++;
2210 		cnt--;
2211 	}
2212 
2213 	if (*oldlenp >= limit) {
2214 		error = ENOMEM;
2215 		goto out;
2216 	}
2217 
2218 	/* Write the terminating null */
2219 	rarg = NULL;
2220 	error = copyout(&rarg, rargv, sizeof(rarg));
2221 
2222 out:
2223 	uvmspace_free(vm);
2224 	free(buf, M_TEMP, PAGE_SIZE);
2225 	return (error);
2226 }
2227 
2228 int
2229 sysctl_proc_cwd(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2230     struct proc *cp)
2231 {
2232 	struct process *findpr;
2233 	struct vnode *vp;
2234 	pid_t pid;
2235 	int error;
2236 	size_t lenused, len;
2237 	char *path, *bp, *bend;
2238 
2239 	if (namelen > 1)
2240 		return (ENOTDIR);
2241 	if (namelen < 1)
2242 		return (EINVAL);
2243 
2244 	pid = name[0];
2245 	if ((findpr = prfind(pid)) == NULL)
2246 		return (ESRCH);
2247 
2248 	if (oldp == NULL) {
2249 		*oldlenp = MAXPATHLEN * 4;
2250 		return (0);
2251 	}
2252 
2253 	/* Either system process or exiting/zombie */
2254 	if (findpr->ps_flags & (PS_SYSTEM | PS_EXITING))
2255 		return (EINVAL);
2256 
2257 	/* Only owner or root can get cwd */
2258 	if (findpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
2259 	    (error = suser(cp)) != 0)
2260 		return (error);
2261 
2262 	len = *oldlenp;
2263 	if (len > MAXPATHLEN * 4)
2264 		len = MAXPATHLEN * 4;
2265 	else if (len < 2)
2266 		return (ERANGE);
2267 	*oldlenp = 0;
2268 
2269 	/* snag a reference to the vnode before we can sleep */
2270 	vp = findpr->ps_fd->fd_cdir;
2271 	vref(vp);
2272 
2273 	path = malloc(len, M_TEMP, M_WAITOK);
2274 
2275 	bp = &path[len];
2276 	bend = bp;
2277 	*(--bp) = '\0';
2278 
2279 	/* Same as sys__getcwd */
2280 	error = vfs_getcwd_common(vp, NULL,
2281 	    &bp, path, len / 2, GETCWD_CHECK_ACCESS, cp);
2282 	if (error == 0) {
2283 		*oldlenp = lenused = bend - bp;
2284 		error = copyout(bp, oldp, lenused);
2285 	}
2286 
2287 	vrele(vp);
2288 	free(path, M_TEMP, len);
2289 
2290 	return (error);
2291 }
2292 
2293 int
2294 sysctl_proc_nobroadcastkill(int *name, u_int namelen, void *newp, size_t newlen,
2295     void *oldp, size_t *oldlenp, struct proc *cp)
2296 {
2297 	struct process *findpr;
2298 	pid_t pid;
2299 	int error, flag;
2300 
2301 	if (namelen > 1)
2302 		return (ENOTDIR);
2303 	if (namelen < 1)
2304 		return (EINVAL);
2305 
2306 	pid = name[0];
2307 	if ((findpr = prfind(pid)) == NULL)
2308 		return (ESRCH);
2309 
2310 	/* Either system process or exiting/zombie */
2311 	if (findpr->ps_flags & (PS_SYSTEM | PS_EXITING))
2312 		return (EINVAL);
2313 
2314 	/* Only root can change PS_NOBROADCASTKILL */
2315 	if (newp != NULL && (error = suser(cp)) != 0)
2316 		return (error);
2317 
2318 	/* get the PS_NOBROADCASTKILL flag */
2319 	flag = findpr->ps_flags & PS_NOBROADCASTKILL ? 1 : 0;
2320 
2321 	error = sysctl_int(oldp, oldlenp, newp, newlen, &flag);
2322 	if (error == 0 && newp) {
2323 		if (flag)
2324 			atomic_setbits_int(&findpr->ps_flags,
2325 			    PS_NOBROADCASTKILL);
2326 		else
2327 			atomic_clearbits_int(&findpr->ps_flags,
2328 			    PS_NOBROADCASTKILL);
2329 	}
2330 
2331 	return (error);
2332 }
2333 
2334 /* Arbitrary but reasonable limit for one iteration. */
2335 #define	VMMAP_MAXLEN	MAXPHYS
2336 
2337 int
2338 sysctl_proc_vmmap(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2339     struct proc *cp)
2340 {
2341 	struct process *findpr;
2342 	pid_t pid;
2343 	int error;
2344 	size_t oldlen, len;
2345 	struct kinfo_vmentry *kve, *ukve;
2346 	u_long *ustart, start;
2347 
2348 	if (namelen > 1)
2349 		return (ENOTDIR);
2350 	if (namelen < 1)
2351 		return (EINVAL);
2352 
2353 	/* Provide max buffer length as hint. */
2354 	if (oldp == NULL) {
2355 		if (oldlenp == NULL)
2356 			return (EINVAL);
2357 		else {
2358 			*oldlenp = VMMAP_MAXLEN;
2359 			return (0);
2360 		}
2361 	}
2362 
2363 	pid = name[0];
2364 	if (pid == cp->p_p->ps_pid) {
2365 		/* Self process mapping. */
2366 		findpr = cp->p_p;
2367 	} else if (pid > 0) {
2368 		if ((findpr = prfind(pid)) == NULL)
2369 			return (ESRCH);
2370 
2371 		/* Either system process or exiting/zombie */
2372 		if (findpr->ps_flags & (PS_SYSTEM | PS_EXITING))
2373 			return (EINVAL);
2374 
2375 #if 1
2376 		/* XXX Allow only root for now */
2377 		if ((error = suser(cp)) != 0)
2378 			return (error);
2379 #else
2380 		/* Only owner or root can get vmmap */
2381 		if (findpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
2382 		    (error = suser(cp)) != 0)
2383 			return (error);
2384 #endif
2385 	} else {
2386 		/* Only root can get kernel_map */
2387 		if ((error = suser(cp)) != 0)
2388 			return (error);
2389 		findpr = NULL;
2390 	}
2391 
2392 	/* Check the given size. */
2393 	oldlen = *oldlenp;
2394 	if (oldlen == 0 || oldlen % sizeof(*kve) != 0)
2395 		return (EINVAL);
2396 
2397 	/* Deny huge allocation. */
2398 	if (oldlen > VMMAP_MAXLEN)
2399 		return (EINVAL);
2400 
2401 	/*
2402 	 * Iterate from the given address passed as the first element's
2403 	 * kve_start via oldp.
2404 	 */
2405 	ukve = (struct kinfo_vmentry *)oldp;
2406 	ustart = &ukve->kve_start;
2407 	error = copyin(ustart, &start, sizeof(start));
2408 	if (error != 0)
2409 		return (error);
2410 
2411 	/* Allocate wired memory to not block. */
2412 	kve = malloc(oldlen, M_TEMP, M_WAITOK);
2413 
2414 	/* Set the base address and read entries. */
2415 	kve[0].kve_start = start;
2416 	len = oldlen;
2417 	error = fill_vmmap(findpr, kve, &len);
2418 	if (error != 0 && error != ENOMEM)
2419 		goto done;
2420 	if (len == 0)
2421 		goto done;
2422 
2423 	KASSERT(len <= oldlen);
2424 	KASSERT((len % sizeof(struct kinfo_vmentry)) == 0);
2425 
2426 	error = copyout(kve, oldp, len);
2427 
2428 done:
2429 	*oldlenp = len;
2430 
2431 	free(kve, M_TEMP, oldlen);
2432 
2433 	return (error);
2434 }
2435 #endif
2436 
2437 /*
2438  * Initialize disknames/diskstats for export by sysctl. If update is set,
2439  * then we simply update the disk statistics information.
2440  */
2441 int
2442 sysctl_diskinit(int update, struct proc *p)
2443 {
2444 	struct diskstats *sdk;
2445 	struct disk *dk;
2446 	const char *duid;
2447 	int error, changed = 0;
2448 
2449 	KERNEL_ASSERT_LOCKED();
2450 
2451 	if ((error = rw_enter(&sysctl_disklock, RW_WRITE|RW_INTR)) != 0)
2452 		return error;
2453 
2454 	/* Run in a loop, disks may change while malloc sleeps. */
2455 	while (disk_change) {
2456 		int tlen;
2457 
2458 		disk_change = 0;
2459 
2460 		tlen = 0;
2461 		TAILQ_FOREACH(dk, &disklist, dk_link) {
2462 			if (dk->dk_name)
2463 				tlen += strlen(dk->dk_name);
2464 			tlen += 18;	/* label uid + separators */
2465 		}
2466 		tlen++;
2467 
2468 		/*
2469 		 * The sysctl_disklock ensures that no other process can
2470 		 * allocate disknames and diskstats while our malloc sleeps.
2471 		 */
2472 		free(disknames, M_SYSCTL, disknameslen);
2473 		free(diskstats, M_SYSCTL, diskstatslen);
2474 		diskstats = NULL;
2475 		disknames = NULL;
2476 		diskstats = mallocarray(disk_count, sizeof(struct diskstats),
2477 		    M_SYSCTL, M_WAITOK|M_ZERO);
2478 		diskstatslen = disk_count * sizeof(struct diskstats);
2479 		disknames = malloc(tlen, M_SYSCTL, M_WAITOK|M_ZERO);
2480 		disknameslen = tlen;
2481 		disknames[0] = '\0';
2482 		changed = 1;
2483 	}
2484 
2485 	if (changed) {
2486 		int l;
2487 
2488 		l = 0;
2489 		sdk = diskstats;
2490 		TAILQ_FOREACH(dk, &disklist, dk_link) {
2491 			duid = NULL;
2492 			if (dk->dk_label && !duid_iszero(dk->dk_label->d_uid))
2493 				duid = duid_format(dk->dk_label->d_uid);
2494 			snprintf(disknames + l, disknameslen - l, "%s:%s,",
2495 			    dk->dk_name ? dk->dk_name : "",
2496 			    duid ? duid : "");
2497 			l += strlen(disknames + l);
2498 			strlcpy(sdk->ds_name, dk->dk_name,
2499 			    sizeof(sdk->ds_name));
2500 			mtx_enter(&dk->dk_mtx);
2501 			sdk->ds_busy = dk->dk_busy;
2502 			sdk->ds_rxfer = dk->dk_rxfer;
2503 			sdk->ds_wxfer = dk->dk_wxfer;
2504 			sdk->ds_seek = dk->dk_seek;
2505 			sdk->ds_rbytes = dk->dk_rbytes;
2506 			sdk->ds_wbytes = dk->dk_wbytes;
2507 			sdk->ds_attachtime = dk->dk_attachtime;
2508 			sdk->ds_timestamp = dk->dk_timestamp;
2509 			sdk->ds_time = dk->dk_time;
2510 			mtx_leave(&dk->dk_mtx);
2511 			sdk++;
2512 		}
2513 
2514 		/* Eliminate trailing comma */
2515 		if (l != 0)
2516 			disknames[l - 1] = '\0';
2517 	} else if (update) {
2518 		/* Just update, number of drives hasn't changed */
2519 		sdk = diskstats;
2520 		TAILQ_FOREACH(dk, &disklist, dk_link) {
2521 			strlcpy(sdk->ds_name, dk->dk_name,
2522 			    sizeof(sdk->ds_name));
2523 			mtx_enter(&dk->dk_mtx);
2524 			sdk->ds_busy = dk->dk_busy;
2525 			sdk->ds_rxfer = dk->dk_rxfer;
2526 			sdk->ds_wxfer = dk->dk_wxfer;
2527 			sdk->ds_seek = dk->dk_seek;
2528 			sdk->ds_rbytes = dk->dk_rbytes;
2529 			sdk->ds_wbytes = dk->dk_wbytes;
2530 			sdk->ds_attachtime = dk->dk_attachtime;
2531 			sdk->ds_timestamp = dk->dk_timestamp;
2532 			sdk->ds_time = dk->dk_time;
2533 			mtx_leave(&dk->dk_mtx);
2534 			sdk++;
2535 		}
2536 	}
2537 	rw_exit_write(&sysctl_disklock);
2538 	return 0;
2539 }
2540 
#if defined(SYSVMSG) || defined(SYSVSEM) || defined(SYSVSHM)
/*
 * Export System V IPC state selected by name[0].
 *
 * KERN_SYSVIPC_MSG_INFO is delegated to sysctl_sysvmsg().  For
 * KERN_SYSVIPC_SEM_INFO and KERN_SYSVIPC_SHM_INFO the reply is the
 * global info structure followed by as many per-identifier descriptors
 * as fit in the caller's buffer; unused slots are returned zeroed.
 *
 * With where == NULL only the total size needed is stored in *sizep.
 * Otherwise *sizep is the buffer size on entry and the number of bytes
 * produced on return.
 *
 * Returns EINVAL for a bad name, EOPNOTSUPP for a facility that is not
 * compiled in, ENOMEM when the buffer cannot hold the info structure or
 * runs out before all descriptors are stored (the part that fits is
 * still copied out), or the copyout() error.
 */
int
sysctl_sysvipc(int *name, u_int namelen, void *where, size_t *sizep)
{
#ifdef SYSVSEM
	struct sem_sysctl_info *semsi;
#endif
#ifdef SYSVSHM
	struct shm_sysctl_info *shmsi;
#endif
	size_t infosize, dssize, tsize, buflen, bufsiz;
	int i, nds, error, ret;
	void *buf;

	if (namelen != 1)
		return (EINVAL);

	buflen = *sizep;

	switch (*name) {
	case KERN_SYSVIPC_MSG_INFO:
#ifdef SYSVMSG
		return (sysctl_sysvmsg(name, namelen, where, sizep));
#else
		return (EOPNOTSUPP);
#endif
	case KERN_SYSVIPC_SEM_INFO:
#ifdef SYSVSEM
		infosize = sizeof(semsi->seminfo);
		nds = seminfo.semmni;
		dssize = sizeof(semsi->semids[0]);
		break;
#else
		return (EOPNOTSUPP);
#endif
	case KERN_SYSVIPC_SHM_INFO:
#ifdef SYSVSHM
		infosize = sizeof(shmsi->shminfo);
		nds = shminfo.shmmni;
		dssize = sizeof(shmsi->shmids[0]);
		break;
#else
		return (EOPNOTSUPP);
#endif
	default:
		return (EINVAL);
	}
	tsize = infosize + (nds * dssize);

	/* Return just the total size required. */
	if (where == NULL) {
		*sizep = tsize;
		return (0);
	}

	/* Not enough room for even the info struct. */
	if (buflen < infosize) {
		*sizep = 0;
		return (ENOMEM);
	}

	/*
	 * Size the staging buffer with a size_t comparison.  The kernel's
	 * min() operates on u_int, so a caller-supplied length above
	 * UINT_MAX would be truncated before the comparison and could
	 * yield a buffer smaller than what is written into it below.
	 */
	bufsiz = (tsize < buflen) ? tsize : buflen;
	buf = malloc(bufsiz, M_TEMP, M_WAITOK|M_ZERO);

	switch (*name) {
#ifdef SYSVSEM
	case KERN_SYSVIPC_SEM_INFO:
		semsi = (struct sem_sysctl_info *)buf;
		semsi->seminfo = seminfo;
		break;
#endif
#ifdef SYSVSHM
	case KERN_SYSVIPC_SHM_INFO:
		shmsi = (struct shm_sysctl_info *)buf;
		shmsi->shminfo = shminfo;
		break;
#endif
	}
	/* From here on buflen counts the space left in the user buffer. */
	buflen -= infosize;

	ret = 0;
	if (buflen > 0) {
		/* Fill in the IPC data structures.  */
		for (i = 0; i < nds; i++) {
			if (buflen < dssize) {
				ret = ENOMEM;
				break;
			}
			switch (*name) {
#ifdef SYSVSEM
			case KERN_SYSVIPC_SEM_INFO:
				if (sema[i] != NULL)
					memcpy(&semsi->semids[i], sema[i],
					    dssize);
				else
					memset(&semsi->semids[i], 0, dssize);
				break;
#endif
#ifdef SYSVSHM
			case KERN_SYSVIPC_SHM_INFO:
				if (shmsegs[i] != NULL)
					memcpy(&shmsi->shmids[i], shmsegs[i],
					    dssize);
				else
					memset(&shmsi->shmids[i], 0, dssize);
				break;
#endif
			}
			buflen -= dssize;
		}
	}
	/* Report and copy out only the bytes that were actually filled. */
	*sizep -= buflen;
	error = copyout(buf, where, *sizep);
	free(buf, M_TEMP, bufsiz);
	/* If copyout succeeded, use return code set earlier. */
	return (error ? error : ret);
}
#endif /* SYSVMSG || SYSVSEM || SYSVSHM */
2658 
2659 #ifndef	SMALL_KERNEL
2660 
2661 int
2662 sysctl_intrcnt(int *name, u_int namelen, void *oldp, size_t *oldlenp)
2663 {
2664 	return (evcount_sysctl(name, namelen, oldp, oldlenp, NULL, 0));
2665 }
2666 
2667 
2668 int
2669 sysctl_sensors(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2670     void *newp, size_t newlen)
2671 {
2672 	struct ksensor *ks;
2673 	struct sensor *us;
2674 	struct ksensordev *ksd;
2675 	struct sensordev *usd;
2676 	int dev, numt, ret;
2677 	enum sensor_type type;
2678 
2679 	if (namelen != 1 && namelen != 3)
2680 		return (ENOTDIR);
2681 
2682 	dev = name[0];
2683 	if (namelen == 1) {
2684 		ret = sensordev_get(dev, &ksd);
2685 		if (ret)
2686 			return (ret);
2687 
2688 		/* Grab a copy, to clear the kernel pointers */
2689 		usd = malloc(sizeof(*usd), M_TEMP, M_WAITOK|M_ZERO);
2690 		usd->num = ksd->num;
2691 		strlcpy(usd->xname, ksd->xname, sizeof(usd->xname));
2692 		memcpy(usd->maxnumt, ksd->maxnumt, sizeof(usd->maxnumt));
2693 		usd->sensors_count = ksd->sensors_count;
2694 
2695 		ret = sysctl_rdstruct(oldp, oldlenp, newp, usd,
2696 		    sizeof(struct sensordev));
2697 
2698 		free(usd, M_TEMP, sizeof(*usd));
2699 		return (ret);
2700 	}
2701 
2702 	type = name[1];
2703 	numt = name[2];
2704 
2705 	ret = sensor_find(dev, type, numt, &ks);
2706 	if (ret)
2707 		return (ret);
2708 
2709 	/* Grab a copy, to clear the kernel pointers */
2710 	us = malloc(sizeof(*us), M_TEMP, M_WAITOK|M_ZERO);
2711 	memcpy(us->desc, ks->desc, sizeof(us->desc));
2712 	us->tv = ks->tv;
2713 	us->value = ks->value;
2714 	us->type = ks->type;
2715 	us->status = ks->status;
2716 	us->numt = ks->numt;
2717 	us->flags = ks->flags;
2718 
2719 	ret = sysctl_rdstruct(oldp, oldlenp, newp, us,
2720 	    sizeof(struct sensor));
2721 	free(us, M_TEMP, sizeof(*us));
2722 	return (ret);
2723 }
2724 #endif	/* SMALL_KERNEL */
2725 
2726 int
2727 sysctl_cptime2(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2728     void *newp, size_t newlen)
2729 {
2730 	CPU_INFO_ITERATOR cii;
2731 	struct cpu_info *ci;
2732 	int found = 0;
2733 
2734 	if (namelen != 1)
2735 		return (ENOTDIR);
2736 
2737 	CPU_INFO_FOREACH(cii, ci) {
2738 		if (name[0] == CPU_INFO_UNIT(ci)) {
2739 			found = 1;
2740 			break;
2741 		}
2742 	}
2743 	if (!found)
2744 		return (ENOENT);
2745 
2746 	return (sysctl_rdstruct(oldp, oldlenp, newp,
2747 	    &ci->ci_schedstate.spc_cp_time,
2748 	    sizeof(ci->ci_schedstate.spc_cp_time)));
2749 }
2750 
#if NAUDIO > 0
/*
 * Audio sysctl subtree.  The only node handled is KERN_AUDIO_RECORD,
 * which reads or sets the audio_record_enable integer.
 *
 * Returns ENOTDIR unless the name has exactly one element, ENOENT for
 * any other node, or the sysctl_int() result.
 */
int
sysctl_audio(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case KERN_AUDIO_RECORD:
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &audio_record_enable));
	default:
		return (ENOENT);
	}
}
#endif
2765 
#if NVIDEO > 0
/*
 * Video sysctl subtree.  The only node handled is KERN_VIDEO_RECORD,
 * which reads or sets the video_record_enable integer.
 *
 * Returns ENOTDIR unless the name has exactly one element, ENOENT for
 * any other node, or the sysctl_int() result.
 */
int
sysctl_video(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case KERN_VIDEO_RECORD:
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &video_record_enable));
	default:
		return (ENOENT);
	}
}
#endif
2780 
2781 int
2782 sysctl_cpustats(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2783     void *newp, size_t newlen)
2784 {
2785 	CPU_INFO_ITERATOR cii;
2786 	struct cpustats cs;
2787 	struct cpu_info *ci;
2788 	int found = 0;
2789 
2790 	if (namelen != 1)
2791 		return (ENOTDIR);
2792 
2793 	CPU_INFO_FOREACH(cii, ci) {
2794 		if (name[0] == CPU_INFO_UNIT(ci)) {
2795 			found = 1;
2796 			break;
2797 		}
2798 	}
2799 	if (!found)
2800 		return (ENOENT);
2801 
2802 	memset(&cs, 0, sizeof cs);
2803 	memcpy(&cs.cs_time, &ci->ci_schedstate.spc_cp_time, sizeof(cs.cs_time));
2804 	cs.cs_flags = 0;
2805 	if (cpu_is_online(ci))
2806 		cs.cs_flags |= CPUSTATS_ONLINE;
2807 
2808 	return (sysctl_rdstruct(oldp, oldlenp, newp, &cs, sizeof(cs)));
2809 }
2810 
2811 int
2812 sysctl_utc_offset(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
2813 {
2814 	struct timespec adjusted, now;
2815 	int adjustment_seconds, error, new_offset_minutes, old_offset_minutes;
2816 
2817 	old_offset_minutes = utc_offset / 60;	/* seconds -> minutes */
2818 	new_offset_minutes = old_offset_minutes;
2819 	error = sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
2820 	     &new_offset_minutes);
2821 	if (error)
2822 		return error;
2823 	if (new_offset_minutes < -24 * 60 || new_offset_minutes > 24 * 60)
2824 		return EINVAL;
2825 	if (new_offset_minutes == old_offset_minutes)
2826 		return 0;
2827 
2828 	utc_offset = new_offset_minutes * 60;	/* minutes -> seconds */
2829 	adjustment_seconds = (new_offset_minutes - old_offset_minutes) * 60;
2830 
2831 	nanotime(&now);
2832 	adjusted = now;
2833 	adjusted.tv_sec -= adjustment_seconds;
2834 	tc_setrealtimeclock(&adjusted);
2835 	resettodr();
2836 
2837 	return 0;
2838 }
2839