xref: /openbsd-src/sys/kern/kern_sysctl.c (revision dcc91c2622318df8f66a9bca2d2864253df1bfc3)
1 /*	$OpenBSD: kern_sysctl.c,v 1.446 2024/08/29 10:44:40 bluhm Exp $	*/
2 /*	$NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $	*/
3 
4 /*-
5  * Copyright (c) 1982, 1986, 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Mike Karels at Berkeley Software Design, Inc.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)kern_sysctl.c	8.4 (Berkeley) 4/14/94
36  */
37 
38 /*
39  * sysctl system call.
40  */
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/atomic.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/pool.h>
48 #include <sys/proc.h>
49 #include <sys/resourcevar.h>
50 #include <sys/signalvar.h>
51 #include <sys/fcntl.h>
52 #include <sys/file.h>
53 #include <sys/filedesc.h>
54 #include <sys/vnode.h>
55 #include <sys/unistd.h>
56 #include <sys/buf.h>
57 #include <sys/clockintr.h>
58 #include <sys/tty.h>
59 #include <sys/disklabel.h>
60 #include <sys/disk.h>
61 #include <sys/sysctl.h>
62 #include <sys/msgbuf.h>
63 #include <sys/vmmeter.h>
64 #include <sys/namei.h>
65 #include <sys/exec.h>
66 #include <sys/mbuf.h>
67 #include <sys/percpu.h>
68 #include <sys/sensors.h>
69 #include <sys/pipe.h>
70 #include <sys/eventvar.h>
71 #include <sys/socketvar.h>
72 #include <sys/socket.h>
73 #include <sys/domain.h>
74 #include <sys/protosw.h>
75 #include <sys/pledge.h>
76 #include <sys/timetc.h>
77 #include <sys/evcount.h>
78 #include <sys/un.h>
79 #include <sys/unpcb.h>
80 #include <sys/sched.h>
81 #include <sys/mount.h>
82 #include <sys/syscallargs.h>
83 #include <sys/wait.h>
84 #include <sys/witness.h>
85 
86 #include <uvm/uvm_extern.h>
87 
88 #include <dev/cons.h>
89 
90 #include <dev/usb/ucomvar.h>
91 
92 #include <net/route.h>
93 #include <netinet/in.h>
94 #include <netinet/ip.h>
95 #include <netinet/ip_var.h>
96 #include <netinet/in_pcb.h>
97 #include <netinet/ip6.h>
98 #include <netinet/tcp.h>
99 #include <netinet/tcp_timer.h>
100 #include <netinet/tcp_var.h>
101 #include <netinet/udp.h>
102 #include <netinet/udp_var.h>
103 #include <netinet6/ip6_var.h>
104 
105 #ifdef DDB
106 #include <ddb/db_var.h>
107 #endif
108 
109 #ifdef SYSVMSG
110 #include <sys/msg.h>
111 #endif
112 #ifdef SYSVSEM
113 #include <sys/sem.h>
114 #endif
115 #ifdef SYSVSHM
116 #include <sys/shm.h>
117 #endif
118 
119 #include "audio.h"
120 #include "dt.h"
121 #include "pf.h"
122 #include "ucom.h"
123 #include "video.h"
124 
125 extern struct forkstat forkstat;
126 extern struct nchstats nchstats;
127 extern int fscale;
128 extern fixpt_t ccpu;
129 extern long numvnodes;
130 extern int allowdt;
131 extern int audio_record_enable;
132 extern int video_record_enable;
133 extern int autoconf_serial;
134 
135 int allowkmem;
136 
137 int sysctl_securelevel(void *, size_t *, void *, size_t, struct proc *);
138 int sysctl_diskinit(int, struct proc *);
139 int sysctl_proc_args(int *, u_int, void *, size_t *, struct proc *);
140 int sysctl_proc_cwd(int *, u_int, void *, size_t *, struct proc *);
141 int sysctl_proc_nobroadcastkill(int *, u_int, void *, size_t, void *, size_t *,
142 	struct proc *);
143 int sysctl_proc_vmmap(int *, u_int, void *, size_t *, struct proc *);
144 int sysctl_intrcnt(int *, u_int, void *, size_t *);
145 int sysctl_sensors(int *, u_int, void *, size_t *, void *, size_t);
146 int sysctl_cptime2(int *, u_int, void *, size_t *, void *, size_t);
147 int sysctl_audio(int *, u_int, void *, size_t *, void *, size_t);
148 int sysctl_video(int *, u_int, void *, size_t *, void *, size_t);
149 int sysctl_cpustats(int *, u_int, void *, size_t *, void *, size_t);
150 int sysctl_utc_offset(void *, size_t *, void *, size_t);
151 int sysctl_hwbattery(int *, u_int, void *, size_t *, void *, size_t);
152 
153 void fill_file(struct kinfo_file *, struct file *, struct filedesc *, int,
154     struct vnode *, struct process *, struct proc *, struct socket *, int);
155 void fill_kproc(struct process *, struct kinfo_proc *, struct proc *, int);
156 
157 int kern_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t,
158 	struct proc *);
159 int hw_sysctl_locked(int *, u_int, void *, size_t *,void *, size_t,
160 	struct proc *);
161 
162 int (*cpu_cpuspeed)(int *);
163 
164 /*
165  * Lock to avoid too many processes vslocking a large amount of memory
166  * at the same time.
167  */
168 struct rwlock sysctl_lock = RWLOCK_INITIALIZER("sysctllk");
169 struct rwlock sysctl_disklock = RWLOCK_INITIALIZER("sysctldlk");
170 
171 int
172 sysctl_vslock(void *addr, size_t len)
173 {
174 	int error;
175 
176 	error = rw_enter(&sysctl_lock, RW_WRITE|RW_INTR);
177 	if (error)
178 		return (error);
179 	KERNEL_LOCK();
180 
181 	if (addr) {
182 		if (atop(len) > uvmexp.wiredmax - uvmexp.wired) {
183 			error = ENOMEM;
184 			goto out;
185 		}
186 		error = uvm_vslock(curproc, addr, len, PROT_READ | PROT_WRITE);
187 		if (error)
188 			goto out;
189 	}
190 
191 	return (0);
192 out:
193 	KERNEL_UNLOCK();
194 	rw_exit_write(&sysctl_lock);
195 	return (error);
196 }
197 
198 void
199 sysctl_vsunlock(void *addr, size_t len)
200 {
201 	KERNEL_ASSERT_LOCKED();
202 
203 	if (addr)
204 		uvm_vsunlock(curproc, addr, len);
205 	KERNEL_UNLOCK();
206 	rw_exit_write(&sysctl_lock);
207 }
208 
209 int
210 sys_sysctl(struct proc *p, void *v, register_t *retval)
211 {
212 	struct sys_sysctl_args /* {
213 		syscallarg(const int *) name;
214 		syscallarg(u_int) namelen;
215 		syscallarg(void *) old;
216 		syscallarg(size_t *) oldlenp;
217 		syscallarg(void *) new;
218 		syscallarg(size_t) newlen;
219 	} */ *uap = v;
220 	int error, dolock = 1;
221 	size_t savelen = 0, oldlen = 0;
222 	sysctlfn *fn;
223 	int name[CTL_MAXNAME];
224 
225 	if (SCARG(uap, new) != NULL &&
226 	    (error = suser(p)))
227 		return (error);
228 	/*
229 	 * all top-level sysctl names are non-terminal
230 	 */
231 	if (SCARG(uap, namelen) > CTL_MAXNAME || SCARG(uap, namelen) < 2)
232 		return (EINVAL);
233 	error = copyin(SCARG(uap, name), name,
234 		       SCARG(uap, namelen) * sizeof(int));
235 	if (error)
236 		return (error);
237 
238 	error = pledge_sysctl(p, SCARG(uap, namelen),
239 	    name, SCARG(uap, new));
240 	if (error)
241 		return (error);
242 
243 	switch (name[0]) {
244 	case CTL_KERN:
245 		dolock = 0;
246 		fn = kern_sysctl;
247 		break;
248 	case CTL_HW:
249 		dolock = 0;
250 		fn = hw_sysctl;
251 		break;
252 	case CTL_VM:
253 		fn = uvm_sysctl;
254 		break;
255 	case CTL_NET:
256 		dolock = 0;
257 		fn = net_sysctl;
258 		break;
259 	case CTL_FS:
260 		fn = fs_sysctl;
261 		break;
262 	case CTL_VFS:
263 		fn = vfs_sysctl;
264 		break;
265 	case CTL_MACHDEP:
266 		fn = cpu_sysctl;
267 		break;
268 #ifdef DEBUG_SYSCTL
269 	case CTL_DEBUG:
270 		fn = debug_sysctl;
271 		break;
272 #endif
273 #ifdef DDB
274 	case CTL_DDB:
275 		fn = ddb_sysctl;
276 		break;
277 #endif
278 	default:
279 		return (EOPNOTSUPP);
280 	}
281 
282 	if (SCARG(uap, oldlenp) &&
283 	    (error = copyin(SCARG(uap, oldlenp), &oldlen, sizeof(oldlen))))
284 		return (error);
285 
286 	if (dolock) {
287 		error = sysctl_vslock(SCARG(uap, old), oldlen);
288 		if (error)
289 			return (error);
290 		savelen = oldlen;
291 	}
292 	error = (*fn)(&name[1], SCARG(uap, namelen) - 1, SCARG(uap, old),
293 	    &oldlen, SCARG(uap, new), SCARG(uap, newlen), p);
294 	if (dolock)
295 		sysctl_vsunlock(SCARG(uap, old), savelen);
296 
297 	if (error)
298 		return (error);
299 	if (SCARG(uap, oldlenp))
300 		error = copyout(&oldlen, SCARG(uap, oldlenp), sizeof(oldlen));
301 	return (error);
302 }
303 
304 /*
305  * Attributes stored in the kernel.
306  */
307 char hostname[MAXHOSTNAMELEN];
308 int hostnamelen;
309 char domainname[MAXHOSTNAMELEN];
310 int domainnamelen;
311 int hostid;
312 char *disknames = NULL;
313 size_t disknameslen;
314 struct diskstats *diskstats = NULL;
315 size_t diskstatslen;
316 int securelevel;
317 
318 /* morally const values reported by sysctl_bounded_arr */
319 static int arg_max = ARG_MAX;
320 static int openbsd = OpenBSD;
321 static int posix_version = _POSIX_VERSION;
322 static int ngroups_max = NGROUPS_MAX;
323 static int int_zero = 0;
324 static int int_one = 1;
325 static int maxpartitions = MAXPARTITIONS;
326 static int raw_part = RAW_PART;
327 
328 extern int somaxconn, sominconn;
329 extern int nosuidcoredump;
330 extern int maxlocksperuid;
331 extern int uvm_wxabort;
332 extern int global_ptrace;
333 
334 const struct sysctl_bounded_args kern_vars[] = {
335 	{KERN_OSREV, &openbsd, SYSCTL_INT_READONLY},
336 	{KERN_MAXVNODES, &maxvnodes, 0, INT_MAX},
337 	{KERN_MAXPROC, &maxprocess, 0, INT_MAX},
338 	{KERN_MAXFILES, &maxfiles, 0, INT_MAX},
339 	{KERN_NFILES, &numfiles, SYSCTL_INT_READONLY},
340 	{KERN_TTYCOUNT, &tty_count, SYSCTL_INT_READONLY},
341 	{KERN_ARGMAX, &arg_max, SYSCTL_INT_READONLY},
342 	{KERN_POSIX1, &posix_version, SYSCTL_INT_READONLY},
343 	{KERN_NGROUPS, &ngroups_max, SYSCTL_INT_READONLY},
344 	{KERN_JOB_CONTROL, &int_one, SYSCTL_INT_READONLY},
345 	{KERN_SAVED_IDS, &int_one, SYSCTL_INT_READONLY},
346 	{KERN_MAXPARTITIONS, &maxpartitions, SYSCTL_INT_READONLY},
347 	{KERN_RAWPARTITION, &raw_part, SYSCTL_INT_READONLY},
348 	{KERN_MAXTHREAD, &maxthread, 0, INT_MAX},
349 	{KERN_NTHREADS, &nthreads, SYSCTL_INT_READONLY},
350 	{KERN_SOMAXCONN, &somaxconn, 0, SHRT_MAX},
351 	{KERN_SOMINCONN, &sominconn, 0, SHRT_MAX},
352 	{KERN_NOSUIDCOREDUMP, &nosuidcoredump, 0, 3},
353 	{KERN_FSYNC, &int_one, SYSCTL_INT_READONLY},
354 	{KERN_SYSVMSG,
355 #ifdef SYSVMSG
356 	 &int_one,
357 #else
358 	 &int_zero,
359 #endif
360 	 SYSCTL_INT_READONLY},
361 	{KERN_SYSVSEM,
362 #ifdef SYSVSEM
363 	 &int_one,
364 #else
365 	 &int_zero,
366 #endif
367 	 SYSCTL_INT_READONLY},
368 	{KERN_SYSVSHM,
369 #ifdef SYSVSHM
370 	 &int_one,
371 #else
372 	 &int_zero,
373 #endif
374 	 SYSCTL_INT_READONLY},
375 	{KERN_FSCALE, &fscale, SYSCTL_INT_READONLY},
376 	{KERN_CCPU, &ccpu, SYSCTL_INT_READONLY},
377 	{KERN_NPROCS, &nprocesses, SYSCTL_INT_READONLY},
378 	{KERN_SPLASSERT, &splassert_ctl, 0, 3},
379 	{KERN_MAXLOCKSPERUID, &maxlocksperuid, 0, INT_MAX},
380 	{KERN_WXABORT, &uvm_wxabort, 0, 1},
381 	{KERN_NETLIVELOCKS, &int_zero, SYSCTL_INT_READONLY},
382 #ifdef PTRACE
383 	{KERN_GLOBAL_PTRACE, &global_ptrace, 0, 1},
384 #endif
385 	{KERN_AUTOCONF_SERIAL, &autoconf_serial, SYSCTL_INT_READONLY},
386 };
387 
388 int
389 kern_sysctl_dirs(int top_name, int *name, u_int namelen,
390     void *oldp, size_t *oldlenp, void *newp, size_t newlen, struct proc *p)
391 {
392 	switch (top_name) {
393 #ifndef SMALL_KERNEL
394 	case KERN_PROC:
395 		return (sysctl_doproc(name, namelen, oldp, oldlenp));
396 	case KERN_PROC_ARGS:
397 		return (sysctl_proc_args(name, namelen, oldp, oldlenp, p));
398 	case KERN_PROC_CWD:
399 		return (sysctl_proc_cwd(name, namelen, oldp, oldlenp, p));
400 	case KERN_PROC_NOBROADCASTKILL:
401 		return (sysctl_proc_nobroadcastkill(name, namelen,
402 		     newp, newlen, oldp, oldlenp, p));
403 	case KERN_PROC_VMMAP:
404 		return (sysctl_proc_vmmap(name, namelen, oldp, oldlenp, p));
405 	case KERN_FILE:
406 		return (sysctl_file(name, namelen, oldp, oldlenp, p));
407 #endif
408 #if defined(GPROF) || defined(DDBPROF)
409 	case KERN_PROF:
410 		return (sysctl_doprof(name, namelen, oldp, oldlenp,
411 		    newp, newlen));
412 #endif
413 	case KERN_MALLOCSTATS:
414 		return (sysctl_malloc(name, namelen, oldp, oldlenp,
415 		    newp, newlen, p));
416 	case KERN_TTY:
417 		return (sysctl_tty(name, namelen, oldp, oldlenp,
418 		    newp, newlen));
419 	case KERN_POOL:
420 		return (sysctl_dopool(name, namelen, oldp, oldlenp));
421 #if defined(SYSVMSG) || defined(SYSVSEM) || defined(SYSVSHM)
422 	case KERN_SYSVIPC_INFO:
423 		return (sysctl_sysvipc(name, namelen, oldp, oldlenp));
424 #endif
425 #ifdef SYSVSEM
426 	case KERN_SEMINFO:
427 		return (sysctl_sysvsem(name, namelen, oldp, oldlenp,
428 		    newp, newlen));
429 #endif
430 #ifdef SYSVSHM
431 	case KERN_SHMINFO:
432 		return (sysctl_sysvshm(name, namelen, oldp, oldlenp,
433 		    newp, newlen));
434 #endif
435 #ifndef SMALL_KERNEL
436 	case KERN_INTRCNT:
437 		return (sysctl_intrcnt(name, namelen, oldp, oldlenp));
438 	case KERN_WATCHDOG:
439 		return (sysctl_wdog(name, namelen, oldp, oldlenp,
440 		    newp, newlen));
441 #endif
442 #ifndef SMALL_KERNEL
443 	case KERN_EVCOUNT:
444 		return (evcount_sysctl(name, namelen, oldp, oldlenp,
445 		    newp, newlen));
446 #endif
447 	case KERN_TIMECOUNTER:
448 		return (sysctl_tc(name, namelen, oldp, oldlenp, newp, newlen));
449 	case KERN_CPTIME2:
450 		return (sysctl_cptime2(name, namelen, oldp, oldlenp,
451 		    newp, newlen));
452 #ifdef WITNESS
453 	case KERN_WITNESSWATCH:
454 		return witness_sysctl_watch(oldp, oldlenp, newp, newlen);
455 	case KERN_WITNESS:
456 		return witness_sysctl(name, namelen, oldp, oldlenp,
457 		    newp, newlen);
458 #endif
459 #if NVIDEO > 0
460 	case KERN_VIDEO:
461 		return (sysctl_video(name, namelen, oldp, oldlenp,
462 		    newp, newlen));
463 #endif
464 	case KERN_CPUSTATS:
465 		return (sysctl_cpustats(name, namelen, oldp, oldlenp,
466 		    newp, newlen));
467 	case KERN_CLOCKINTR:
468 		return sysctl_clockintr(name, namelen, oldp, oldlenp, newp,
469 		    newlen);
470 	default:
471 		return (ENOTDIR);	/* overloaded */
472 	}
473 }
474 
475 /*
476  * kernel related system variables.
477  */
478 int
479 kern_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
480     size_t newlen, struct proc *p)
481 {
482 	int error;
483 	size_t savelen;
484 
485 	/* dispatch the non-terminal nodes first */
486 	if (namelen != 1) {
487 		switch (name[0]) {
488 #if NAUDIO > 0
489 		case KERN_AUDIO:
490 			return (sysctl_audio(name + 1, namelen - 1,
491 			    oldp, oldlenp, newp, newlen));
492 #endif
493 		default:
494 			break;
495 		}
496 
497 		savelen = *oldlenp;
498 		if ((error = sysctl_vslock(oldp, savelen)))
499 			return (error);
500 		error = kern_sysctl_dirs(name[0], name + 1, namelen - 1,
501 		    oldp, oldlenp, newp, newlen, p);
502 		sysctl_vsunlock(oldp, savelen);
503 		return (error);
504 	}
505 
506 	switch (name[0]) {
507 	case KERN_OSTYPE:
508 		return (sysctl_rdstring(oldp, oldlenp, newp, ostype));
509 	case KERN_OSRELEASE:
510 		return (sysctl_rdstring(oldp, oldlenp, newp, osrelease));
511 	case KERN_OSVERSION:
512 		return (sysctl_rdstring(oldp, oldlenp, newp, osversion));
513 	case KERN_VERSION:
514 		return (sysctl_rdstring(oldp, oldlenp, newp, version));
515 	case KERN_NUMVNODES:  /* XXX numvnodes is a long */
516 		return (sysctl_rdint(oldp, oldlenp, newp, numvnodes));
517 #if NDT > 0
518 	case KERN_ALLOWDT:
519 		return (sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
520 		    &allowdt));
521 #endif
522 	case KERN_HOSTID:
523 		return (sysctl_int(oldp, oldlenp, newp, newlen, &hostid));
524 	case KERN_CLOCKRATE:
525 		return (sysctl_clockrate(oldp, oldlenp, newp));
526 	case KERN_BOOTTIME: {
527 		struct timeval bt;
528 		memset(&bt, 0, sizeof bt);
529 		microboottime(&bt);
530 		return (sysctl_rdstruct(oldp, oldlenp, newp, &bt, sizeof bt));
531 	}
532 	case KERN_MBSTAT: {
533 		uint64_t counters[MBSTAT_COUNT];
534 		struct mbstat mbs;
535 		unsigned int i;
536 
537 		memset(&mbs, 0, sizeof(mbs));
538 		counters_read(mbstat, counters, MBSTAT_COUNT, NULL);
539 		for (i = 0; i < MBSTAT_TYPES; i++)
540 			mbs.m_mtypes[i] = counters[i];
541 
542 		mbs.m_drops = counters[MBSTAT_DROPS];
543 		mbs.m_wait = counters[MBSTAT_WAIT];
544 		mbs.m_drain = counters[MBSTAT_DRAIN];
545 		mbs.m_defrag_alloc = counters[MBSTAT_DEFRAG_ALLOC];
546 		mbs.m_prepend_alloc = counters[MBSTAT_PREPEND_ALLOC];
547 		mbs.m_pullup_alloc = counters[MBSTAT_PULLUP_ALLOC];
548 		mbs.m_pullup_copy = counters[MBSTAT_PULLUP_COPY];
549 		mbs.m_pulldown_alloc = counters[MBSTAT_PULLDOWN_ALLOC];
550 		mbs.m_pulldown_copy = counters[MBSTAT_PULLDOWN_COPY];
551 
552 		return (sysctl_rdstruct(oldp, oldlenp, newp,
553 		    &mbs, sizeof(mbs)));
554 	}
555 	case KERN_MSGBUFSIZE:
556 	case KERN_CONSBUFSIZE: {
557 		struct msgbuf *mp;
558 		mp = (name[0] == KERN_MSGBUFSIZE) ? msgbufp : consbufp;
559 		/*
560 		 * deal with cases where the message buffer has
561 		 * become corrupted.
562 		 */
563 		if (!mp || mp->msg_magic != MSG_MAGIC)
564 			return (ENXIO);
565 		return (sysctl_rdint(oldp, oldlenp, newp, mp->msg_bufs));
566 	}
567 	case KERN_OSREV:
568 	case KERN_MAXPROC:
569 	case KERN_MAXFILES:
570 	case KERN_NFILES:
571 	case KERN_TTYCOUNT:
572 	case KERN_ARGMAX:
573 	case KERN_POSIX1:
574 	case KERN_NGROUPS:
575 	case KERN_JOB_CONTROL:
576 	case KERN_SAVED_IDS:
577 	case KERN_MAXPARTITIONS:
578 	case KERN_RAWPARTITION:
579 	case KERN_MAXTHREAD:
580 	case KERN_NTHREADS:
581 	case KERN_SOMAXCONN:
582 	case KERN_SOMINCONN:
583 	case KERN_FSYNC:
584 	case KERN_SYSVMSG:
585 	case KERN_SYSVSEM:
586 	case KERN_SYSVSHM:
587 	case KERN_FSCALE:
588 	case KERN_CCPU:
589 	case KERN_NPROCS:
590 	case KERN_NETLIVELOCKS:
591 	case KERN_AUTOCONF_SERIAL:
592 		return (sysctl_bounded_arr(kern_vars, nitems(kern_vars), name,
593 		    namelen, oldp, oldlenp, newp, newlen));
594 	}
595 
596 	savelen = *oldlenp;
597 	if ((error = sysctl_vslock(oldp, savelen)))
598 		return (error);
599 	error = kern_sysctl_locked(name, namelen, oldp, oldlenp,
600 	    newp, newlen, p);
601 	sysctl_vsunlock(oldp, savelen);
602 
603 	return (error);
604 }
605 
606 int
607 kern_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
608     void *newp, size_t newlen, struct proc *p)
609 {
610 	int error, stackgap;
611 	dev_t dev;
612 	extern int pool_debug;
613 
614 	switch (name[0]) {
615 	case KERN_SECURELVL:
616 		return (sysctl_securelevel(oldp, oldlenp, newp, newlen, p));
617 	case KERN_ALLOWKMEM:
618 		return (sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
619 		    &allowkmem));
620 	case KERN_HOSTNAME:
621 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
622 		    hostname, sizeof(hostname));
623 		if (newp && !error)
624 			hostnamelen = newlen;
625 		return (error);
626 	case KERN_DOMAINNAME:
627 		if (securelevel >= 1 && domainnamelen && newp)
628 			error = EPERM;
629 		else
630 			error = sysctl_tstring(oldp, oldlenp, newp, newlen,
631 			    domainname, sizeof(domainname));
632 		if (newp && !error)
633 			domainnamelen = newlen;
634 		return (error);
635 	case KERN_CONSBUF:
636 		if ((error = suser(p)))
637 			return (error);
638 		/* FALLTHROUGH */
639 	case KERN_MSGBUF: {
640 		struct msgbuf *mp;
641 		mp = (name[0] == KERN_MSGBUF) ? msgbufp : consbufp;
642 		/*
643 		 * deal with cases where the message buffer has
644 		 * become corrupted.
645 		 */
646 		if (!mp || mp->msg_magic != MSG_MAGIC)
647 			return (ENXIO);
648 		return (sysctl_rdstruct(oldp, oldlenp, newp, mp,
649 		    mp->msg_bufs + offsetof(struct msgbuf, msg_bufc)));
650 	}
651 	case KERN_CPTIME:
652 	{
653 		CPU_INFO_ITERATOR cii;
654 		struct cpu_info *ci;
655 		long cp_time[CPUSTATES];
656 		int i, n = 0;
657 
658 		memset(cp_time, 0, sizeof(cp_time));
659 
660 		CPU_INFO_FOREACH(cii, ci) {
661 			if (!cpu_is_online(ci))
662 				continue;
663 			n++;
664 			for (i = 0; i < CPUSTATES; i++)
665 				cp_time[i] += ci->ci_schedstate.spc_cp_time[i];
666 		}
667 
668 		for (i = 0; i < CPUSTATES; i++)
669 			cp_time[i] /= n;
670 
671 		return (sysctl_rdstruct(oldp, oldlenp, newp, &cp_time,
672 		    sizeof(cp_time)));
673 	}
674 	case KERN_NCHSTATS:
675 		return (sysctl_rdstruct(oldp, oldlenp, newp, &nchstats,
676 		    sizeof(struct nchstats)));
677 	case KERN_FORKSTAT:
678 		return (sysctl_rdstruct(oldp, oldlenp, newp, &forkstat,
679 		    sizeof(struct forkstat)));
680 	case KERN_STACKGAPRANDOM:
681 		stackgap = stackgap_random;
682 		error = sysctl_int(oldp, oldlenp, newp, newlen, &stackgap);
683 		if (error)
684 			return (error);
685 		/*
686 		 * Safety harness.
687 		 */
688 		if ((stackgap < ALIGNBYTES && stackgap != 0) ||
689 		    !powerof2(stackgap) || stackgap >= MAXSSIZ)
690 			return (EINVAL);
691 		stackgap_random = stackgap;
692 		return (0);
693 	case KERN_MAXCLUSTERS: {
694 		int val = nmbclust;
695 		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
696 		if (error == 0 && val != nmbclust)
697 			error = nmbclust_update(val);
698 		return (error);
699 	}
700 	case KERN_CACHEPCT: {
701 		u_int64_t dmapages;
702 		int opct, pgs;
703 		opct = bufcachepercent;
704 		error = sysctl_int(oldp, oldlenp, newp, newlen,
705 		    &bufcachepercent);
706 		if (error)
707 			return(error);
708 		if (bufcachepercent > 90 || bufcachepercent < 5) {
709 			bufcachepercent = opct;
710 			return (EINVAL);
711 		}
712 		dmapages = uvm_pagecount(&dma_constraint);
713 		if (bufcachepercent != opct) {
714 			pgs = bufcachepercent * dmapages / 100;
715 			bufadjust(pgs); /* adjust bufpages */
716 			bufhighpages = bufpages; /* set high water mark */
717 		}
718 		return(0);
719 	}
720 	case KERN_CONSDEV:
721 		if (cn_tab != NULL)
722 			dev = cn_tab->cn_dev;
723 		else
724 			dev = NODEV;
725 		return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev));
726 	case KERN_POOL_DEBUG: {
727 		int old_pool_debug = pool_debug;
728 
729 		error = sysctl_int(oldp, oldlenp, newp, newlen,
730 		    &pool_debug);
731 		if (error == 0 && pool_debug != old_pool_debug)
732 			pool_reclaim_all();
733 		return (error);
734 	}
735 #if NPF > 0
736 	case KERN_PFSTATUS:
737 		return (pf_sysctl(oldp, oldlenp, newp, newlen));
738 #endif
739 	case KERN_TIMEOUT_STATS:
740 		return (timeout_sysctl(oldp, oldlenp, newp, newlen));
741 	case KERN_UTC_OFFSET:
742 		return (sysctl_utc_offset(oldp, oldlenp, newp, newlen));
743 	default:
744 		return (sysctl_bounded_arr(kern_vars, nitems(kern_vars), name,
745 		    namelen, oldp, oldlenp, newp, newlen));
746 	}
747 	/* NOTREACHED */
748 }
749 
750 /*
751  * hardware related system variables.
752  */
753 char *hw_vendor, *hw_prod, *hw_uuid, *hw_serial, *hw_ver;
754 int allowpowerdown = 1;
755 int hw_power = 1;
756 
757 /* morally const values reported by sysctl_bounded_arr */
758 static int byte_order = BYTE_ORDER;
759 
760 const struct sysctl_bounded_args hw_vars[] = {
761 	{HW_NCPU, &ncpus, SYSCTL_INT_READONLY},
762 	{HW_NCPUFOUND, &ncpusfound, SYSCTL_INT_READONLY},
763 	{HW_BYTEORDER, &byte_order, SYSCTL_INT_READONLY},
764 	{HW_PAGESIZE, &uvmexp.pagesize, SYSCTL_INT_READONLY},
765 	{HW_DISKCOUNT, &disk_count, SYSCTL_INT_READONLY},
766 	{HW_POWER, &hw_power, SYSCTL_INT_READONLY},
767 };
768 
769 int
770 hw_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
771     size_t newlen, struct proc *p)
772 {
773 	extern char machine[], cpu_model[];
774 	int err;
775 
776 	/*
777 	 * all sysctl names at this level except sensors and battery
778 	 * are terminal
779 	 */
780 	if (name[0] != HW_SENSORS && name[0] != HW_BATTERY && namelen != 1)
781 		return (ENOTDIR);		/* overloaded */
782 
783 	switch (name[0]) {
784 	case HW_MACHINE:
785 		return (sysctl_rdstring(oldp, oldlenp, newp, machine));
786 	case HW_MODEL:
787 		return (sysctl_rdstring(oldp, oldlenp, newp, cpu_model));
788 	case HW_NCPUONLINE:
789 		return (sysctl_rdint(oldp, oldlenp, newp,
790 		    sysctl_hwncpuonline()));
791 	case HW_PHYSMEM:
792 		return (sysctl_rdint(oldp, oldlenp, newp, ptoa(physmem)));
793 	case HW_USERMEM:
794 		return (sysctl_rdint(oldp, oldlenp, newp,
795 		    ptoa(physmem - uvmexp.wired)));
796 	case HW_DISKNAMES:
797 	case HW_DISKSTATS:
798 	case HW_CPUSPEED:
799 #ifndef	SMALL_KERNEL
800 	case HW_SENSORS:
801 	case HW_SETPERF:
802 	case HW_PERFPOLICY:
803 	case HW_BATTERY:
804 #endif /* !SMALL_KERNEL */
805 	case HW_ALLOWPOWERDOWN:
806 	case HW_UCOMNAMES:
807 #ifdef __HAVE_CPU_TOPOLOGY
808 	case HW_SMT:
809 #endif
810 	{
811 		size_t savelen = *oldlenp;
812 		if ((err = sysctl_vslock(oldp, savelen)))
813 			return (err);
814 		err = hw_sysctl_locked(name, namelen, oldp, oldlenp,
815 		    newp, newlen, p);
816 		sysctl_vsunlock(oldp, savelen);
817 		return (err);
818 	}
819 	case HW_VENDOR:
820 		if (hw_vendor)
821 			return (sysctl_rdstring(oldp, oldlenp, newp,
822 			    hw_vendor));
823 		else
824 			return (EOPNOTSUPP);
825 	case HW_PRODUCT:
826 		if (hw_prod)
827 			return (sysctl_rdstring(oldp, oldlenp, newp, hw_prod));
828 		else
829 			return (EOPNOTSUPP);
830 	case HW_VERSION:
831 		if (hw_ver)
832 			return (sysctl_rdstring(oldp, oldlenp, newp, hw_ver));
833 		else
834 			return (EOPNOTSUPP);
835 	case HW_SERIALNO:
836 		if (hw_serial)
837 			return (sysctl_rdstring(oldp, oldlenp, newp,
838 			    hw_serial));
839 		else
840 			return (EOPNOTSUPP);
841 	case HW_UUID:
842 		if (hw_uuid)
843 			return (sysctl_rdstring(oldp, oldlenp, newp, hw_uuid));
844 		else
845 			return (EOPNOTSUPP);
846 	case HW_PHYSMEM64:
847 		return (sysctl_rdquad(oldp, oldlenp, newp,
848 		    ptoa((psize_t)physmem)));
849 	case HW_USERMEM64:
850 		return (sysctl_rdquad(oldp, oldlenp, newp,
851 		    ptoa((psize_t)physmem - uvmexp.wired)));
852 	default:
853 		return sysctl_bounded_arr(hw_vars, nitems(hw_vars), name,
854 		    namelen, oldp, oldlenp, newp, newlen);
855 	}
856 	/* NOTREACHED */
857 }
858 
859 int
860 hw_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
861     void *newp, size_t newlen, struct proc *p)
862 {
863 	int err, cpuspeed;
864 
865 	switch (name[0]) {
866 	case HW_DISKNAMES:
867 		err = sysctl_diskinit(0, p);
868 		if (err)
869 			return err;
870 		if (disknames)
871 			return (sysctl_rdstring(oldp, oldlenp, newp,
872 			    disknames));
873 		else
874 			return (sysctl_rdstring(oldp, oldlenp, newp, ""));
875 	case HW_DISKSTATS:
876 		err = sysctl_diskinit(1, p);
877 		if (err)
878 			return err;
879 		return (sysctl_rdstruct(oldp, oldlenp, newp, diskstats,
880 		    disk_count * sizeof(struct diskstats)));
881 	case HW_CPUSPEED:
882 		if (!cpu_cpuspeed)
883 			return (EOPNOTSUPP);
884 		err = cpu_cpuspeed(&cpuspeed);
885 		if (err)
886 			return err;
887 		return (sysctl_rdint(oldp, oldlenp, newp, cpuspeed));
888 #ifndef SMALL_KERNEL
889 	case HW_SENSORS:
890 		return (sysctl_sensors(name + 1, namelen - 1, oldp, oldlenp,
891 		    newp, newlen));
892 	case HW_SETPERF:
893 		return (sysctl_hwsetperf(oldp, oldlenp, newp, newlen));
894 	case HW_PERFPOLICY:
895 		return (sysctl_hwperfpolicy(oldp, oldlenp, newp, newlen));
896 #endif /* !SMALL_KERNEL */
897 	case HW_ALLOWPOWERDOWN:
898 		return (sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
899 		    &allowpowerdown));
900 	case HW_UCOMNAMES: {
901 		const char *str = "";
902 #if NUCOM > 0
903 		str = sysctl_ucominit();
904 #endif	/* NUCOM > 0 */
905 		return (sysctl_rdstring(oldp, oldlenp, newp, str));
906 	}
907 #ifdef __HAVE_CPU_TOPOLOGY
908 	case HW_SMT:
909 		return (sysctl_hwsmt(oldp, oldlenp, newp, newlen));
910 #endif
911 #ifndef SMALL_KERNEL
912 	case HW_BATTERY:
913 		return (sysctl_hwbattery(name + 1, namelen - 1, oldp, oldlenp,
914 		    newp, newlen));
915 #endif
916 	default:
917 		return (EOPNOTSUPP);
918 	}
919 	/* NOTREACHED */
920 }
921 
922 #ifndef SMALL_KERNEL
923 
924 int hw_battery_chargemode;
925 int hw_battery_chargestart;
926 int hw_battery_chargestop;
927 int (*hw_battery_setchargemode)(int);
928 int (*hw_battery_setchargestart)(int);
929 int (*hw_battery_setchargestop)(int);
930 
931 int
932 sysctl_hwchargemode(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
933 {
934 	int mode = hw_battery_chargemode;
935 	int error;
936 
937 	if (!hw_battery_setchargemode)
938 		return EOPNOTSUPP;
939 
940 	error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
941 	    &mode, -1, 1);
942 	if (error)
943 		return error;
944 
945 	if (newp != NULL)
946 		error = hw_battery_setchargemode(mode);
947 
948 	return error;
949 }
950 
951 int
952 sysctl_hwchargestart(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
953 {
954 	int start = hw_battery_chargestart;
955 	int error;
956 
957 	if (!hw_battery_setchargestart)
958 		return EOPNOTSUPP;
959 
960 	error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
961 	    &start, 0, 100);
962 	if (error)
963 		return error;
964 
965 	if (newp != NULL)
966 		error = hw_battery_setchargestart(start);
967 
968 	return error;
969 }
970 
971 int
972 sysctl_hwchargestop(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
973 {
974 	int stop = hw_battery_chargestop;
975 	int error;
976 
977 	if (!hw_battery_setchargestop)
978 		return EOPNOTSUPP;
979 
980 	error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
981 	    &stop, 0, 100);
982 	if (error)
983 		return error;
984 
985 	if (newp != NULL)
986 		error = hw_battery_setchargestop(stop);
987 
988 	return error;
989 }
990 
991 int
992 sysctl_hwbattery(int *name, u_int namelen, void *oldp, size_t *oldlenp,
993     void *newp, size_t newlen)
994 {
995 	if (namelen != 1)
996 		return (ENOTDIR);
997 
998 	switch (name[0]) {
999 	case HW_BATTERY_CHARGEMODE:
1000 		return (sysctl_hwchargemode(oldp, oldlenp, newp, newlen));
1001 	case HW_BATTERY_CHARGESTART:
1002 		return (sysctl_hwchargestart(oldp, oldlenp, newp, newlen));
1003 	case HW_BATTERY_CHARGESTOP:
1004 		return (sysctl_hwchargestop(oldp, oldlenp, newp, newlen));
1005 	default:
1006 		return (EOPNOTSUPP);
1007 	}
1008 	/* NOTREACHED */
1009 }
1010 
1011 #endif
1012 
1013 #ifdef DEBUG_SYSCTL
1014 /*
1015  * Debugging related system variables.
1016  */
1017 extern struct ctldebug debug_vfs_busyprt;
1018 struct ctldebug debug1, debug2, debug3, debug4;
1019 struct ctldebug debug5, debug6, debug7, debug8, debug9;
1020 struct ctldebug debug10, debug11, debug12, debug13, debug14;
1021 struct ctldebug debug15, debug16, debug17, debug18, debug19;
1022 static struct ctldebug *debugvars[CTL_DEBUG_MAXID] = {
1023 	&debug_vfs_busyprt,
1024 	&debug1, &debug2, &debug3, &debug4,
1025 	&debug5, &debug6, &debug7, &debug8, &debug9,
1026 	&debug10, &debug11, &debug12, &debug13, &debug14,
1027 	&debug15, &debug16, &debug17, &debug18, &debug19,
1028 };
1029 int
1030 debug_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
1031     size_t newlen, struct proc *p)
1032 {
1033 	struct ctldebug *cdp;
1034 
1035 	/* all sysctl names at this level are name and field */
1036 	if (namelen != 2)
1037 		return (ENOTDIR);		/* overloaded */
1038 	if (name[0] < 0 || name[0] >= nitems(debugvars))
1039 		return (EOPNOTSUPP);
1040 	cdp = debugvars[name[0]];
1041 	if (cdp->debugname == 0)
1042 		return (EOPNOTSUPP);
1043 	switch (name[1]) {
1044 	case CTL_DEBUG_NAME:
1045 		return (sysctl_rdstring(oldp, oldlenp, newp, cdp->debugname));
1046 	case CTL_DEBUG_VALUE:
1047 		return (sysctl_int(oldp, oldlenp, newp, newlen, cdp->debugvar));
1048 	default:
1049 		return (EOPNOTSUPP);
1050 	}
1051 	/* NOTREACHED */
1052 }
1053 #endif /* DEBUG_SYSCTL */
1054 
1055 /*
1056  * Reads, or writes that lower the value
1057  */
1058 int
1059 sysctl_int_lower(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
1060     int *valp)
1061 {
1062 	unsigned int oldval, newval;
1063 	int error;
1064 
1065 	if (oldp && *oldlenp < sizeof(int))
1066 		return (ENOMEM);
1067 	if (newp && newlen != sizeof(int))
1068 		return (EINVAL);
1069 	*oldlenp = sizeof(int);
1070 
1071 	if (newp) {
1072 		if ((error = copyin(newp, &newval, sizeof(int))))
1073 			return (error);
1074 		do {
1075 			oldval = atomic_load_int(valp);
1076 			if (oldval < (unsigned int)newval)
1077 				return (EPERM);	/* do not allow raising */
1078 		} while (atomic_cas_uint(valp, oldval, newval) != oldval);
1079 
1080 		if (oldp) {
1081 			/* new value has been set although user gets error */
1082 			if ((error = copyout(&oldval, oldp, sizeof(int))))
1083 				return (error);
1084 		}
1085 	} else if (oldp) {
1086 		oldval = atomic_load_int(valp);
1087 
1088 		if ((error = copyout(&oldval, oldp, sizeof(int))))
1089 			return (error);
1090 	}
1091 
1092 	return (0);
1093 }
1094 
/*
 * Read and/or write an integer-valued sysctl variable without any range
 * restriction: a bounded integer whose limits span the whole int range.
 */
int
sysctl_int(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int *valp)
{
	int error;

	error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, valp,
	    INT_MIN, INT_MAX);
	return (error);
}
1105 
1106 /*
1107  * As above, but read-only.
1108  */
1109 int
1110 sysctl_rdint(void *oldp, size_t *oldlenp, void *newp, int val)
1111 {
1112 	int error = 0;
1113 
1114 	if (oldp && *oldlenp < sizeof(int))
1115 		return (ENOMEM);
1116 	if (newp)
1117 		return (EPERM);
1118 	*oldlenp = sizeof(int);
1119 	if (oldp)
1120 		error = copyout((caddr_t)&val, oldp, sizeof(int));
1121 	return (error);
1122 }
1123 
1124 int
1125 sysctl_securelevel(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
1126     struct proc *p)
1127 {
1128 	int oldval, newval;
1129 	int error;
1130 
1131 	if (oldp && *oldlenp < sizeof(int))
1132 		return (ENOMEM);
1133 	if (newp && newlen != sizeof(int))
1134 		return (EINVAL);
1135 	*oldlenp = sizeof(int);
1136 
1137 	if (newp) {
1138 		if ((error = copyin(newp, &newval, sizeof(int))))
1139 			return (error);
1140 		do {
1141 			oldval = atomic_load_int(&securelevel);
1142 			if ((oldval > 0 || newval < -1) && newval < oldval &&
1143 			    p->p_p->ps_pid != 1)
1144 				return (EPERM);
1145 		} while (atomic_cas_uint(&securelevel, oldval, newval) !=
1146 		    oldval);
1147 
1148 		if (oldp) {
1149 			/* new value has been set although user gets error */
1150 			if ((error = copyout(&oldval, oldp, sizeof(int))))
1151 				return (error);
1152 		}
1153 	} else if (oldp) {
1154 		oldval = atomic_load_int(&securelevel);
1155 
1156 		if ((error = copyout(&oldval, oldp, sizeof(int))))
1157 			return (error);
1158 	}
1159 
1160 	return (0);
1161 }
1162 
1163 /*
1164  * Selects between sysctl_rdint and sysctl_int according to securelevel.
1165  */
1166 int
1167 sysctl_securelevel_int(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
1168     int *valp)
1169 {
1170 	if (atomic_load_int(&securelevel) > 0)
1171 		return (sysctl_rdint(oldp, oldlenp, newp, *valp));
1172 	return (sysctl_int(oldp, oldlenp, newp, newlen, valp));
1173 }
1174 
1175 /*
1176  * Read-only or bounded integer values.
1177  */
1178 int
1179 sysctl_int_bounded(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
1180     int *valp, int minimum, int maximum)
1181 {
1182 	int oldval, newval;
1183 	int error;
1184 
1185 	/* read only */
1186 	if (newp != NULL && minimum > maximum)
1187 		return (EPERM);
1188 
1189 	if (oldp != NULL && *oldlenp < sizeof(int))
1190 		return (ENOMEM);
1191 	if (newp != NULL && newlen != sizeof(int))
1192 		return (EINVAL);
1193 	*oldlenp = sizeof(int);
1194 
1195 	/* copyin() may sleep, call it first */
1196 	if (newp != NULL) {
1197 		if ((error = copyin(newp, &newval, sizeof(int))))
1198 			return (error);
1199 		/* outside limits */
1200 		if (newval < minimum || maximum < newval)
1201 			return (EINVAL);
1202 	}
1203 	if (oldp != NULL) {
1204 		if (newp != NULL)
1205 			oldval = atomic_swap_uint(valp, newval);
1206 		else
1207 			oldval = atomic_load_int(valp);
1208 		if ((error = copyout(&oldval, oldp, sizeof(int)))) {
1209 			/* new value has been set although user gets error */
1210 			return (error);
1211 		}
1212 	} else if (newp != NULL)
1213 		atomic_store_int(valp, newval);
1214 
1215 	return (0);
1216 }
1217 
1218 /*
1219  * Array of read-only or bounded integer values.
1220  */
1221 int
1222 sysctl_bounded_arr(const struct sysctl_bounded_args *valpp, u_int valplen,
1223     int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
1224     size_t newlen)
1225 {
1226 	u_int i;
1227 	if (namelen != 1)
1228 		return (ENOTDIR);
1229 	for (i = 0; i < valplen; ++i) {
1230 		if (valpp[i].mib == name[0]) {
1231 			return (sysctl_int_bounded(oldp, oldlenp, newp, newlen,
1232 			    valpp[i].var, valpp[i].minimum, valpp[i].maximum));
1233 		}
1234 	}
1235 	return (EOPNOTSUPP);
1236 }
1237 
1238 /*
1239  * Validate parameters and get old / set new parameters
1240  * for an integer-valued sysctl function.
1241  */
1242 int
1243 sysctl_quad(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
1244     int64_t *valp)
1245 {
1246 	int error = 0;
1247 
1248 	if (oldp && *oldlenp < sizeof(int64_t))
1249 		return (ENOMEM);
1250 	if (newp && newlen != sizeof(int64_t))
1251 		return (EINVAL);
1252 	*oldlenp = sizeof(int64_t);
1253 	if (oldp)
1254 		error = copyout(valp, oldp, sizeof(int64_t));
1255 	if (error == 0 && newp)
1256 		error = copyin(newp, valp, sizeof(int64_t));
1257 	return (error);
1258 }
1259 
1260 /*
1261  * As above, but read-only.
1262  */
1263 int
1264 sysctl_rdquad(void *oldp, size_t *oldlenp, void *newp, int64_t val)
1265 {
1266 	int error = 0;
1267 
1268 	if (oldp && *oldlenp < sizeof(int64_t))
1269 		return (ENOMEM);
1270 	if (newp)
1271 		return (EPERM);
1272 	*oldlenp = sizeof(int64_t);
1273 	if (oldp)
1274 		error = copyout((caddr_t)&val, oldp, sizeof(int64_t));
1275 	return (error);
1276 }
1277 
/*
 * Validate parameters and get old / set new parameters
 * for a string-valued sysctl function.
 *
 * This variant never truncates: an old buffer too small for the whole
 * string (including its NUL) makes the request fail.
 */
int
sysctl_string(void *oldp, size_t *oldlenp, void *newp, size_t newlen, char *str,
    size_t maxlen)
{
	int error;

	error = sysctl__string(oldp, oldlenp, newp, newlen, str, maxlen, 0);
	return (error);
}
1288 
/*
 * Like sysctl_string(), but an old buffer that is too small receives a
 * truncated, NUL-terminated copy instead of an error.
 */
int
sysctl_tstring(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    char *str, size_t maxlen)
{
	int error;

	error = sysctl__string(oldp, oldlenp, newp, newlen, str, maxlen, 1);
	return (error);
}
1295 
/*
 * Common worker for sysctl_string() and sysctl_tstring().
 *
 * str is the kernel string and maxlen the size of its buffer; a new
 * value must be strictly shorter than maxlen so that a terminating NUL
 * fits.  If the old buffer is smaller than the string plus its NUL the
 * request fails with ENOMEM, unless trunc is non-zero and the buffer
 * holds at least one byte, in which case a shortened, NUL-terminated
 * copy is written.  *oldlenp is set to the number of bytes produced.
 */
int
sysctl__string(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    char *str, size_t maxlen, int trunc)
{
	size_t outlen;
	int too_small, error = 0;

	outlen = strlen(str) + 1;
	too_small = (oldp != NULL && *oldlenp < outlen);

	if (too_small && (trunc == 0 || *oldlenp == 0))
		return (ENOMEM);
	if (newp != NULL && newlen >= maxlen)
		return (EINVAL);

	if (too_small) {
		/* truncating: fill the buffer and terminate the last byte */
		outlen = *oldlenp;
		error = copyout(str, oldp, outlen - 1);
		if (error == 0)
			error = copyout("", (char *)oldp + outlen - 1, 1);
	} else if (oldp != NULL) {
		error = copyout(str, oldp, outlen);
	}
	*oldlenp = outlen;

	if (error == 0 && newp != NULL) {
		error = copyin(newp, str, newlen);
		/* terminated even if copyin() reported an error */
		str[newlen] = 0;
	}

	return (error);
}
1327 
/*
 * Read-only string sysctl: report str including its NUL, refuse any
 * write.
 *
 * Returns ENOMEM if an old buffer is supplied but cannot hold the whole
 * string, EPERM if a new value is supplied.
 */
int
sysctl_rdstring(void *oldp, size_t *oldlenp, void *newp, const char *str)
{
	size_t outlen;

	outlen = strlen(str) + 1;
	if (oldp != NULL && *oldlenp < outlen)
		return (ENOMEM);
	if (newp != NULL)
		return (EPERM);

	*oldlenp = outlen;
	if (oldp == NULL)
		return (0);

	return (copyout(str, oldp, outlen));
}
1347 
1348 /*
1349  * Validate parameters and get old / set new parameters
1350  * for a structure oriented sysctl function.
1351  */
1352 int
1353 sysctl_struct(void *oldp, size_t *oldlenp, void *newp, size_t newlen, void *sp,
1354     size_t len)
1355 {
1356 	int error = 0;
1357 
1358 	if (oldp && *oldlenp < len)
1359 		return (ENOMEM);
1360 	if (newp && newlen > len)
1361 		return (EINVAL);
1362 	if (oldp) {
1363 		*oldlenp = len;
1364 		error = copyout(sp, oldp, len);
1365 	}
1366 	if (error == 0 && newp)
1367 		error = copyin(newp, sp, len);
1368 	return (error);
1369 }
1370 
1371 /*
1372  * Validate parameters and get old parameters
1373  * for a structure oriented sysctl function.
1374  */
1375 int
1376 sysctl_rdstruct(void *oldp, size_t *oldlenp, void *newp, const void *sp,
1377     size_t len)
1378 {
1379 	int error = 0;
1380 
1381 	if (oldp && *oldlenp < len)
1382 		return (ENOMEM);
1383 	if (newp)
1384 		return (EPERM);
1385 	*oldlenp = len;
1386 	if (oldp)
1387 		error = copyout(sp, oldp, len);
1388 	return (error);
1389 }
1390 
1391 #ifndef SMALL_KERNEL
1392 void
1393 fill_file(struct kinfo_file *kf, struct file *fp, struct filedesc *fdp,
1394 	  int fd, struct vnode *vp, struct process *pr, struct proc *p,
1395 	  struct socket *so, int show_pointers)
1396 {
1397 	struct vattr va;
1398 
1399 	memset(kf, 0, sizeof(*kf));
1400 
1401 	kf->fd_fd = fd;		/* might not really be an fd */
1402 
1403 	if (fp != NULL) {
1404 		if (show_pointers)
1405 			kf->f_fileaddr = PTRTOINT64(fp);
1406 		kf->f_flag = fp->f_flag;
1407 		kf->f_iflags = fp->f_iflags;
1408 		kf->f_type = fp->f_type;
1409 		kf->f_count = fp->f_count;
1410 		if (show_pointers)
1411 			kf->f_ucred = PTRTOINT64(fp->f_cred);
1412 		kf->f_uid = fp->f_cred->cr_uid;
1413 		kf->f_gid = fp->f_cred->cr_gid;
1414 		if (show_pointers)
1415 			kf->f_ops = PTRTOINT64(fp->f_ops);
1416 		if (show_pointers)
1417 			kf->f_data = PTRTOINT64(fp->f_data);
1418 		kf->f_usecount = 0;
1419 
1420 		if (suser(p) == 0 || p->p_ucred->cr_uid == fp->f_cred->cr_uid) {
1421 			mtx_enter(&fp->f_mtx);
1422 			kf->f_offset = fp->f_offset;
1423 			kf->f_rxfer = fp->f_rxfer;
1424 			kf->f_rwfer = fp->f_wxfer;
1425 			kf->f_seek = fp->f_seek;
1426 			kf->f_rbytes = fp->f_rbytes;
1427 			kf->f_wbytes = fp->f_wbytes;
1428 			mtx_leave(&fp->f_mtx);
1429 		} else
1430 			kf->f_offset = -1;
1431 	} else if (vp != NULL) {
1432 		/* fake it */
1433 		kf->f_type = DTYPE_VNODE;
1434 		kf->f_flag = FREAD;
1435 		if (fd == KERN_FILE_TRACE)
1436 			kf->f_flag |= FWRITE;
1437 	} else if (so != NULL) {
1438 		/* fake it */
1439 		kf->f_type = DTYPE_SOCKET;
1440 	}
1441 
1442 	/* information about the object associated with this file */
1443 	switch (kf->f_type) {
1444 	case DTYPE_VNODE:
1445 		if (fp != NULL)
1446 			vp = (struct vnode *)fp->f_data;
1447 
1448 		if (show_pointers)
1449 			kf->v_un = PTRTOINT64(vp->v_un.vu_socket);
1450 		kf->v_type = vp->v_type;
1451 		kf->v_tag = vp->v_tag;
1452 		kf->v_flag = vp->v_flag;
1453 		if (show_pointers)
1454 			kf->v_data = PTRTOINT64(vp->v_data);
1455 		if (show_pointers)
1456 			kf->v_mount = PTRTOINT64(vp->v_mount);
1457 		if (vp->v_mount)
1458 			strlcpy(kf->f_mntonname,
1459 			    vp->v_mount->mnt_stat.f_mntonname,
1460 			    sizeof(kf->f_mntonname));
1461 
1462 		if (VOP_GETATTR(vp, &va, p->p_ucred, p) == 0) {
1463 			kf->va_fileid = va.va_fileid;
1464 			kf->va_mode = MAKEIMODE(va.va_type, va.va_mode);
1465 			kf->va_size = va.va_size;
1466 			kf->va_rdev = va.va_rdev;
1467 			kf->va_fsid = va.va_fsid & 0xffffffff;
1468 			kf->va_nlink = va.va_nlink;
1469 		}
1470 		break;
1471 
1472 	case DTYPE_SOCKET: {
1473 		int locked = 0;
1474 
1475 		if (so == NULL) {
1476 			so = (struct socket *)fp->f_data;
1477 			/* if so is passed as parameter it is already locked */
1478 			solock(so);
1479 			locked = 1;
1480 		}
1481 
1482 		kf->so_type = so->so_type;
1483 		kf->so_state = so->so_state | so->so_snd.sb_state |
1484 		    so->so_rcv.sb_state;
1485 		if (show_pointers)
1486 			kf->so_pcb = PTRTOINT64(so->so_pcb);
1487 		else
1488 			kf->so_pcb = -1;
1489 		kf->so_protocol = so->so_proto->pr_protocol;
1490 		kf->so_family = so->so_proto->pr_domain->dom_family;
1491 		kf->so_rcv_cc = so->so_rcv.sb_cc;
1492 		kf->so_snd_cc = so->so_snd.sb_cc;
1493 		if (isspliced(so)) {
1494 			if (show_pointers)
1495 				kf->so_splice =
1496 				    PTRTOINT64(so->so_sp->ssp_socket);
1497 			kf->so_splicelen = so->so_sp->ssp_len;
1498 		} else if (issplicedback(so))
1499 			kf->so_splicelen = -1;
1500 		if (so->so_pcb == NULL) {
1501 			if (locked)
1502 				sounlock(so);
1503 			break;
1504 		}
1505 		switch (kf->so_family) {
1506 		case AF_INET: {
1507 			struct inpcb *inpcb = so->so_pcb;
1508 
1509 			soassertlocked(so);
1510 			if (show_pointers)
1511 				kf->inp_ppcb = PTRTOINT64(inpcb->inp_ppcb);
1512 			kf->inp_lport = inpcb->inp_lport;
1513 			kf->inp_laddru[0] = inpcb->inp_laddr.s_addr;
1514 			kf->inp_fport = inpcb->inp_fport;
1515 			kf->inp_faddru[0] = inpcb->inp_faddr.s_addr;
1516 			kf->inp_rtableid = inpcb->inp_rtableid;
1517 			if (so->so_type == SOCK_RAW)
1518 				kf->inp_proto = inpcb->inp_ip.ip_p;
1519 			if (so->so_proto->pr_protocol == IPPROTO_TCP) {
1520 				struct tcpcb *tcpcb = (void *)inpcb->inp_ppcb;
1521 				kf->t_rcv_wnd = tcpcb->rcv_wnd;
1522 				kf->t_snd_wnd = tcpcb->snd_wnd;
1523 				kf->t_snd_cwnd = tcpcb->snd_cwnd;
1524 				kf->t_state = tcpcb->t_state;
1525 			}
1526 			break;
1527 		    }
1528 		case AF_INET6: {
1529 			struct inpcb *inpcb = so->so_pcb;
1530 
1531 			soassertlocked(so);
1532 			if (show_pointers)
1533 				kf->inp_ppcb = PTRTOINT64(inpcb->inp_ppcb);
1534 			kf->inp_lport = inpcb->inp_lport;
1535 			kf->inp_laddru[0] = inpcb->inp_laddr6.s6_addr32[0];
1536 			kf->inp_laddru[1] = inpcb->inp_laddr6.s6_addr32[1];
1537 			kf->inp_laddru[2] = inpcb->inp_laddr6.s6_addr32[2];
1538 			kf->inp_laddru[3] = inpcb->inp_laddr6.s6_addr32[3];
1539 			kf->inp_fport = inpcb->inp_fport;
1540 			kf->inp_faddru[0] = inpcb->inp_faddr6.s6_addr32[0];
1541 			kf->inp_faddru[1] = inpcb->inp_faddr6.s6_addr32[1];
1542 			kf->inp_faddru[2] = inpcb->inp_faddr6.s6_addr32[2];
1543 			kf->inp_faddru[3] = inpcb->inp_faddr6.s6_addr32[3];
1544 			kf->inp_rtableid = inpcb->inp_rtableid;
1545 			if (so->so_type == SOCK_RAW)
1546 				kf->inp_proto = inpcb->inp_ipv6.ip6_nxt;
1547 			if (so->so_proto->pr_protocol == IPPROTO_TCP) {
1548 				struct tcpcb *tcpcb = (void *)inpcb->inp_ppcb;
1549 				kf->t_rcv_wnd = tcpcb->rcv_wnd;
1550 				kf->t_snd_wnd = tcpcb->snd_wnd;
1551 				kf->t_state = tcpcb->t_state;
1552 			}
1553 			break;
1554 		    }
1555 		case AF_UNIX: {
1556 			struct unpcb *unpcb = so->so_pcb;
1557 
1558 			kf->f_msgcount = unpcb->unp_msgcount;
1559 			if (show_pointers) {
1560 				kf->unp_conn	= PTRTOINT64(unpcb->unp_conn);
1561 				kf->unp_refs	= PTRTOINT64(
1562 				    SLIST_FIRST(&unpcb->unp_refs));
1563 				kf->unp_nextref	= PTRTOINT64(
1564 				    SLIST_NEXT(unpcb, unp_nextref));
1565 				kf->v_un	= PTRTOINT64(unpcb->unp_vnode);
1566 				kf->unp_addr	= PTRTOINT64(unpcb->unp_addr);
1567 			}
1568 			if (unpcb->unp_addr != NULL) {
1569 				struct sockaddr_un *un = mtod(unpcb->unp_addr,
1570 				    struct sockaddr_un *);
1571 				memcpy(kf->unp_path, un->sun_path, un->sun_len
1572 				    - offsetof(struct sockaddr_un,sun_path));
1573 			}
1574 			break;
1575 		    }
1576 		}
1577 		if (locked)
1578 			sounlock(so);
1579 		break;
1580 	    }
1581 
1582 	case DTYPE_PIPE: {
1583 		struct pipe *pipe = (struct pipe *)fp->f_data;
1584 
1585 		if (show_pointers)
1586 			kf->pipe_peer = PTRTOINT64(pipe->pipe_peer);
1587 		kf->pipe_state = pipe->pipe_state;
1588 		break;
1589 	    }
1590 
1591 	case DTYPE_KQUEUE: {
1592 		struct kqueue *kqi = (struct kqueue *)fp->f_data;
1593 
1594 		kf->kq_count = kqi->kq_count;
1595 		kf->kq_state = kqi->kq_state;
1596 		break;
1597 	    }
1598 	}
1599 
1600 	/* per-process information for KERN_FILE_BY[PU]ID */
1601 	if (pr != NULL) {
1602 		kf->p_pid = pr->ps_pid;
1603 		kf->p_uid = pr->ps_ucred->cr_uid;
1604 		kf->p_gid = pr->ps_ucred->cr_gid;
1605 		kf->p_tid = -1;
1606 		strlcpy(kf->p_comm, pr->ps_comm, sizeof(kf->p_comm));
1607 	}
1608 	if (fdp != NULL) {
1609 		fdplock(fdp);
1610 		kf->fd_ofileflags = fdp->fd_ofileflags[fd];
1611 		fdpunlock(fdp);
1612 	}
1613 }
1614 
/*
 * Get file structures.
 *
 * The request is described by four name components:
 *	name[0]	operation: KERN_FILE_BYFILE, KERN_FILE_BYPID or
 *		KERN_FILE_BYUID
 *	name[1]	argument of the operation (file type, pid or uid)
 *	name[2]	size of one element in the caller's buffer; must be
 *		between 1 and sizeof(struct kinfo_file)
 *	name[3]	maximum number of elements to copy out
 *
 * where/sizep describe the output buffer.  With where == NULL nothing
 * is copied and *sizep receives the space needed plus KERN_FILESLOP
 * extra elements.  Otherwise *sizep receives the space needed and
 * ENOMEM is returned if the buffer was smaller than that.
 */
int
sysctl_file(int *name, u_int namelen, char *where, size_t *sizep,
    struct proc *p)
{
	struct kinfo_file *kf;
	struct filedesc *fdp;
	struct file *fp;
	struct process *pr;
	size_t buflen, elem_size, elem_count, outsize;
	char *dp = where;
	int arg, i, error = 0, needed = 0, matched;
	u_int op;
	int show_pointers;

	if (namelen > 4)
		return (ENOTDIR);
	if (namelen < 4 || name[2] > sizeof(*kf))
		return (EINVAL);

	buflen = where != NULL ? *sizep : 0;
	op = name[0];
	arg = name[1];
	elem_size = name[2];
	elem_count = name[3];
	outsize = MIN(sizeof(*kf), elem_size);

	if (elem_size < 1)
		return (EINVAL);

	/* kernel addresses are only exposed to the superuser */
	show_pointers = suser(curproc) == 0;

	kf = malloc(sizeof(*kf), M_TEMP, M_WAITOK);

/*
 * FILLIT2 emits one record: if the buffer still has room and the
 * element budget is not used up, the record is built in kf and copied
 * out.  "needed" is advanced whether or not the record was copied, so
 * it ends up as the size required for everything.  Note that the
 * "break" on a copyout() error only leaves the macro's own
 * do { } while (0); it does not terminate a loop around the macro.
 */
#define FILLIT2(fp, fdp, i, vp, pr, so) do {				\
	if (buflen >= elem_size && elem_count > 0) {			\
		fill_file(kf, fp, fdp, i, vp, pr, p, so, show_pointers);\
		error = copyout(kf, dp, outsize);			\
		if (error)						\
			break;						\
		dp += elem_size;					\
		buflen -= elem_size;					\
		elem_count--;						\
	}								\
	needed += elem_size;						\
} while (0)
#define FILLIT(fp, fdp, i, vp, pr) \
	FILLIT2(fp, fdp, i, vp, pr, NULL)
#define FILLSO(so) \
	FILLIT2(NULL, NULL, 0, NULL, NULL, so)

	switch (op) {
	case KERN_FILE_BYFILE:
		/* use the inp-tables to pick up closed connections, too */
		if (arg == DTYPE_SOCKET) {
			struct inpcb *inp;

			/*
			 * Each PCB table is walked with its own mutex
			 * held, all of them under the net lock.
			 */
			NET_LOCK();
			mtx_enter(&tcbtable.inpt_mtx);
			TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue)
				FILLSO(inp->inp_socket);
			mtx_leave(&tcbtable.inpt_mtx);
#ifdef INET6
			mtx_enter(&tcb6table.inpt_mtx);
			TAILQ_FOREACH(inp, &tcb6table.inpt_queue, inp_queue)
				FILLSO(inp->inp_socket);
			mtx_leave(&tcb6table.inpt_mtx);
#endif
			mtx_enter(&udbtable.inpt_mtx);
			TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue)
				FILLSO(inp->inp_socket);
			mtx_leave(&udbtable.inpt_mtx);
#ifdef INET6
			mtx_enter(&udb6table.inpt_mtx);
			TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue)
				FILLSO(inp->inp_socket);
			mtx_leave(&udb6table.inpt_mtx);
#endif
			mtx_enter(&rawcbtable.inpt_mtx);
			TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue)
				FILLSO(inp->inp_socket);
			mtx_leave(&rawcbtable.inpt_mtx);
#ifdef INET6
			mtx_enter(&rawin6pcbtable.inpt_mtx);
			TAILQ_FOREACH(inp, &rawin6pcbtable.inpt_queue,
			    inp_queue)
				FILLSO(inp->inp_socket);
			mtx_leave(&rawin6pcbtable.inpt_mtx);
#endif
			NET_UNLOCK();
		}
		/*
		 * Walk all open files.  When sockets were requested,
		 * AF_INET and AF_INET6 sockets are skipped here because
		 * the PCB tables above have already been reported.
		 */
		fp = NULL;
		while ((fp = fd_iterfile(fp, p)) != NULL) {
			if ((arg == 0 || fp->f_type == arg)) {
				int af, skip = 0;
				if (arg == DTYPE_SOCKET && fp->f_type == arg) {
					af = ((struct socket *)fp->f_data)->
					    so_proto->pr_domain->dom_family;
					if (af == AF_INET || af == AF_INET6)
						skip = 1;
				}
				if (!skip)
					FILLIT(fp, NULL, 0, NULL, NULL);
			}
		}
		break;
	case KERN_FILE_BYPID:
		/* A arg of -1 indicates all processes */
		if (arg < -1) {
			error = EINVAL;
			break;
		}
		matched = 0;
		LIST_FOREACH(pr, &allprocess, ps_list) {
			/*
			 * skip system, exiting, embryonic and undead
			 * processes
			 */
			if (pr->ps_flags & (PS_SYSTEM | PS_EMBRYO | PS_EXITING))
				continue;
			if (arg >= 0 && pr->ps_pid != (pid_t)arg) {
				/* not the pid we are looking for */
				continue;
			}

			/* hold a reference on the process while it is reported */
			refcnt_take(&pr->ps_refcnt);

			matched = 1;
			fdp = pr->ps_fd;
			/* text, cwd, root and trace vnodes come first */
			if (pr->ps_textvp)
				FILLIT(NULL, NULL, KERN_FILE_TEXT, pr->ps_textvp, pr);
			if (fdp->fd_cdir)
				FILLIT(NULL, NULL, KERN_FILE_CDIR, fdp->fd_cdir, pr);
			if (fdp->fd_rdir)
				FILLIT(NULL, NULL, KERN_FILE_RDIR, fdp->fd_rdir, pr);
			if (pr->ps_tracevp)
				FILLIT(NULL, NULL, KERN_FILE_TRACE, pr->ps_tracevp, pr);
			for (i = 0; i < fdp->fd_nfiles; i++) {
				if ((fp = fd_getfile(fdp, i)) == NULL)
					continue;
				FILLIT(fp, fdp, i, NULL, pr);
				FRELE(fp, p);
			}

			refcnt_rele_wake(&pr->ps_refcnt);

			/* pid is unique, stop searching */
			if (arg >= 0)
				break;
		}
		if (!matched)
			error = ESRCH;
		break;
	case KERN_FILE_BYUID:
		LIST_FOREACH(pr, &allprocess, ps_list) {
			/*
			 * skip system, exiting, embryonic and undead
			 * processes
			 */
			if (pr->ps_flags & (PS_SYSTEM | PS_EMBRYO | PS_EXITING))
				continue;
			if (arg >= 0 && pr->ps_ucred->cr_uid != (uid_t)arg) {
				/* not the uid we are looking for */
				continue;
			}

			/* hold a reference on the process while it is reported */
			refcnt_take(&pr->ps_refcnt);

			/* unlike KERN_FILE_BYPID, the text vnode is not reported */
			fdp = pr->ps_fd;
			if (fdp->fd_cdir)
				FILLIT(NULL, NULL, KERN_FILE_CDIR, fdp->fd_cdir, pr);
			if (fdp->fd_rdir)
				FILLIT(NULL, NULL, KERN_FILE_RDIR, fdp->fd_rdir, pr);
			if (pr->ps_tracevp)
				FILLIT(NULL, NULL, KERN_FILE_TRACE, pr->ps_tracevp, pr);
			for (i = 0; i < fdp->fd_nfiles; i++) {
				if ((fp = fd_getfile(fdp, i)) == NULL)
					continue;
				FILLIT(fp, fdp, i, NULL, pr);
				FRELE(fp, p);
			}

			refcnt_rele_wake(&pr->ps_refcnt);
		}
		break;
	default:
		error = EINVAL;
		break;
	}
	free(kf, M_TEMP, sizeof(*kf));

	/*
	 * Report the required size: padded with slop for a pure size
	 * query, or together with ENOMEM if the buffer was too small.
	 */
	if (!error) {
		if (where == NULL)
			needed += KERN_FILESLOP * elem_size;
		else if (*sizep < needed)
			error = ENOMEM;
		*sizep = needed;
	}

	return (error);
}
1818 
1819 /*
1820  * try over estimating by 5 procs
1821  */
1822 #define KERN_PROCSLOP	5
1823 
1824 int
1825 sysctl_doproc(int *name, u_int namelen, char *where, size_t *sizep)
1826 {
1827 	struct kinfo_proc *kproc = NULL;
1828 	struct proc *p;
1829 	struct process *pr;
1830 	char *dp;
1831 	int arg, buflen, doingzomb, elem_size, elem_count;
1832 	int error, needed, op;
1833 	int dothreads = 0;
1834 	int show_pointers;
1835 
1836 	dp = where;
1837 	buflen = where != NULL ? *sizep : 0;
1838 	needed = error = 0;
1839 
1840 	if (namelen != 4 || name[2] <= 0 || name[3] < 0 ||
1841 	    name[2] > sizeof(*kproc))
1842 		return (EINVAL);
1843 	op = name[0];
1844 	arg = name[1];
1845 	elem_size = name[2];
1846 	elem_count = name[3];
1847 
1848 	dothreads = op & KERN_PROC_SHOW_THREADS;
1849 	op &= ~KERN_PROC_SHOW_THREADS;
1850 
1851 	show_pointers = suser(curproc) == 0;
1852 
1853 	if (where != NULL)
1854 		kproc = malloc(sizeof(*kproc), M_TEMP, M_WAITOK);
1855 
1856 	pr = LIST_FIRST(&allprocess);
1857 	doingzomb = 0;
1858 again:
1859 	for (; pr != NULL; pr = LIST_NEXT(pr, ps_list)) {
1860 		/* XXX skip processes in the middle of being zapped */
1861 		if (pr->ps_pgrp == NULL)
1862 			continue;
1863 
1864 		/*
1865 		 * Skip embryonic processes.
1866 		 */
1867 		if (pr->ps_flags & PS_EMBRYO)
1868 			continue;
1869 
1870 		/*
1871 		 * TODO - make more efficient (see notes below).
1872 		 */
1873 		switch (op) {
1874 
1875 		case KERN_PROC_PID:
1876 			/* could do this with just a lookup */
1877 			if (pr->ps_pid != (pid_t)arg)
1878 				continue;
1879 			break;
1880 
1881 		case KERN_PROC_PGRP:
1882 			/* could do this by traversing pgrp */
1883 			if (pr->ps_pgrp->pg_id != (pid_t)arg)
1884 				continue;
1885 			break;
1886 
1887 		case KERN_PROC_SESSION:
1888 			if (pr->ps_session->s_leader == NULL ||
1889 			    pr->ps_session->s_leader->ps_pid != (pid_t)arg)
1890 				continue;
1891 			break;
1892 
1893 		case KERN_PROC_TTY:
1894 			if ((pr->ps_flags & PS_CONTROLT) == 0 ||
1895 			    pr->ps_session->s_ttyp == NULL ||
1896 			    pr->ps_session->s_ttyp->t_dev != (dev_t)arg)
1897 				continue;
1898 			break;
1899 
1900 		case KERN_PROC_UID:
1901 			if (pr->ps_ucred->cr_uid != (uid_t)arg)
1902 				continue;
1903 			break;
1904 
1905 		case KERN_PROC_RUID:
1906 			if (pr->ps_ucred->cr_ruid != (uid_t)arg)
1907 				continue;
1908 			break;
1909 
1910 		case KERN_PROC_ALL:
1911 			if (pr->ps_flags & PS_SYSTEM)
1912 				continue;
1913 			break;
1914 
1915 		case KERN_PROC_KTHREAD:
1916 			/* no filtering */
1917 			break;
1918 
1919 		default:
1920 			error = EINVAL;
1921 			goto err;
1922 		}
1923 
1924 		if (buflen >= elem_size && elem_count > 0) {
1925 			fill_kproc(pr, kproc, NULL, show_pointers);
1926 			error = copyout(kproc, dp, elem_size);
1927 			if (error)
1928 				goto err;
1929 			dp += elem_size;
1930 			buflen -= elem_size;
1931 			elem_count--;
1932 		}
1933 		needed += elem_size;
1934 
1935 		/* Skip per-thread entries if not required by op */
1936 		if (!dothreads)
1937 			continue;
1938 
1939 		TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
1940 			if (buflen >= elem_size && elem_count > 0) {
1941 				fill_kproc(pr, kproc, p, show_pointers);
1942 				error = copyout(kproc, dp, elem_size);
1943 				if (error)
1944 					goto err;
1945 				dp += elem_size;
1946 				buflen -= elem_size;
1947 				elem_count--;
1948 			}
1949 			needed += elem_size;
1950 		}
1951 	}
1952 	if (doingzomb == 0) {
1953 		pr = LIST_FIRST(&zombprocess);
1954 		doingzomb++;
1955 		goto again;
1956 	}
1957 	if (where != NULL) {
1958 		*sizep = dp - where;
1959 		if (needed > *sizep) {
1960 			error = ENOMEM;
1961 			goto err;
1962 		}
1963 	} else {
1964 		needed += KERN_PROCSLOP * elem_size;
1965 		*sizep = needed;
1966 	}
1967 err:
1968 	if (kproc)
1969 		free(kproc, M_TEMP, sizeof(*kproc));
1970 	return (error);
1971 }
1972 
1973 /*
1974  * Fill in a kproc structure for the specified process.
1975  */
1976 void
1977 fill_kproc(struct process *pr, struct kinfo_proc *ki, struct proc *p,
1978     int show_pointers)
1979 {
1980 	struct session *s = pr->ps_session;
1981 	struct tty *tp;
1982 	struct vmspace *vm = pr->ps_vmspace;
1983 	struct timespec booted, st, ut, utc;
1984 	struct tusage tu;
1985 	int isthread;
1986 
1987 	isthread = p != NULL;
1988 	if (!isthread) {
1989 		p = pr->ps_mainproc;		/* XXX */
1990 		tuagg_get_process(&tu, pr);
1991 	} else
1992 		tuagg_get_proc(&tu, p);
1993 
1994 	FILL_KPROC(ki, strlcpy, p, pr, pr->ps_ucred, pr->ps_pgrp,
1995 	    p, pr, s, vm, pr->ps_limit, pr->ps_sigacts, &tu, isthread,
1996 	    show_pointers);
1997 
1998 	/* stuff that's too painful to generalize into the macros */
1999 	if (pr->ps_pptr)
2000 		ki->p_ppid = pr->ps_ppid;
2001 	if (s->s_leader)
2002 		ki->p_sid = s->s_leader->ps_pid;
2003 
2004 	if ((pr->ps_flags & PS_CONTROLT) && (tp = s->s_ttyp)) {
2005 		ki->p_tdev = tp->t_dev;
2006 		ki->p_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : -1;
2007 		if (show_pointers)
2008 			ki->p_tsess = PTRTOINT64(tp->t_session);
2009 	} else {
2010 		ki->p_tdev = NODEV;
2011 		ki->p_tpgid = -1;
2012 	}
2013 
2014 	/* fixups that can only be done in the kernel */
2015 	if ((pr->ps_flags & PS_ZOMBIE) == 0) {
2016 		if ((pr->ps_flags & PS_EMBRYO) == 0 && vm != NULL)
2017 			ki->p_vm_rssize = vm_resident_count(vm);
2018 		calctsru(&tu, &ut, &st, NULL);
2019 		ki->p_uutime_sec = ut.tv_sec;
2020 		ki->p_uutime_usec = ut.tv_nsec/1000;
2021 		ki->p_ustime_sec = st.tv_sec;
2022 		ki->p_ustime_usec = st.tv_nsec/1000;
2023 
2024 		/* Convert starting uptime to a starting UTC time. */
2025 		nanoboottime(&booted);
2026 		timespecadd(&booted, &pr->ps_start, &utc);
2027 		ki->p_ustart_sec = utc.tv_sec;
2028 		ki->p_ustart_usec = utc.tv_nsec / 1000;
2029 
2030 #ifdef MULTIPROCESSOR
2031 		if (p->p_cpu != NULL)
2032 			ki->p_cpuid = CPU_INFO_UNIT(p->p_cpu);
2033 #endif
2034 	}
2035 
2036 	/* get %cpu and schedule state: just one thread or sum of all? */
2037 	if (isthread) {
2038 		ki->p_pctcpu = p->p_pctcpu;
2039 		ki->p_stat   = p->p_stat;
2040 	} else {
2041 		ki->p_pctcpu = 0;
2042 		ki->p_stat = (pr->ps_flags & PS_ZOMBIE) ? SDEAD : SIDL;
2043 		TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
2044 			ki->p_pctcpu += p->p_pctcpu;
2045 			/* find best state: ONPROC > RUN > STOP > SLEEP > .. */
2046 			if (p->p_stat == SONPROC || ki->p_stat == SONPROC)
2047 				ki->p_stat = SONPROC;
2048 			else if (p->p_stat == SRUN || ki->p_stat == SRUN)
2049 				ki->p_stat = SRUN;
2050 			else if (p->p_stat == SSTOP || ki->p_stat == SSTOP)
2051 				ki->p_stat = SSTOP;
2052 			else if (p->p_stat == SSLEEP)
2053 				ki->p_stat = SSLEEP;
2054 		}
2055 	}
2056 }
2057 
2058 int
2059 sysctl_proc_args(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2060     struct proc *cp)
2061 {
2062 	struct process *vpr;
2063 	pid_t pid;
2064 	struct ps_strings pss;
2065 	struct iovec iov;
2066 	struct uio uio;
2067 	int error, cnt, op;
2068 	size_t limit;
2069 	char **rargv, **vargv;		/* reader vs. victim */
2070 	char *rarg, *varg, *buf;
2071 	struct vmspace *vm;
2072 	vaddr_t ps_strings;
2073 
2074 	if (namelen > 2)
2075 		return (ENOTDIR);
2076 	if (namelen < 2)
2077 		return (EINVAL);
2078 
2079 	pid = name[0];
2080 	op = name[1];
2081 
2082 	switch (op) {
2083 	case KERN_PROC_ARGV:
2084 	case KERN_PROC_NARGV:
2085 	case KERN_PROC_ENV:
2086 	case KERN_PROC_NENV:
2087 		break;
2088 	default:
2089 		return (EOPNOTSUPP);
2090 	}
2091 
2092 	if ((vpr = prfind(pid)) == NULL)
2093 		return (ESRCH);
2094 
2095 	if (oldp == NULL) {
2096 		if (op == KERN_PROC_NARGV || op == KERN_PROC_NENV)
2097 			*oldlenp = sizeof(int);
2098 		else
2099 			*oldlenp = ARG_MAX;	/* XXX XXX XXX */
2100 		return (0);
2101 	}
2102 
2103 	/* Either system process or exiting/zombie */
2104 	if (vpr->ps_flags & (PS_SYSTEM | PS_EXITING))
2105 		return (EINVAL);
2106 
2107 	/* Execing - danger. */
2108 	if ((vpr->ps_flags & PS_INEXEC))
2109 		return (EBUSY);
2110 
2111 	/* Only owner or root can get env */
2112 	if ((op == KERN_PROC_NENV || op == KERN_PROC_ENV) &&
2113 	    (vpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
2114 	    (error = suser(cp)) != 0))
2115 		return (error);
2116 
2117 	ps_strings = vpr->ps_strings;
2118 	vm = vpr->ps_vmspace;
2119 	uvmspace_addref(vm);
2120 	vpr = NULL;
2121 
2122 	buf = malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
2123 
2124 	iov.iov_base = &pss;
2125 	iov.iov_len = sizeof(pss);
2126 	uio.uio_iov = &iov;
2127 	uio.uio_iovcnt = 1;
2128 	uio.uio_offset = (off_t)ps_strings;
2129 	uio.uio_resid = sizeof(pss);
2130 	uio.uio_segflg = UIO_SYSSPACE;
2131 	uio.uio_rw = UIO_READ;
2132 	uio.uio_procp = cp;
2133 
2134 	if ((error = uvm_io(&vm->vm_map, &uio, 0)) != 0)
2135 		goto out;
2136 
2137 	if (op == KERN_PROC_NARGV) {
2138 		error = sysctl_rdint(oldp, oldlenp, NULL, pss.ps_nargvstr);
2139 		goto out;
2140 	}
2141 	if (op == KERN_PROC_NENV) {
2142 		error = sysctl_rdint(oldp, oldlenp, NULL, pss.ps_nenvstr);
2143 		goto out;
2144 	}
2145 
2146 	if (op == KERN_PROC_ARGV) {
2147 		cnt = pss.ps_nargvstr;
2148 		vargv = pss.ps_argvstr;
2149 	} else {
2150 		cnt = pss.ps_nenvstr;
2151 		vargv = pss.ps_envstr;
2152 	}
2153 
2154 	/* -1 to have space for a terminating NUL */
2155 	limit = *oldlenp - 1;
2156 	*oldlenp = 0;
2157 
2158 	rargv = oldp;
2159 
2160 	/*
2161 	 * *oldlenp - number of bytes copied out into readers buffer.
2162 	 * limit - maximal number of bytes allowed into readers buffer.
2163 	 * rarg - pointer into readers buffer where next arg will be stored.
2164 	 * rargv - pointer into readers buffer where the next rarg pointer
2165 	 *  will be stored.
2166 	 * vargv - pointer into victim address space where the next argument
2167 	 *  will be read.
2168 	 */
2169 
2170 	/* space for cnt pointers and a NULL */
2171 	rarg = (char *)(rargv + cnt + 1);
2172 	*oldlenp += (cnt + 1) * sizeof(char **);
2173 
2174 	while (cnt > 0 && *oldlenp < limit) {
2175 		size_t len, vstrlen;
2176 
2177 		/* Write to readers argv */
2178 		if ((error = copyout(&rarg, rargv, sizeof(rarg))) != 0)
2179 			goto out;
2180 
2181 		/* read the victim argv */
2182 		iov.iov_base = &varg;
2183 		iov.iov_len = sizeof(varg);
2184 		uio.uio_iov = &iov;
2185 		uio.uio_iovcnt = 1;
2186 		uio.uio_offset = (off_t)(vaddr_t)vargv;
2187 		uio.uio_resid = sizeof(varg);
2188 		uio.uio_segflg = UIO_SYSSPACE;
2189 		uio.uio_rw = UIO_READ;
2190 		uio.uio_procp = cp;
2191 		if ((error = uvm_io(&vm->vm_map, &uio, 0)) != 0)
2192 			goto out;
2193 
2194 		if (varg == NULL)
2195 			break;
2196 
2197 		/*
2198 		 * read the victim arg. We must jump through hoops to avoid
2199 		 * crossing a page boundary too much and returning an error.
2200 		 */
2201 more:
2202 		len = PAGE_SIZE - (((vaddr_t)varg) & PAGE_MASK);
2203 		/* leave space for the terminating NUL */
2204 		iov.iov_base = buf;
2205 		iov.iov_len = len;
2206 		uio.uio_iov = &iov;
2207 		uio.uio_iovcnt = 1;
2208 		uio.uio_offset = (off_t)(vaddr_t)varg;
2209 		uio.uio_resid = len;
2210 		uio.uio_segflg = UIO_SYSSPACE;
2211 		uio.uio_rw = UIO_READ;
2212 		uio.uio_procp = cp;
2213 		if ((error = uvm_io(&vm->vm_map, &uio, 0)) != 0)
2214 			goto out;
2215 
2216 		for (vstrlen = 0; vstrlen < len; vstrlen++) {
2217 			if (buf[vstrlen] == '\0')
2218 				break;
2219 		}
2220 
2221 		/* Don't overflow readers buffer. */
2222 		if (*oldlenp + vstrlen + 1 >= limit) {
2223 			error = ENOMEM;
2224 			goto out;
2225 		}
2226 
2227 		if ((error = copyout(buf, rarg, vstrlen)) != 0)
2228 			goto out;
2229 
2230 		*oldlenp += vstrlen;
2231 		rarg += vstrlen;
2232 
2233 		/* The string didn't end in this page? */
2234 		if (vstrlen == len) {
2235 			varg += vstrlen;
2236 			goto more;
2237 		}
2238 
2239 		/* End of string. Terminate it with a NUL */
2240 		buf[0] = '\0';
2241 		if ((error = copyout(buf, rarg, 1)) != 0)
2242 			goto out;
2243 		*oldlenp += 1;
2244 		rarg += 1;
2245 
2246 		vargv++;
2247 		rargv++;
2248 		cnt--;
2249 	}
2250 
2251 	if (*oldlenp >= limit) {
2252 		error = ENOMEM;
2253 		goto out;
2254 	}
2255 
2256 	/* Write the terminating null */
2257 	rarg = NULL;
2258 	error = copyout(&rarg, rargv, sizeof(rarg));
2259 
2260 out:
2261 	uvmspace_free(vm);
2262 	free(buf, M_TEMP, PAGE_SIZE);
2263 	return (error);
2264 }
2265 
2266 int
2267 sysctl_proc_cwd(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2268     struct proc *cp)
2269 {
2270 	struct process *findpr;
2271 	struct vnode *vp;
2272 	pid_t pid;
2273 	int error;
2274 	size_t lenused, len;
2275 	char *path, *bp, *bend;
2276 
2277 	if (namelen > 1)
2278 		return (ENOTDIR);
2279 	if (namelen < 1)
2280 		return (EINVAL);
2281 
2282 	pid = name[0];
2283 	if ((findpr = prfind(pid)) == NULL)
2284 		return (ESRCH);
2285 
2286 	if (oldp == NULL) {
2287 		*oldlenp = MAXPATHLEN * 4;
2288 		return (0);
2289 	}
2290 
2291 	/* Either system process or exiting/zombie */
2292 	if (findpr->ps_flags & (PS_SYSTEM | PS_EXITING))
2293 		return (EINVAL);
2294 
2295 	/* Only owner or root can get cwd */
2296 	if (findpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
2297 	    (error = suser(cp)) != 0)
2298 		return (error);
2299 
2300 	len = *oldlenp;
2301 	if (len > MAXPATHLEN * 4)
2302 		len = MAXPATHLEN * 4;
2303 	else if (len < 2)
2304 		return (ERANGE);
2305 	*oldlenp = 0;
2306 
2307 	/* snag a reference to the vnode before we can sleep */
2308 	vp = findpr->ps_fd->fd_cdir;
2309 	vref(vp);
2310 
2311 	path = malloc(len, M_TEMP, M_WAITOK);
2312 
2313 	bp = &path[len];
2314 	bend = bp;
2315 	*(--bp) = '\0';
2316 
2317 	/* Same as sys__getcwd */
2318 	error = vfs_getcwd_common(vp, NULL,
2319 	    &bp, path, len / 2, GETCWD_CHECK_ACCESS, cp);
2320 	if (error == 0) {
2321 		*oldlenp = lenused = bend - bp;
2322 		error = copyout(bp, oldp, lenused);
2323 	}
2324 
2325 	vrele(vp);
2326 	free(path, M_TEMP, len);
2327 
2328 	return (error);
2329 }
2330 
2331 int
2332 sysctl_proc_nobroadcastkill(int *name, u_int namelen, void *newp, size_t newlen,
2333     void *oldp, size_t *oldlenp, struct proc *cp)
2334 {
2335 	struct process *findpr;
2336 	pid_t pid;
2337 	int error, flag;
2338 
2339 	if (namelen > 1)
2340 		return (ENOTDIR);
2341 	if (namelen < 1)
2342 		return (EINVAL);
2343 
2344 	pid = name[0];
2345 	if ((findpr = prfind(pid)) == NULL)
2346 		return (ESRCH);
2347 
2348 	/* Either system process or exiting/zombie */
2349 	if (findpr->ps_flags & (PS_SYSTEM | PS_EXITING))
2350 		return (EINVAL);
2351 
2352 	/* Only root can change PS_NOBROADCASTKILL */
2353 	if (newp != NULL && (error = suser(cp)) != 0)
2354 		return (error);
2355 
2356 	/* get the PS_NOBROADCASTKILL flag */
2357 	flag = findpr->ps_flags & PS_NOBROADCASTKILL ? 1 : 0;
2358 
2359 	error = sysctl_int(oldp, oldlenp, newp, newlen, &flag);
2360 	if (error == 0 && newp) {
2361 		if (flag)
2362 			atomic_setbits_int(&findpr->ps_flags,
2363 			    PS_NOBROADCASTKILL);
2364 		else
2365 			atomic_clearbits_int(&findpr->ps_flags,
2366 			    PS_NOBROADCASTKILL);
2367 	}
2368 
2369 	return (error);
2370 }
2371 
2372 /* Arbitrary but reasonable limit for one iteration. */
2373 #define	VMMAP_MAXLEN	MAXPHYS
2374 
2375 int
2376 sysctl_proc_vmmap(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2377     struct proc *cp)
2378 {
2379 	struct process *findpr;
2380 	pid_t pid;
2381 	int error;
2382 	size_t oldlen, len;
2383 	struct kinfo_vmentry *kve, *ukve;
2384 	u_long *ustart, start;
2385 
2386 	if (namelen > 1)
2387 		return (ENOTDIR);
2388 	if (namelen < 1)
2389 		return (EINVAL);
2390 
2391 	/* Provide max buffer length as hint. */
2392 	if (oldp == NULL) {
2393 		if (oldlenp == NULL)
2394 			return (EINVAL);
2395 		else {
2396 			*oldlenp = VMMAP_MAXLEN;
2397 			return (0);
2398 		}
2399 	}
2400 
2401 	pid = name[0];
2402 	if (pid == cp->p_p->ps_pid) {
2403 		/* Self process mapping. */
2404 		findpr = cp->p_p;
2405 	} else if (pid > 0) {
2406 		if ((findpr = prfind(pid)) == NULL)
2407 			return (ESRCH);
2408 
2409 		/* Either system process or exiting/zombie */
2410 		if (findpr->ps_flags & (PS_SYSTEM | PS_EXITING))
2411 			return (EINVAL);
2412 
2413 #if 1
2414 		/* XXX Allow only root for now */
2415 		if ((error = suser(cp)) != 0)
2416 			return (error);
2417 #else
2418 		/* Only owner or root can get vmmap */
2419 		if (findpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
2420 		    (error = suser(cp)) != 0)
2421 			return (error);
2422 #endif
2423 	} else {
2424 		/* Only root can get kernel_map */
2425 		if ((error = suser(cp)) != 0)
2426 			return (error);
2427 		findpr = NULL;
2428 	}
2429 
2430 	/* Check the given size. */
2431 	oldlen = *oldlenp;
2432 	if (oldlen == 0 || oldlen % sizeof(*kve) != 0)
2433 		return (EINVAL);
2434 
2435 	/* Deny huge allocation. */
2436 	if (oldlen > VMMAP_MAXLEN)
2437 		return (EINVAL);
2438 
2439 	/*
2440 	 * Iterate from the given address passed as the first element's
2441 	 * kve_start via oldp.
2442 	 */
2443 	ukve = (struct kinfo_vmentry *)oldp;
2444 	ustart = &ukve->kve_start;
2445 	error = copyin(ustart, &start, sizeof(start));
2446 	if (error != 0)
2447 		return (error);
2448 
2449 	/* Allocate wired memory to not block. */
2450 	kve = malloc(oldlen, M_TEMP, M_WAITOK);
2451 
2452 	/* Set the base address and read entries. */
2453 	kve[0].kve_start = start;
2454 	len = oldlen;
2455 	error = fill_vmmap(findpr, kve, &len);
2456 	if (error != 0 && error != ENOMEM)
2457 		goto done;
2458 	if (len == 0)
2459 		goto done;
2460 
2461 	KASSERT(len <= oldlen);
2462 	KASSERT((len % sizeof(struct kinfo_vmentry)) == 0);
2463 
2464 	error = copyout(kve, oldp, len);
2465 
2466 done:
2467 	*oldlenp = len;
2468 
2469 	free(kve, M_TEMP, oldlen);
2470 
2471 	return (error);
2472 }
2473 #endif
2474 
2475 /*
2476  * Initialize disknames/diskstats for export by sysctl. If update is set,
2477  * then we simply update the disk statistics information.
2478  */
2479 int
2480 sysctl_diskinit(int update, struct proc *p)
2481 {
2482 	struct diskstats *sdk;
2483 	struct disk *dk;
2484 	const char *duid;
2485 	int error, changed = 0;
2486 
2487 	KERNEL_ASSERT_LOCKED();
2488 
2489 	if ((error = rw_enter(&sysctl_disklock, RW_WRITE|RW_INTR)) != 0)
2490 		return error;
2491 
2492 	/* Run in a loop, disks may change while malloc sleeps. */
2493 	while (disk_change) {
2494 		int tlen;
2495 
2496 		disk_change = 0;
2497 
2498 		tlen = 0;
2499 		TAILQ_FOREACH(dk, &disklist, dk_link) {
2500 			if (dk->dk_name)
2501 				tlen += strlen(dk->dk_name);
2502 			tlen += 18;	/* label uid + separators */
2503 		}
2504 		tlen++;
2505 
2506 		/*
2507 		 * The sysctl_disklock ensures that no other process can
2508 		 * allocate disknames and diskstats while our malloc sleeps.
2509 		 */
2510 		free(disknames, M_SYSCTL, disknameslen);
2511 		free(diskstats, M_SYSCTL, diskstatslen);
2512 		diskstats = NULL;
2513 		disknames = NULL;
2514 		diskstats = mallocarray(disk_count, sizeof(struct diskstats),
2515 		    M_SYSCTL, M_WAITOK|M_ZERO);
2516 		diskstatslen = disk_count * sizeof(struct diskstats);
2517 		disknames = malloc(tlen, M_SYSCTL, M_WAITOK|M_ZERO);
2518 		disknameslen = tlen;
2519 		disknames[0] = '\0';
2520 		changed = 1;
2521 	}
2522 
2523 	if (changed) {
2524 		int l;
2525 
2526 		l = 0;
2527 		sdk = diskstats;
2528 		TAILQ_FOREACH(dk, &disklist, dk_link) {
2529 			duid = NULL;
2530 			if (dk->dk_label && !duid_iszero(dk->dk_label->d_uid))
2531 				duid = duid_format(dk->dk_label->d_uid);
2532 			snprintf(disknames + l, disknameslen - l, "%s:%s,",
2533 			    dk->dk_name ? dk->dk_name : "",
2534 			    duid ? duid : "");
2535 			l += strlen(disknames + l);
2536 			strlcpy(sdk->ds_name, dk->dk_name,
2537 			    sizeof(sdk->ds_name));
2538 			mtx_enter(&dk->dk_mtx);
2539 			sdk->ds_busy = dk->dk_busy;
2540 			sdk->ds_rxfer = dk->dk_rxfer;
2541 			sdk->ds_wxfer = dk->dk_wxfer;
2542 			sdk->ds_seek = dk->dk_seek;
2543 			sdk->ds_rbytes = dk->dk_rbytes;
2544 			sdk->ds_wbytes = dk->dk_wbytes;
2545 			sdk->ds_attachtime = dk->dk_attachtime;
2546 			sdk->ds_timestamp = dk->dk_timestamp;
2547 			sdk->ds_time = dk->dk_time;
2548 			mtx_leave(&dk->dk_mtx);
2549 			sdk++;
2550 		}
2551 
2552 		/* Eliminate trailing comma */
2553 		if (l != 0)
2554 			disknames[l - 1] = '\0';
2555 	} else if (update) {
2556 		/* Just update, number of drives hasn't changed */
2557 		sdk = diskstats;
2558 		TAILQ_FOREACH(dk, &disklist, dk_link) {
2559 			strlcpy(sdk->ds_name, dk->dk_name,
2560 			    sizeof(sdk->ds_name));
2561 			mtx_enter(&dk->dk_mtx);
2562 			sdk->ds_busy = dk->dk_busy;
2563 			sdk->ds_rxfer = dk->dk_rxfer;
2564 			sdk->ds_wxfer = dk->dk_wxfer;
2565 			sdk->ds_seek = dk->dk_seek;
2566 			sdk->ds_rbytes = dk->dk_rbytes;
2567 			sdk->ds_wbytes = dk->dk_wbytes;
2568 			sdk->ds_attachtime = dk->dk_attachtime;
2569 			sdk->ds_timestamp = dk->dk_timestamp;
2570 			sdk->ds_time = dk->dk_time;
2571 			mtx_leave(&dk->dk_mtx);
2572 			sdk++;
2573 		}
2574 	}
2575 	rw_exit_write(&sysctl_disklock);
2576 	return 0;
2577 }
2578 
2579 #if defined(SYSVMSG) || defined(SYSVSEM) || defined(SYSVSHM)
2580 int
2581 sysctl_sysvipc(int *name, u_int namelen, void *where, size_t *sizep)
2582 {
2583 #ifdef SYSVSEM
2584 	struct sem_sysctl_info *semsi;
2585 #endif
2586 #ifdef SYSVSHM
2587 	struct shm_sysctl_info *shmsi;
2588 #endif
2589 	size_t infosize, dssize, tsize, buflen, bufsiz;
2590 	int i, nds, error, ret;
2591 	void *buf;
2592 
2593 	if (namelen != 1)
2594 		return (EINVAL);
2595 
2596 	buflen = *sizep;
2597 
2598 	switch (*name) {
2599 	case KERN_SYSVIPC_MSG_INFO:
2600 #ifdef SYSVMSG
2601 		return (sysctl_sysvmsg(name, namelen, where, sizep));
2602 #else
2603 		return (EOPNOTSUPP);
2604 #endif
2605 	case KERN_SYSVIPC_SEM_INFO:
2606 #ifdef SYSVSEM
2607 		infosize = sizeof(semsi->seminfo);
2608 		nds = seminfo.semmni;
2609 		dssize = sizeof(semsi->semids[0]);
2610 		break;
2611 #else
2612 		return (EOPNOTSUPP);
2613 #endif
2614 	case KERN_SYSVIPC_SHM_INFO:
2615 #ifdef SYSVSHM
2616 		infosize = sizeof(shmsi->shminfo);
2617 		nds = shminfo.shmmni;
2618 		dssize = sizeof(shmsi->shmids[0]);
2619 		break;
2620 #else
2621 		return (EOPNOTSUPP);
2622 #endif
2623 	default:
2624 		return (EINVAL);
2625 	}
2626 	tsize = infosize + (nds * dssize);
2627 
2628 	/* Return just the total size required. */
2629 	if (where == NULL) {
2630 		*sizep = tsize;
2631 		return (0);
2632 	}
2633 
2634 	/* Not enough room for even the info struct. */
2635 	if (buflen < infosize) {
2636 		*sizep = 0;
2637 		return (ENOMEM);
2638 	}
2639 	bufsiz = min(tsize, buflen);
2640 	buf = malloc(bufsiz, M_TEMP, M_WAITOK|M_ZERO);
2641 
2642 	switch (*name) {
2643 #ifdef SYSVSEM
2644 	case KERN_SYSVIPC_SEM_INFO:
2645 		semsi = (struct sem_sysctl_info *)buf;
2646 		semsi->seminfo = seminfo;
2647 		break;
2648 #endif
2649 #ifdef SYSVSHM
2650 	case KERN_SYSVIPC_SHM_INFO:
2651 		shmsi = (struct shm_sysctl_info *)buf;
2652 		shmsi->shminfo = shminfo;
2653 		break;
2654 #endif
2655 	}
2656 	buflen -= infosize;
2657 
2658 	ret = 0;
2659 	if (buflen > 0) {
2660 		/* Fill in the IPC data structures.  */
2661 		for (i = 0; i < nds; i++) {
2662 			if (buflen < dssize) {
2663 				ret = ENOMEM;
2664 				break;
2665 			}
2666 			switch (*name) {
2667 #ifdef SYSVSEM
2668 			case KERN_SYSVIPC_SEM_INFO:
2669 				if (sema[i] != NULL)
2670 					memcpy(&semsi->semids[i], sema[i],
2671 					    dssize);
2672 				else
2673 					memset(&semsi->semids[i], 0, dssize);
2674 				break;
2675 #endif
2676 #ifdef SYSVSHM
2677 			case KERN_SYSVIPC_SHM_INFO:
2678 				if (shmsegs[i] != NULL)
2679 					memcpy(&shmsi->shmids[i], shmsegs[i],
2680 					    dssize);
2681 				else
2682 					memset(&shmsi->shmids[i], 0, dssize);
2683 				break;
2684 #endif
2685 			}
2686 			buflen -= dssize;
2687 		}
2688 	}
2689 	*sizep -= buflen;
2690 	error = copyout(buf, where, *sizep);
2691 	free(buf, M_TEMP, bufsiz);
2692 	/* If copyout succeeded, use return code set earlier. */
2693 	return (error ? error : ret);
2694 }
2695 #endif /* SYSVMSG || SYSVSEM || SYSVSHM */
2696 
2697 #ifndef	SMALL_KERNEL
2698 
2699 int
2700 sysctl_intrcnt(int *name, u_int namelen, void *oldp, size_t *oldlenp)
2701 {
2702 	return (evcount_sysctl(name, namelen, oldp, oldlenp, NULL, 0));
2703 }
2704 
2705 
2706 int
2707 sysctl_sensors(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2708     void *newp, size_t newlen)
2709 {
2710 	struct ksensor *ks;
2711 	struct sensor *us;
2712 	struct ksensordev *ksd;
2713 	struct sensordev *usd;
2714 	int dev, numt, ret;
2715 	enum sensor_type type;
2716 
2717 	if (namelen != 1 && namelen != 3)
2718 		return (ENOTDIR);
2719 
2720 	dev = name[0];
2721 	if (namelen == 1) {
2722 		ret = sensordev_get(dev, &ksd);
2723 		if (ret)
2724 			return (ret);
2725 
2726 		/* Grab a copy, to clear the kernel pointers */
2727 		usd = malloc(sizeof(*usd), M_TEMP, M_WAITOK|M_ZERO);
2728 		usd->num = ksd->num;
2729 		strlcpy(usd->xname, ksd->xname, sizeof(usd->xname));
2730 		memcpy(usd->maxnumt, ksd->maxnumt, sizeof(usd->maxnumt));
2731 		usd->sensors_count = ksd->sensors_count;
2732 
2733 		ret = sysctl_rdstruct(oldp, oldlenp, newp, usd,
2734 		    sizeof(struct sensordev));
2735 
2736 		free(usd, M_TEMP, sizeof(*usd));
2737 		return (ret);
2738 	}
2739 
2740 	type = name[1];
2741 	numt = name[2];
2742 
2743 	ret = sensor_find(dev, type, numt, &ks);
2744 	if (ret)
2745 		return (ret);
2746 
2747 	/* Grab a copy, to clear the kernel pointers */
2748 	us = malloc(sizeof(*us), M_TEMP, M_WAITOK|M_ZERO);
2749 	memcpy(us->desc, ks->desc, sizeof(us->desc));
2750 	us->tv = ks->tv;
2751 	us->value = ks->value;
2752 	us->type = ks->type;
2753 	us->status = ks->status;
2754 	us->numt = ks->numt;
2755 	us->flags = ks->flags;
2756 
2757 	ret = sysctl_rdstruct(oldp, oldlenp, newp, us,
2758 	    sizeof(struct sensor));
2759 	free(us, M_TEMP, sizeof(*us));
2760 	return (ret);
2761 }
2762 #endif	/* SMALL_KERNEL */
2763 
2764 int
2765 sysctl_cptime2(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2766     void *newp, size_t newlen)
2767 {
2768 	CPU_INFO_ITERATOR cii;
2769 	struct cpu_info *ci;
2770 	int found = 0;
2771 
2772 	if (namelen != 1)
2773 		return (ENOTDIR);
2774 
2775 	CPU_INFO_FOREACH(cii, ci) {
2776 		if (name[0] == CPU_INFO_UNIT(ci)) {
2777 			found = 1;
2778 			break;
2779 		}
2780 	}
2781 	if (!found)
2782 		return (ENOENT);
2783 
2784 	return (sysctl_rdstruct(oldp, oldlenp, newp,
2785 	    &ci->ci_schedstate.spc_cp_time,
2786 	    sizeof(ci->ci_schedstate.spc_cp_time)));
2787 }
2788 
2789 #if NAUDIO > 0
2790 int
2791 sysctl_audio(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2792     void *newp, size_t newlen)
2793 {
2794 	if (namelen != 1)
2795 		return (ENOTDIR);
2796 
2797 	if (name[0] != KERN_AUDIO_RECORD)
2798 		return (ENOENT);
2799 
2800 	return (sysctl_int(oldp, oldlenp, newp, newlen, &audio_record_enable));
2801 }
2802 #endif
2803 
2804 #if NVIDEO > 0
2805 int
2806 sysctl_video(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2807     void *newp, size_t newlen)
2808 {
2809 	if (namelen != 1)
2810 		return (ENOTDIR);
2811 
2812 	if (name[0] != KERN_VIDEO_RECORD)
2813 		return (ENOENT);
2814 
2815 	return (sysctl_int(oldp, oldlenp, newp, newlen, &video_record_enable));
2816 }
2817 #endif
2818 
2819 int
2820 sysctl_cpustats(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2821     void *newp, size_t newlen)
2822 {
2823 	CPU_INFO_ITERATOR cii;
2824 	struct cpustats cs;
2825 	struct cpu_info *ci;
2826 	int found = 0;
2827 
2828 	if (namelen != 1)
2829 		return (ENOTDIR);
2830 
2831 	CPU_INFO_FOREACH(cii, ci) {
2832 		if (name[0] == CPU_INFO_UNIT(ci)) {
2833 			found = 1;
2834 			break;
2835 		}
2836 	}
2837 	if (!found)
2838 		return (ENOENT);
2839 
2840 	memset(&cs, 0, sizeof cs);
2841 	memcpy(&cs.cs_time, &ci->ci_schedstate.spc_cp_time, sizeof(cs.cs_time));
2842 	cs.cs_flags = 0;
2843 	if (cpu_is_online(ci))
2844 		cs.cs_flags |= CPUSTATS_ONLINE;
2845 
2846 	return (sysctl_rdstruct(oldp, oldlenp, newp, &cs, sizeof(cs)));
2847 }
2848 
2849 int
2850 sysctl_utc_offset(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
2851 {
2852 	struct timespec adjusted, now;
2853 	int adjustment_seconds, error, new_offset_minutes, old_offset_minutes;
2854 
2855 	old_offset_minutes = utc_offset / 60;	/* seconds -> minutes */
2856 	new_offset_minutes = old_offset_minutes;
2857 	error = sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
2858 	     &new_offset_minutes);
2859 	if (error)
2860 		return error;
2861 	if (new_offset_minutes < -24 * 60 || new_offset_minutes > 24 * 60)
2862 		return EINVAL;
2863 	if (new_offset_minutes == old_offset_minutes)
2864 		return 0;
2865 
2866 	utc_offset = new_offset_minutes * 60;	/* minutes -> seconds */
2867 	adjustment_seconds = (new_offset_minutes - old_offset_minutes) * 60;
2868 
2869 	nanotime(&now);
2870 	adjusted = now;
2871 	adjusted.tv_sec -= adjustment_seconds;
2872 	tc_setrealtimeclock(&adjusted);
2873 	resettodr();
2874 
2875 	return 0;
2876 }
2877