xref: /openbsd-src/sys/kern/kern_sysctl.c (revision d1df930ffab53da22f3324c32bed7ac5709915e6)
1 /*	$OpenBSD: kern_sysctl.c,v 1.348 2018/09/26 17:23:13 cheloha Exp $	*/
2 /*	$NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $	*/
3 
4 /*-
5  * Copyright (c) 1982, 1986, 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Mike Karels at Berkeley Software Design, Inc.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)kern_sysctl.c	8.4 (Berkeley) 4/14/94
36  */
37 
38 /*
39  * sysctl system call.
40  */
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/malloc.h>
46 #include <sys/pool.h>
47 #include <sys/proc.h>
48 #include <sys/resourcevar.h>
49 #include <sys/signalvar.h>
50 #include <sys/fcntl.h>
51 #include <sys/file.h>
52 #include <sys/filedesc.h>
53 #include <sys/vnode.h>
54 #include <sys/unistd.h>
55 #include <sys/buf.h>
56 #include <sys/ioctl.h>
57 #include <sys/tty.h>
58 #include <sys/disklabel.h>
59 #include <sys/disk.h>
60 #include <sys/sysctl.h>
61 #include <sys/msgbuf.h>
62 #include <sys/vmmeter.h>
63 #include <sys/namei.h>
64 #include <sys/exec.h>
65 #include <sys/mbuf.h>
66 #include <sys/percpu.h>
67 #include <sys/sensors.h>
68 #include <sys/pipe.h>
69 #include <sys/eventvar.h>
70 #include <sys/socketvar.h>
71 #include <sys/socket.h>
72 #include <sys/domain.h>
73 #include <sys/protosw.h>
74 #include <sys/pledge.h>
75 #include <sys/timetc.h>
76 #include <sys/evcount.h>
77 #include <sys/un.h>
78 #include <sys/unpcb.h>
79 #include <sys/sched.h>
80 #include <sys/mount.h>
81 #include <sys/syscallargs.h>
82 #include <sys/witness.h>
83 
84 #include <uvm/uvm_extern.h>
85 
86 #include <dev/cons.h>
87 #include <dev/rndvar.h>
88 
89 #include <net/route.h>
90 #include <netinet/in.h>
91 #include <netinet/ip.h>
92 #include <netinet/ip_var.h>
93 #include <netinet/in_pcb.h>
94 #include <netinet/ip6.h>
95 #include <netinet/tcp.h>
96 #include <netinet/tcp_timer.h>
97 #include <netinet/tcp_var.h>
98 #include <netinet/udp.h>
99 #include <netinet/udp_var.h>
100 #include <netinet6/ip6_var.h>
101 
102 #ifdef DDB
103 #include <ddb/db_var.h>
104 #endif
105 
106 #ifdef SYSVMSG
107 #include <sys/msg.h>
108 #endif
109 #ifdef SYSVSEM
110 #include <sys/sem.h>
111 #endif
112 #ifdef SYSVSHM
113 #include <sys/shm.h>
114 #endif
115 
116 #include "audio.h"
117 
/*
 * Kernel state defined in other files.  forkstat, nchstats, nselcoll,
 * fscale, ccpu, numvnodes and net_livelocks are reported read-only by
 * kern_sysctl() below.
 */
extern struct forkstat forkstat;
extern struct nchstats nchstats;
extern int nselcoll, fscale;
extern struct disklist_head disklist;
extern fixpt_t ccpu;
extern  long numvnodes;
extern u_int net_livelocks;
#if NAUDIO > 0
extern int audio_record_enable;
#endif

/*
 * Backing variable for KERN_ALLOWKMEM; kern_sysctl() only lets it be
 * written while securelevel is not above 0.
 */
int allowkmem;

/* Called by kern_sysctl() after a successful KERN_MAXCLUSTERS request. */
extern void nmbclust_update(void);

/* Sub-handlers dispatched from kern_sysctl() and hw_sysctl(). */
int sysctl_diskinit(int, struct proc *);
int sysctl_proc_args(int *, u_int, void *, size_t *, struct proc *);
int sysctl_proc_cwd(int *, u_int, void *, size_t *, struct proc *);
int sysctl_proc_nobroadcastkill(int *, u_int, void *, size_t, void *, size_t *,
	struct proc *);
int sysctl_proc_vmmap(int *, u_int, void *, size_t *, struct proc *);
int sysctl_intrcnt(int *, u_int, void *, size_t *);
int sysctl_sensors(int *, u_int, void *, size_t *, void *, size_t);
int sysctl_cptime2(int *, u_int, void *, size_t *, void *, size_t);
#if NAUDIO > 0
int sysctl_audio(int *, u_int, void *, size_t *, void *, size_t);
#endif

/* Helpers that fill the kinfo_file / kinfo_proc export structures. */
void fill_file(struct kinfo_file *, struct file *, struct filedesc *, int,
    struct vnode *, struct process *, struct proc *, struct socket *, int);
void fill_kproc(struct process *, struct kinfo_proc *, struct proc *, int);

/*
 * Optional hook that stores the CPU speed through its argument and
 * returns an errno value; hw_sysctl() answers HW_CPUSPEED with
 * EOPNOTSUPP while this pointer is NULL.
 */
int (*cpu_cpuspeed)(int *);

/*
 * Lock to avoid too many processes vslocking a large amount of memory
 * at the same time.  sys_sysctl() holds sysctl_lock for writing around
 * the handler call whenever an "old" buffer was supplied.
 */
struct rwlock sysctl_lock = RWLOCK_INITIALIZER("sysctllk");
struct rwlock sysctl_disklock = RWLOCK_INITIALIZER("sysctldlk");
158 
159 int
160 sys_sysctl(struct proc *p, void *v, register_t *retval)
161 {
162 	struct sys_sysctl_args /* {
163 		syscallarg(const int *) name;
164 		syscallarg(u_int) namelen;
165 		syscallarg(void *) old;
166 		syscallarg(size_t *) oldlenp;
167 		syscallarg(void *) new;
168 		syscallarg(size_t) newlen;
169 	} */ *uap = v;
170 	int error, dolock = 1;
171 	size_t savelen = 0, oldlen = 0;
172 	sysctlfn *fn;
173 	int name[CTL_MAXNAME];
174 
175 	if (SCARG(uap, new) != NULL &&
176 	    (error = suser(p)))
177 		return (error);
178 	/*
179 	 * all top-level sysctl names are non-terminal
180 	 */
181 	if (SCARG(uap, namelen) > CTL_MAXNAME || SCARG(uap, namelen) < 2)
182 		return (EINVAL);
183 	error = copyin(SCARG(uap, name), name,
184 		       SCARG(uap, namelen) * sizeof(int));
185 	if (error)
186 		return (error);
187 
188 	error = pledge_sysctl(p, SCARG(uap, namelen),
189 	    name, SCARG(uap, new));
190 	if (error)
191 		return (error);
192 
193 	switch (name[0]) {
194 	case CTL_KERN:
195 		fn = kern_sysctl;
196 		break;
197 	case CTL_HW:
198 		fn = hw_sysctl;
199 		break;
200 	case CTL_VM:
201 		fn = uvm_sysctl;
202 		break;
203 	case CTL_NET:
204 		fn = net_sysctl;
205 		break;
206 	case CTL_FS:
207 		fn = fs_sysctl;
208 		break;
209 	case CTL_VFS:
210 		fn = vfs_sysctl;
211 		break;
212 	case CTL_MACHDEP:
213 		fn = cpu_sysctl;
214 		break;
215 #ifdef DEBUG
216 	case CTL_DEBUG:
217 		fn = debug_sysctl;
218 		break;
219 #endif
220 #ifdef DDB
221 	case CTL_DDB:
222 		fn = ddb_sysctl;
223 		break;
224 #endif
225 	default:
226 		return (EOPNOTSUPP);
227 	}
228 
229 	if (SCARG(uap, oldlenp) &&
230 	    (error = copyin(SCARG(uap, oldlenp), &oldlen, sizeof(oldlen))))
231 		return (error);
232 	if (SCARG(uap, old) != NULL) {
233 		if ((error = rw_enter(&sysctl_lock, RW_WRITE|RW_INTR)) != 0)
234 			return (error);
235 		if (dolock) {
236 			if (atop(oldlen) > uvmexp.wiredmax - uvmexp.wired) {
237 				rw_exit_write(&sysctl_lock);
238 				return (ENOMEM);
239 			}
240 			error = uvm_vslock(p, SCARG(uap, old), oldlen,
241 			    PROT_READ | PROT_WRITE);
242 			if (error) {
243 				rw_exit_write(&sysctl_lock);
244 				return (error);
245 			}
246 		}
247 		savelen = oldlen;
248 	}
249 	error = (*fn)(&name[1], SCARG(uap, namelen) - 1, SCARG(uap, old),
250 	    &oldlen, SCARG(uap, new), SCARG(uap, newlen), p);
251 	if (SCARG(uap, old) != NULL) {
252 		if (dolock)
253 			uvm_vsunlock(p, SCARG(uap, old), savelen);
254 		rw_exit_write(&sysctl_lock);
255 	}
256 	if (error)
257 		return (error);
258 	if (SCARG(uap, oldlenp))
259 		error = copyout(&oldlen, SCARG(uap, oldlenp), sizeof(oldlen));
260 	return (error);
261 }
262 
/*
 * Attributes stored in the kernel.
 */
/* Host name buffer; hostnamelen is set to newlen on a successful KERN_HOSTNAME write. */
char hostname[MAXHOSTNAMELEN];
int hostnamelen;
/* Domain name buffer; domainnamelen is set to newlen on a successful KERN_DOMAINNAME write. */
char domainname[MAXHOSTNAMELEN];
int domainnamelen;
/* Read and written through an int temporary by KERN_HOSTID. */
long hostid;
/* String reported by HW_DISKNAMES; an empty string is reported while NULL. */
char *disknames = NULL;
size_t disknameslen;
/* Array reported by HW_DISKSTATS as disk_count entries. */
struct diskstats *diskstats = NULL;
size_t diskstatslen;
/* Current security level, see the KERN_SECURELVL case in kern_sysctl(). */
int securelevel;
276 
277 /*
278  * kernel related system variables.
279  */
280 int
281 kern_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
282     size_t newlen, struct proc *p)
283 {
284 	int error, level, inthostid, stackgap;
285 	dev_t dev;
286 	extern int somaxconn, sominconn;
287 	extern int nosuidcoredump;
288 	extern int maxlocksperuid;
289 	extern int pool_debug;
290 	extern int uvm_wxabort;
291 
292 	/* all sysctl names at this level are terminal except a ton of them */
293 	if (namelen != 1) {
294 		switch (name[0]) {
295 		case KERN_PROC:
296 		case KERN_PROF:
297 		case KERN_MALLOCSTATS:
298 		case KERN_TTY:
299 		case KERN_POOL:
300 		case KERN_PROC_ARGS:
301 		case KERN_PROC_CWD:
302 		case KERN_PROC_NOBROADCASTKILL:
303 		case KERN_PROC_VMMAP:
304 		case KERN_SYSVIPC_INFO:
305 		case KERN_SEMINFO:
306 		case KERN_SHMINFO:
307 		case KERN_INTRCNT:
308 		case KERN_WATCHDOG:
309 		case KERN_EVCOUNT:
310 		case KERN_TIMECOUNTER:
311 		case KERN_CPTIME2:
312 		case KERN_FILE:
313 		case KERN_AUDIO:
314 			break;
315 		default:
316 			return (ENOTDIR);	/* overloaded */
317 		}
318 	}
319 
320 	switch (name[0]) {
321 	case KERN_OSTYPE:
322 		return (sysctl_rdstring(oldp, oldlenp, newp, ostype));
323 	case KERN_OSRELEASE:
324 		return (sysctl_rdstring(oldp, oldlenp, newp, osrelease));
325 	case KERN_OSREV:
326 		return (sysctl_rdint(oldp, oldlenp, newp, OpenBSD));
327 	case KERN_OSVERSION:
328 		return (sysctl_rdstring(oldp, oldlenp, newp, osversion));
329 	case KERN_VERSION:
330 		return (sysctl_rdstring(oldp, oldlenp, newp, version));
331 	case KERN_MAXVNODES:
332 		return(sysctl_int(oldp, oldlenp, newp, newlen, &maxvnodes));
333 	case KERN_MAXPROC:
334 		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxprocess));
335 	case KERN_MAXFILES:
336 		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxfiles));
337 	case KERN_NFILES:
338 		return (sysctl_rdint(oldp, oldlenp, newp, numfiles));
339 	case KERN_TTYCOUNT:
340 		return (sysctl_rdint(oldp, oldlenp, newp, tty_count));
341 	case KERN_NUMVNODES:
342 		return (sysctl_rdint(oldp, oldlenp, newp, numvnodes));
343 	case KERN_ARGMAX:
344 		return (sysctl_rdint(oldp, oldlenp, newp, ARG_MAX));
345 	case KERN_NSELCOLL:
346 		return (sysctl_rdint(oldp, oldlenp, newp, nselcoll));
347 	case KERN_SECURELVL:
348 		level = securelevel;
349 		if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &level)) ||
350 		    newp == NULL)
351 			return (error);
352 		if ((securelevel > 0 || level < -1) &&
353 		    level < securelevel && p->p_p->ps_pid != 1)
354 			return (EPERM);
355 		securelevel = level;
356 		return (0);
357 	case KERN_ALLOWKMEM:
358 		if (securelevel > 0)
359 			return (sysctl_rdint(oldp, oldlenp, newp,
360 			    allowkmem));
361 		return (sysctl_int(oldp, oldlenp, newp, newlen,
362 		    &allowkmem));
363 	case KERN_HOSTNAME:
364 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
365 		    hostname, sizeof(hostname));
366 		if (newp && !error)
367 			hostnamelen = newlen;
368 		return (error);
369 	case KERN_DOMAINNAME:
370 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
371 		    domainname, sizeof(domainname));
372 		if (newp && !error)
373 			domainnamelen = newlen;
374 		return (error);
375 	case KERN_HOSTID:
376 		inthostid = hostid;  /* XXX assumes sizeof long <= sizeof int */
377 		error =  sysctl_int(oldp, oldlenp, newp, newlen, &inthostid);
378 		hostid = inthostid;
379 		return (error);
380 	case KERN_CLOCKRATE:
381 		return (sysctl_clockrate(oldp, oldlenp, newp));
382 	case KERN_BOOTTIME: {
383 		struct timeval bt;
384 		memset(&bt, 0, sizeof bt);
385 		TIMESPEC_TO_TIMEVAL(&bt, &boottime);
386 		return (sysctl_rdstruct(oldp, oldlenp, newp, &bt, sizeof bt));
387 	  }
388 #ifndef SMALL_KERNEL
389 	case KERN_PROC:
390 		return (sysctl_doproc(name + 1, namelen - 1, oldp, oldlenp));
391 	case KERN_PROC_ARGS:
392 		return (sysctl_proc_args(name + 1, namelen - 1, oldp, oldlenp,
393 		     p));
394 	case KERN_PROC_CWD:
395 		return (sysctl_proc_cwd(name + 1, namelen - 1, oldp, oldlenp,
396 		     p));
397 	case KERN_PROC_NOBROADCASTKILL:
398 		return (sysctl_proc_nobroadcastkill(name + 1, namelen - 1,
399 		     newp, newlen, oldp, oldlenp, p));
400 	case KERN_PROC_VMMAP:
401 		return (sysctl_proc_vmmap(name + 1, namelen - 1, oldp, oldlenp,
402 		     p));
403 	case KERN_FILE:
404 		return (sysctl_file(name + 1, namelen - 1, oldp, oldlenp, p));
405 #endif
406 	case KERN_MBSTAT: {
407 		extern struct cpumem *mbstat;
408 		uint64_t counters[MBSTAT_COUNT];
409 		struct mbstat mbs;
410 		unsigned int i;
411 
412 		memset(&mbs, 0, sizeof(mbs));
413 		counters_read(mbstat, counters, MBSTAT_COUNT);
414 		for (i = 0; i < MBSTAT_TYPES; i++)
415 			mbs.m_mtypes[i] = counters[i];
416 
417 		mbs.m_drops = counters[MBSTAT_DROPS];
418 		mbs.m_wait = counters[MBSTAT_WAIT];
419 		mbs.m_drain = counters[MBSTAT_DRAIN];
420 
421 		return (sysctl_rdstruct(oldp, oldlenp, newp,
422 		    &mbs, sizeof(mbs)));
423 	}
424 #if defined(GPROF) || defined(DDBPROF)
425 	case KERN_PROF:
426 		return (sysctl_doprof(name + 1, namelen - 1, oldp, oldlenp,
427 		    newp, newlen));
428 #endif
429 	case KERN_POSIX1:
430 		return (sysctl_rdint(oldp, oldlenp, newp, _POSIX_VERSION));
431 	case KERN_NGROUPS:
432 		return (sysctl_rdint(oldp, oldlenp, newp, NGROUPS_MAX));
433 	case KERN_JOB_CONTROL:
434 		return (sysctl_rdint(oldp, oldlenp, newp, 1));
435 	case KERN_SAVED_IDS:
436 		return (sysctl_rdint(oldp, oldlenp, newp, 1));
437 	case KERN_MAXPARTITIONS:
438 		return (sysctl_rdint(oldp, oldlenp, newp, MAXPARTITIONS));
439 	case KERN_RAWPARTITION:
440 		return (sysctl_rdint(oldp, oldlenp, newp, RAW_PART));
441 	case KERN_MAXTHREAD:
442 		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxthread));
443 	case KERN_NTHREADS:
444 		return (sysctl_rdint(oldp, oldlenp, newp, nthreads));
445 	case KERN_SOMAXCONN: {
446 		int val = somaxconn;
447 		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
448 		if (error)
449 			return error;
450 		if (val < 0 || val > SHRT_MAX)
451 			return EINVAL;
452 		somaxconn = val;
453 		return 0;
454 	}
455 	case KERN_SOMINCONN: {
456 		int val = sominconn;
457 		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
458 		if (error)
459 			return error;
460 		if (val < 0 || val > SHRT_MAX)
461 			return EINVAL;
462 		sominconn = val;
463 		return 0;
464 	}
465 	case KERN_NOSUIDCOREDUMP:
466 		return (sysctl_int(oldp, oldlenp, newp, newlen, &nosuidcoredump));
467 	case KERN_FSYNC:
468 		return (sysctl_rdint(oldp, oldlenp, newp, 1));
469 	case KERN_SYSVMSG:
470 #ifdef SYSVMSG
471 		return (sysctl_rdint(oldp, oldlenp, newp, 1));
472 #else
473 		return (sysctl_rdint(oldp, oldlenp, newp, 0));
474 #endif
475 	case KERN_SYSVSEM:
476 #ifdef SYSVSEM
477 		return (sysctl_rdint(oldp, oldlenp, newp, 1));
478 #else
479 		return (sysctl_rdint(oldp, oldlenp, newp, 0));
480 #endif
481 	case KERN_SYSVSHM:
482 #ifdef SYSVSHM
483 		return (sysctl_rdint(oldp, oldlenp, newp, 1));
484 #else
485 		return (sysctl_rdint(oldp, oldlenp, newp, 0));
486 #endif
487 	case KERN_MSGBUFSIZE:
488 	case KERN_CONSBUFSIZE: {
489 		struct msgbuf *mp;
490 		mp = (name[0] == KERN_MSGBUFSIZE) ? msgbufp : consbufp;
491 		/*
492 		 * deal with cases where the message buffer has
493 		 * become corrupted.
494 		 */
495 		if (!mp || mp->msg_magic != MSG_MAGIC)
496 			return (ENXIO);
497 		return (sysctl_rdint(oldp, oldlenp, newp, mp->msg_bufs));
498 	}
499 	case KERN_CONSBUF:
500 		if ((error = suser(p)))
501 			return (error);
502 		/* FALLTHROUGH */
503 	case KERN_MSGBUF: {
504 		struct msgbuf *mp;
505 		mp = (name[0] == KERN_MSGBUF) ? msgbufp : consbufp;
506 		/* see note above */
507 		if (!mp || mp->msg_magic != MSG_MAGIC)
508 			return (ENXIO);
509 		return (sysctl_rdstruct(oldp, oldlenp, newp, mp,
510 		    mp->msg_bufs + offsetof(struct msgbuf, msg_bufc)));
511 	}
512 	case KERN_MALLOCSTATS:
513 		return (sysctl_malloc(name + 1, namelen - 1, oldp, oldlenp,
514 		    newp, newlen, p));
515 	case KERN_CPTIME:
516 	{
517 		CPU_INFO_ITERATOR cii;
518 		struct cpu_info *ci;
519 		long cp_time[CPUSTATES];
520 		int i;
521 
522 		memset(cp_time, 0, sizeof(cp_time));
523 
524 		CPU_INFO_FOREACH(cii, ci) {
525 			for (i = 0; i < CPUSTATES; i++)
526 				cp_time[i] += ci->ci_schedstate.spc_cp_time[i];
527 		}
528 
529 		for (i = 0; i < CPUSTATES; i++)
530 			cp_time[i] /= ncpus;
531 
532 		return (sysctl_rdstruct(oldp, oldlenp, newp, &cp_time,
533 		    sizeof(cp_time)));
534 	}
535 	case KERN_NCHSTATS:
536 		return (sysctl_rdstruct(oldp, oldlenp, newp, &nchstats,
537 		    sizeof(struct nchstats)));
538 	case KERN_FORKSTAT:
539 		return (sysctl_rdstruct(oldp, oldlenp, newp, &forkstat,
540 		    sizeof(struct forkstat)));
541 	case KERN_TTY:
542 		return (sysctl_tty(name + 1, namelen - 1, oldp, oldlenp,
543 		    newp, newlen));
544 	case KERN_FSCALE:
545 		return (sysctl_rdint(oldp, oldlenp, newp, fscale));
546 	case KERN_CCPU:
547 		return (sysctl_rdint(oldp, oldlenp, newp, ccpu));
548 	case KERN_NPROCS:
549 		return (sysctl_rdint(oldp, oldlenp, newp, nprocesses));
550 	case KERN_POOL:
551 		return (sysctl_dopool(name + 1, namelen - 1, oldp, oldlenp));
552 	case KERN_STACKGAPRANDOM:
553 		stackgap = stackgap_random;
554 		error = sysctl_int(oldp, oldlenp, newp, newlen, &stackgap);
555 		if (error)
556 			return (error);
557 		/*
558 		 * Safety harness.
559 		 */
560 		if ((stackgap < ALIGNBYTES && stackgap != 0) ||
561 		    !powerof2(stackgap) || stackgap >= MAXSSIZ)
562 			return (EINVAL);
563 		stackgap_random = stackgap;
564 		return (0);
565 #if defined(SYSVMSG) || defined(SYSVSEM) || defined(SYSVSHM)
566 	case KERN_SYSVIPC_INFO:
567 		return (sysctl_sysvipc(name + 1, namelen - 1, oldp, oldlenp));
568 #endif
569 	case KERN_SPLASSERT:
570 		return (sysctl_int(oldp, oldlenp, newp, newlen,
571 		    &splassert_ctl));
572 #ifdef SYSVSEM
573 	case KERN_SEMINFO:
574 		return (sysctl_sysvsem(name + 1, namelen - 1, oldp, oldlenp,
575 		    newp, newlen));
576 #endif
577 #ifdef SYSVSHM
578 	case KERN_SHMINFO:
579 		return (sysctl_sysvshm(name + 1, namelen - 1, oldp, oldlenp,
580 		    newp, newlen));
581 #endif
582 #ifndef SMALL_KERNEL
583 	case KERN_INTRCNT:
584 		return (sysctl_intrcnt(name + 1, namelen - 1, oldp, oldlenp));
585 	case KERN_WATCHDOG:
586 		return (sysctl_wdog(name + 1, namelen - 1, oldp, oldlenp,
587 		    newp, newlen));
588 #endif
589 	case KERN_MAXCLUSTERS:
590 		error = sysctl_int(oldp, oldlenp, newp, newlen, &nmbclust);
591 		if (!error)
592 			nmbclust_update();
593 		return (error);
594 #ifndef SMALL_KERNEL
595 	case KERN_EVCOUNT:
596 		return (evcount_sysctl(name + 1, namelen - 1, oldp, oldlenp,
597 		    newp, newlen));
598 #endif
599 	case KERN_TIMECOUNTER:
600 		return (sysctl_tc(name + 1, namelen - 1, oldp, oldlenp,
601 		    newp, newlen));
602 	case KERN_MAXLOCKSPERUID:
603 		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxlocksperuid));
604 	case KERN_CPTIME2:
605 		return (sysctl_cptime2(name + 1, namelen -1, oldp, oldlenp,
606 		    newp, newlen));
607 	case KERN_CACHEPCT: {
608 		u_int64_t dmapages;
609 		int opct, pgs;
610 		opct = bufcachepercent;
611 		error = sysctl_int(oldp, oldlenp, newp, newlen,
612 		    &bufcachepercent);
613 		if (error)
614 			return(error);
615 		if (bufcachepercent > 90 || bufcachepercent < 5) {
616 			bufcachepercent = opct;
617 			return (EINVAL);
618 		}
619 		dmapages = uvm_pagecount(&dma_constraint);
620 		if (bufcachepercent != opct) {
621 			pgs = bufcachepercent * dmapages / 100;
622 			bufadjust(pgs); /* adjust bufpages */
623 			bufhighpages = bufpages; /* set high water mark */
624 		}
625 		return(0);
626 	}
627 	case KERN_WXABORT:
628 		return (sysctl_int(oldp, oldlenp, newp, newlen, &uvm_wxabort));
629 	case KERN_CONSDEV:
630 		if (cn_tab != NULL)
631 			dev = cn_tab->cn_dev;
632 		else
633 			dev = NODEV;
634 		return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev));
635 	case KERN_NETLIVELOCKS:
636 		return (sysctl_rdint(oldp, oldlenp, newp, net_livelocks));
637 	case KERN_POOL_DEBUG: {
638 		int old_pool_debug = pool_debug;
639 
640 		error = sysctl_int(oldp, oldlenp, newp, newlen,
641 		    &pool_debug);
642 		if (error == 0 && pool_debug != old_pool_debug)
643 			pool_reclaim_all();
644 		return (error);
645 	}
646 #ifdef PTRACE
647 	case KERN_GLOBAL_PTRACE: {
648 		extern int global_ptrace;
649 
650 		return sysctl_int(oldp, oldlenp, newp, newlen, &global_ptrace);
651 	}
652 #endif
653 	case KERN_DNSJACKPORT: {
654 		extern uint16_t dnsjackport;
655 		int port = dnsjackport;
656 		if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &port)))
657 			return error;
658 		if (port < 0 || port > USHRT_MAX)
659 			return EINVAL;
660 		dnsjackport = port;
661 		return 0;
662 	}
663 #ifdef WITNESS
664 	case KERN_WITNESSWATCH:
665 		return witness_sysctl_watch(oldp, oldlenp, newp, newlen);
666 #endif
667 #if NAUDIO > 0
668 	case KERN_AUDIO:
669 		return (sysctl_audio(name + 1, namelen - 1, oldp, oldlenp,
670 		    newp, newlen));
671 #endif
672 	default:
673 		return (EOPNOTSUPP);
674 	}
675 	/* NOTREACHED */
676 }
677 
678 /*
679  * hardware related system variables.
680  */
681 char *hw_vendor, *hw_prod, *hw_uuid, *hw_serial, *hw_ver;
682 int allowpowerdown = 1;
683 
684 int
685 hw_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
686     size_t newlen, struct proc *p)
687 {
688 	extern char machine[], cpu_model[];
689 	int err, cpuspeed;
690 
691 	/* all sysctl names at this level except sensors are terminal */
692 	if (name[0] != HW_SENSORS && namelen != 1)
693 		return (ENOTDIR);		/* overloaded */
694 
695 	switch (name[0]) {
696 	case HW_MACHINE:
697 		return (sysctl_rdstring(oldp, oldlenp, newp, machine));
698 	case HW_MODEL:
699 		return (sysctl_rdstring(oldp, oldlenp, newp, cpu_model));
700 	case HW_NCPU:
701 		return (sysctl_rdint(oldp, oldlenp, newp, ncpus));
702 	case HW_NCPUFOUND:
703 		return (sysctl_rdint(oldp, oldlenp, newp, ncpusfound));
704 	case HW_NCPUONLINE:
705 		return (sysctl_rdint(oldp, oldlenp, newp,
706 		    sysctl_hwncpuonline()));
707 	case HW_BYTEORDER:
708 		return (sysctl_rdint(oldp, oldlenp, newp, BYTE_ORDER));
709 	case HW_PHYSMEM:
710 		return (sysctl_rdint(oldp, oldlenp, newp, ptoa(physmem)));
711 	case HW_USERMEM:
712 		return (sysctl_rdint(oldp, oldlenp, newp,
713 		    ptoa(physmem - uvmexp.wired)));
714 	case HW_PAGESIZE:
715 		return (sysctl_rdint(oldp, oldlenp, newp, PAGE_SIZE));
716 	case HW_DISKNAMES:
717 		err = sysctl_diskinit(0, p);
718 		if (err)
719 			return err;
720 		if (disknames)
721 			return (sysctl_rdstring(oldp, oldlenp, newp,
722 			    disknames));
723 		else
724 			return (sysctl_rdstring(oldp, oldlenp, newp, ""));
725 	case HW_DISKSTATS:
726 		err = sysctl_diskinit(1, p);
727 		if (err)
728 			return err;
729 		return (sysctl_rdstruct(oldp, oldlenp, newp, diskstats,
730 		    disk_count * sizeof(struct diskstats)));
731 	case HW_DISKCOUNT:
732 		return (sysctl_rdint(oldp, oldlenp, newp, disk_count));
733 	case HW_CPUSPEED:
734 		if (!cpu_cpuspeed)
735 			return (EOPNOTSUPP);
736 		err = cpu_cpuspeed(&cpuspeed);
737 		if (err)
738 			return err;
739 		return (sysctl_rdint(oldp, oldlenp, newp, cpuspeed));
740 #ifndef	SMALL_KERNEL
741 	case HW_SENSORS:
742 		return (sysctl_sensors(name + 1, namelen - 1, oldp, oldlenp,
743 		    newp, newlen));
744 	case HW_SETPERF:
745 		return (sysctl_hwsetperf(oldp, oldlenp, newp, newlen));
746 	case HW_PERFPOLICY:
747 		return (sysctl_hwperfpolicy(oldp, oldlenp, newp, newlen));
748 #endif /* !SMALL_KERNEL */
749 	case HW_VENDOR:
750 		if (hw_vendor)
751 			return (sysctl_rdstring(oldp, oldlenp, newp,
752 			    hw_vendor));
753 		else
754 			return (EOPNOTSUPP);
755 	case HW_PRODUCT:
756 		if (hw_prod)
757 			return (sysctl_rdstring(oldp, oldlenp, newp, hw_prod));
758 		else
759 			return (EOPNOTSUPP);
760 	case HW_VERSION:
761 		if (hw_ver)
762 			return (sysctl_rdstring(oldp, oldlenp, newp, hw_ver));
763 		else
764 			return (EOPNOTSUPP);
765 	case HW_SERIALNO:
766 		if (hw_serial)
767 			return (sysctl_rdstring(oldp, oldlenp, newp,
768 			    hw_serial));
769 		else
770 			return (EOPNOTSUPP);
771 	case HW_UUID:
772 		if (hw_uuid)
773 			return (sysctl_rdstring(oldp, oldlenp, newp, hw_uuid));
774 		else
775 			return (EOPNOTSUPP);
776 	case HW_PHYSMEM64:
777 		return (sysctl_rdquad(oldp, oldlenp, newp,
778 		    ptoa((psize_t)physmem)));
779 	case HW_USERMEM64:
780 		return (sysctl_rdquad(oldp, oldlenp, newp,
781 		    ptoa((psize_t)physmem - uvmexp.wired)));
782 	case HW_ALLOWPOWERDOWN:
783 		if (securelevel > 0)
784 			return (sysctl_rdint(oldp, oldlenp, newp,
785 			    allowpowerdown));
786 		return (sysctl_int(oldp, oldlenp, newp, newlen,
787 		    &allowpowerdown));
788 #ifdef __HAVE_CPU_TOPOLOGY
789 	case HW_SMT:
790 		return (sysctl_hwsmt(oldp, oldlenp, newp, newlen));
791 #endif
792 	default:
793 		return (EOPNOTSUPP);
794 	}
795 	/* NOTREACHED */
796 }
797 
#ifdef DEBUG
/*
 * Debugging related system variables.
 *
 * debugvars maps the first name component to a ctldebug slot; debug0
 * and debug1 are defined in other files, the rest are defined here.
 */
extern struct ctldebug debug0, debug1;
struct ctldebug debug2, debug3, debug4;
struct ctldebug debug5, debug6, debug7, debug8, debug9;
struct ctldebug debug10, debug11, debug12, debug13, debug14;
struct ctldebug debug15, debug16, debug17, debug18, debug19;
static struct ctldebug *debugvars[CTL_DEBUG_MAXID] = {
	&debug0, &debug1, &debug2, &debug3, &debug4,
	&debug5, &debug6, &debug7, &debug8, &debug9,
	&debug10, &debug11, &debug12, &debug13, &debug14,
	&debug15, &debug16, &debug17, &debug18, &debug19,
};

/*
 * Handler for CTL_DEBUG.  name[0] picks the slot in debugvars and
 * name[1] picks the field: CTL_DEBUG_NAME reads the slot's name,
 * CTL_DEBUG_VALUE reads or writes its integer.  Returns ENOTDIR when
 * the name is not exactly two components long and EOPNOTSUPP for an
 * out-of-range slot, a slot without a name, or an unknown field.
 */
int
debug_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen, struct proc *p)
{
	struct ctldebug *entry;
	int slot, field;

	/* all sysctl names at this level are name and field */
	if (namelen != 2)
		return ENOTDIR;			/* overloaded */

	slot = name[0];
	if (slot < 0 || slot >= nitems(debugvars))
		return EOPNOTSUPP;

	entry = debugvars[slot];
	if (!entry->debugname)
		return EOPNOTSUPP;

	field = name[1];
	if (field == CTL_DEBUG_NAME)
		return sysctl_rdstring(oldp, oldlenp, newp, entry->debugname);
	if (field == CTL_DEBUG_VALUE)
		return sysctl_int(oldp, oldlenp, newp, newlen, entry->debugvar);
	return EOPNOTSUPP;
}
#endif /* DEBUG */
838 
/*
 * Integer sysctl helper that permits reads, and writes only when the
 * new value does not exceed the current one.  The comparison is done
 * on unsigned int, so a negative int is treated as a large value.
 * Returns EPERM on an attempt to raise the value, otherwise whatever
 * sysctl_rdint()/sysctl_int() return.
 */
int
sysctl_int_lower(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int *valp)
{
	unsigned int before, after;
	int error;

	/* No new value supplied: this is a plain read. */
	if (newp == NULL)
		return sysctl_rdint(oldp, oldlenp, newp, *valp);

	/* Stage the write in a local so a rejected value is never stored. */
	before = after = *valp;
	error = sysctl_int(oldp, oldlenp, newp, newlen, (int *)&after);
	if (error)
		return error;
	if (after > before)
		return EPERM;		/* do not allow raising */

	*(unsigned int *)valp = after;
	return 0;
}
858 
/*
 * Read and/or write an int-valued sysctl variable.
 *
 * When oldp is set the current value is copied out; it must have room
 * for an int (ENOMEM otherwise).  When newp is set, newlen must be
 * exactly sizeof(int) (EINVAL otherwise) and the new value is copied
 * into *valp after the old one has been copied out.  *oldlenp is set
 * to sizeof(int) once the arguments have been validated.
 */
int
sysctl_int(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int *valp)
{
	int rv;

	if (oldp != NULL && *oldlenp < sizeof(*valp))
		return ENOMEM;
	if (newp != NULL && newlen != sizeof(*valp))
		return EINVAL;

	*oldlenp = sizeof(*valp);

	if (oldp != NULL) {
		rv = copyout(valp, oldp, sizeof(*valp));
		if (rv != 0)
			return rv;
	}
	if (newp != NULL)
		return copyin(newp, valp, sizeof(*valp));
	return 0;
}
879 
/*
 * Read-only counterpart of sysctl_int(): report val, refuse writes.
 *
 * Returns ENOMEM when oldp is set but too small for an int, EPERM when
 * a new value is supplied, otherwise the result of the copyout (0 when
 * only the length was requested).
 */
int
sysctl_rdint(void *oldp, size_t *oldlenp, void *newp, int val)
{
	if (oldp != NULL && *oldlenp < sizeof(val))
		return ENOMEM;
	if (newp != NULL)
		return EPERM;

	*oldlenp = sizeof(val);
	if (oldp == NULL)
		return 0;
	return copyout(&val, oldp, sizeof(val));
}
897 
898 /*
899  * Array of integer values.
900  */
901 int
902 sysctl_int_arr(int **valpp, int *name, u_int namelen, void *oldp,
903     size_t *oldlenp, void *newp, size_t newlen)
904 {
905 	if (namelen > 1)
906 		return (ENOTDIR);
907 	if (name[0] < 0 || valpp[name[0]] == NULL)
908 		return (EOPNOTSUPP);
909 	return (sysctl_int(oldp, oldlenp, newp, newlen, valpp[name[0]]));
910 }
911 
/*
 * Read and/or write a 64-bit (int64_t) sysctl variable.
 *
 * Same contract as sysctl_int() with sizeof(int64_t) in place of
 * sizeof(int): ENOMEM when the old buffer is too small, EINVAL when
 * newlen is not exactly sizeof(int64_t); the old value is copied out
 * before the new one is copied in.
 */
int
sysctl_quad(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    int64_t *valp)
{
	int rv;

	if (oldp != NULL && *oldlenp < sizeof(*valp))
		return ENOMEM;
	if (newp != NULL && newlen != sizeof(*valp))
		return EINVAL;

	*oldlenp = sizeof(*valp);

	if (oldp != NULL) {
		rv = copyout(valp, oldp, sizeof(*valp));
		if (rv != 0)
			return rv;
	}
	if (newp != NULL)
		return copyin(newp, valp, sizeof(*valp));
	return 0;
}
933 
/*
 * Read-only counterpart of sysctl_quad(): report val, refuse writes.
 *
 * Returns ENOMEM when oldp is set but too small for an int64_t, EPERM
 * when a new value is supplied, otherwise the result of the copyout
 * (0 when only the length was requested).
 */
int
sysctl_rdquad(void *oldp, size_t *oldlenp, void *newp, int64_t val)
{
	if (oldp != NULL && *oldlenp < sizeof(val))
		return ENOMEM;
	if (newp != NULL)
		return EPERM;

	*oldlenp = sizeof(val);
	if (oldp == NULL)
		return 0;
	return copyout(&val, oldp, sizeof(val));
}
951 
/*
 * Read and/or write a string-valued sysctl variable stored in str
 * (capacity maxlen).  The old value is never truncated: an old buffer
 * that is too small yields ENOMEM.  See sysctl__string().
 */
int
sysctl_string(void *oldp, size_t *oldlenp, void *newp, size_t newlen, char *str,
    size_t maxlen)
{
	const int truncate_old = 0;

	return (sysctl__string(oldp, oldlenp, newp, newlen, str, maxlen,
	    truncate_old));
}
962 
/*
 * Like sysctl_string(), but an old buffer that is too small receives a
 * truncated, NUL-terminated copy instead of failing with ENOMEM.
 */
int
sysctl_tstring(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    char *str, size_t maxlen)
{
	const int truncate_old = 1;

	return (sysctl__string(oldp, oldlenp, newp, newlen, str, maxlen,
	    truncate_old));
}
969 
/*
 * Common implementation of sysctl_string() and sysctl_tstring().
 *
 * str is the kernel string, maxlen its capacity.  When oldp is set the
 * current string, including its NUL, is copied out.  If the old buffer
 * is too small the call fails with ENOMEM, unless trunc is non-zero
 * and the buffer is not empty, in which case a truncated but still
 * NUL-terminated copy is written.  A new value must be shorter than
 * maxlen (EINVAL otherwise); it is copied in after the old value has
 * been copied out and is terminated at str[newlen].  *oldlenp receives
 * the number of bytes reported (the full length, or the truncated one).
 */
int
sysctl__string(void *oldp, size_t *oldlenp, void *newp, size_t newlen,
    char *str, size_t maxlen, int trunc)
{
	size_t len = strlen(str) + 1;
	int shorten = 0;
	int error = 0;

	if (oldp != NULL && *oldlenp < len) {
		if (!trunc || *oldlenp == 0)
			return (ENOMEM);
		shorten = 1;
	}
	if (newp != NULL && newlen >= maxlen)
		return (EINVAL);

	if (oldp != NULL) {
		if (shorten) {
			/* Copy what fits, then terminate it ourselves. */
			len = *oldlenp;
			error = copyout(str, oldp, len - 1);
			if (error == 0)
				error = copyout("", (char *)oldp + len - 1, 1);
		} else
			error = copyout(str, oldp, len);
	}
	*oldlenp = len;

	if (error != 0 || newp == NULL)
		return (error);

	error = copyin(newp, str, newlen);
	str[newlen] = 0;
	return (error);
}
1001 
/*
 * Read-only string sysctl: report str (including its NUL), refuse
 * writes.
 *
 * Returns ENOMEM when oldp is set but shorter than the string, EPERM
 * when a new value is supplied, otherwise the result of the copyout
 * (0 when only the length was requested).
 */
int
sysctl_rdstring(void *oldp, size_t *oldlenp, void *newp, const char *str)
{
	const size_t need = strlen(str) + 1;

	if (oldp != NULL && *oldlenp < need)
		return ENOMEM;
	if (newp != NULL)
		return EPERM;

	*oldlenp = need;
	if (oldp == NULL)
		return 0;
	return copyout(str, oldp, need);
}
1021 
/*
 * Validate parameters and get old / set new parameters
 * for a structure oriented sysctl function.
 *
 * sp points at the kernel structure and len is its size.  When oldp is
 * set the structure is copied out; the old buffer must hold len bytes
 * (ENOMEM otherwise).  When newp is set, newlen must not exceed len
 * (EINVAL otherwise) and the structure is overwritten from userland
 * after the old contents have been copied out.
 *
 * The required length is reported through *oldlenp even when oldp is
 * NULL, so a size query works the same way as with sysctl_int(),
 * sysctl_quad() and sysctl_rdstruct().  A NULL oldlenp is still
 * accepted as long as oldp is NULL too.
 */
int
sysctl_struct(void *oldp, size_t *oldlenp, void *newp, size_t newlen, void *sp,
    size_t len)
{
	int error = 0;

	if (oldp != NULL && *oldlenp < len)
		return (ENOMEM);
	if (newp != NULL && newlen > len)
		return (EINVAL);

	if (oldlenp != NULL)
		*oldlenp = len;
	if (oldp != NULL)
		error = copyout(sp, oldp, len);

	/*
	 * NOTE(review): len bytes are read from newp even when the caller
	 * passed a smaller newlen; kept as is -- confirm that callers
	 * always supply a full structure.
	 */
	if (error == 0 && newp != NULL)
		error = copyin(newp, sp, len);
	return (error);
}
1044 
/*
 * Read-only structure helper: report the size of, and optionally copy out,
 * the len bytes at sp.
 *
 * Returns ENOMEM if a supplied old buffer is smaller than len and EPERM if
 * the caller passes a new value.  *oldlenp is set to len on success whether
 * or not a buffer was given.
 */
int
sysctl_rdstruct(void *oldp, size_t *oldlenp, void *newp, const void *sp,
    size_t len)
{
	if (oldp != NULL && *oldlenp < len)
		return (ENOMEM);
	if (newp != NULL)
		return (EPERM);

	*oldlenp = len;
	if (oldp == NULL)
		return (0);
	return (copyout(sp, oldp, len));
}
1064 
1065 #ifndef SMALL_KERNEL
1066 void
1067 fill_file(struct kinfo_file *kf, struct file *fp, struct filedesc *fdp,
1068 	  int fd, struct vnode *vp, struct process *pr, struct proc *p,
1069 	  struct socket *so, int show_pointers)
1070 {
1071 	struct vattr va;
1072 
1073 	memset(kf, 0, sizeof(*kf));
1074 
1075 	kf->fd_fd = fd;		/* might not really be an fd */
1076 
1077 	if (fp != NULL) {
1078 		if (show_pointers)
1079 			kf->f_fileaddr = PTRTOINT64(fp);
1080 		kf->f_flag = fp->f_flag;
1081 		kf->f_iflags = fp->f_iflags;
1082 		kf->f_type = fp->f_type;
1083 		kf->f_count = fp->f_count;
1084 		if (show_pointers)
1085 			kf->f_ucred = PTRTOINT64(fp->f_cred);
1086 		kf->f_uid = fp->f_cred->cr_uid;
1087 		kf->f_gid = fp->f_cred->cr_gid;
1088 		if (show_pointers)
1089 			kf->f_ops = PTRTOINT64(fp->f_ops);
1090 		if (show_pointers)
1091 			kf->f_data = PTRTOINT64(fp->f_data);
1092 		kf->f_usecount = 0;
1093 
1094 		if (suser(p) == 0 || p->p_ucred->cr_uid == fp->f_cred->cr_uid) {
1095 			kf->f_offset = fp->f_offset;
1096 			mtx_enter(&fp->f_mtx);
1097 			kf->f_rxfer = fp->f_rxfer;
1098 			kf->f_rwfer = fp->f_wxfer;
1099 			kf->f_seek = fp->f_seek;
1100 			kf->f_rbytes = fp->f_rbytes;
1101 			kf->f_wbytes = fp->f_wbytes;
1102 			mtx_leave(&fp->f_mtx);
1103 		} else
1104 			kf->f_offset = -1;
1105 	} else if (vp != NULL) {
1106 		/* fake it */
1107 		kf->f_type = DTYPE_VNODE;
1108 		kf->f_flag = FREAD;
1109 		if (fd == KERN_FILE_TRACE)
1110 			kf->f_flag |= FWRITE;
1111 	} else if (so != NULL) {
1112 		/* fake it */
1113 		kf->f_type = DTYPE_SOCKET;
1114 	}
1115 
1116 	/* information about the object associated with this file */
1117 	switch (kf->f_type) {
1118 	case DTYPE_VNODE:
1119 		if (fp != NULL)
1120 			vp = (struct vnode *)fp->f_data;
1121 
1122 		if (show_pointers)
1123 			kf->v_un = PTRTOINT64(vp->v_un.vu_socket);
1124 		kf->v_type = vp->v_type;
1125 		kf->v_tag = vp->v_tag;
1126 		kf->v_flag = vp->v_flag;
1127 		if (show_pointers)
1128 			kf->v_data = PTRTOINT64(vp->v_data);
1129 		if (show_pointers)
1130 			kf->v_mount = PTRTOINT64(vp->v_mount);
1131 		if (vp->v_mount)
1132 			strlcpy(kf->f_mntonname,
1133 			    vp->v_mount->mnt_stat.f_mntonname,
1134 			    sizeof(kf->f_mntonname));
1135 
1136 		if (VOP_GETATTR(vp, &va, p->p_ucred, p) == 0) {
1137 			kf->va_fileid = va.va_fileid;
1138 			kf->va_mode = MAKEIMODE(va.va_type, va.va_mode);
1139 			kf->va_size = va.va_size;
1140 			kf->va_rdev = va.va_rdev;
1141 			kf->va_fsid = va.va_fsid & 0xffffffff;
1142 			kf->va_nlink = va.va_nlink;
1143 		}
1144 		break;
1145 
1146 	case DTYPE_SOCKET: {
1147 		if (so == NULL)
1148 			so = (struct socket *)fp->f_data;
1149 
1150 		kf->so_type = so->so_type;
1151 		kf->so_state = so->so_state;
1152 		if (show_pointers)
1153 			kf->so_pcb = PTRTOINT64(so->so_pcb);
1154 		else
1155 			kf->so_pcb = -1;
1156 		kf->so_protocol = so->so_proto->pr_protocol;
1157 		kf->so_family = so->so_proto->pr_domain->dom_family;
1158 		kf->so_rcv_cc = so->so_rcv.sb_cc;
1159 		kf->so_snd_cc = so->so_snd.sb_cc;
1160 		if (isspliced(so)) {
1161 			if (show_pointers)
1162 				kf->so_splice =
1163 				    PTRTOINT64(so->so_sp->ssp_socket);
1164 			kf->so_splicelen = so->so_sp->ssp_len;
1165 		} else if (issplicedback(so))
1166 			kf->so_splicelen = -1;
1167 		if (!so->so_pcb)
1168 			break;
1169 		switch (kf->so_family) {
1170 		case AF_INET: {
1171 			struct inpcb *inpcb = so->so_pcb;
1172 
1173 			if (show_pointers)
1174 				kf->inp_ppcb = PTRTOINT64(inpcb->inp_ppcb);
1175 			kf->inp_lport = inpcb->inp_lport;
1176 			kf->inp_laddru[0] = inpcb->inp_laddr.s_addr;
1177 			kf->inp_fport = inpcb->inp_fport;
1178 			kf->inp_faddru[0] = inpcb->inp_faddr.s_addr;
1179 			kf->inp_rtableid = inpcb->inp_rtableid;
1180 			if (so->so_type == SOCK_RAW)
1181 				kf->inp_proto = inpcb->inp_ip.ip_p;
1182 			if (so->so_proto->pr_protocol == IPPROTO_TCP &&
1183 			    inpcb->inp_ppcb != NULL) {
1184 				struct tcpcb *tcpcb = (void *)inpcb->inp_ppcb;
1185 				kf->t_rcv_wnd = tcpcb->rcv_wnd;
1186 				kf->t_snd_wnd = tcpcb->snd_wnd;
1187 				kf->t_snd_cwnd = tcpcb->snd_cwnd;
1188 				kf->t_state = tcpcb->t_state;
1189 			}
1190 			break;
1191 		    }
1192 		case AF_INET6: {
1193 			struct inpcb *inpcb = so->so_pcb;
1194 
1195 			if (show_pointers)
1196 				kf->inp_ppcb = PTRTOINT64(inpcb->inp_ppcb);
1197 			kf->inp_lport = inpcb->inp_lport;
1198 			kf->inp_laddru[0] = inpcb->inp_laddr6.s6_addr32[0];
1199 			kf->inp_laddru[1] = inpcb->inp_laddr6.s6_addr32[1];
1200 			kf->inp_laddru[2] = inpcb->inp_laddr6.s6_addr32[2];
1201 			kf->inp_laddru[3] = inpcb->inp_laddr6.s6_addr32[3];
1202 			kf->inp_fport = inpcb->inp_fport;
1203 			kf->inp_faddru[0] = inpcb->inp_faddr6.s6_addr32[0];
1204 			kf->inp_faddru[1] = inpcb->inp_faddr6.s6_addr32[1];
1205 			kf->inp_faddru[2] = inpcb->inp_faddr6.s6_addr32[2];
1206 			kf->inp_faddru[3] = inpcb->inp_faddr6.s6_addr32[3];
1207 			kf->inp_rtableid = inpcb->inp_rtableid;
1208 			if (so->so_type == SOCK_RAW)
1209 				kf->inp_proto = inpcb->inp_ipv6.ip6_nxt;
1210 			if (so->so_proto->pr_protocol == IPPROTO_TCP &&
1211 			    inpcb->inp_ppcb != NULL) {
1212 				struct tcpcb *tcpcb = (void *)inpcb->inp_ppcb;
1213 				kf->t_rcv_wnd = tcpcb->rcv_wnd;
1214 				kf->t_snd_wnd = tcpcb->snd_wnd;
1215 				kf->t_state = tcpcb->t_state;
1216 			}
1217 			break;
1218 		    }
1219 		case AF_UNIX: {
1220 			struct unpcb *unpcb = so->so_pcb;
1221 
1222 			kf->f_msgcount = unpcb->unp_msgcount;
1223 			if (show_pointers) {
1224 				kf->unp_conn	= PTRTOINT64(unpcb->unp_conn);
1225 				kf->unp_refs	= PTRTOINT64(
1226 				    SLIST_FIRST(&unpcb->unp_refs));
1227 				kf->unp_nextref	= PTRTOINT64(
1228 				    SLIST_NEXT(unpcb, unp_nextref));
1229 				kf->v_un	= PTRTOINT64(unpcb->unp_vnode);
1230 				kf->unp_addr	= PTRTOINT64(unpcb->unp_addr);
1231 			}
1232 			if (unpcb->unp_addr != NULL) {
1233 				struct sockaddr_un *un = mtod(unpcb->unp_addr,
1234 				    struct sockaddr_un *);
1235 				memcpy(kf->unp_path, un->sun_path, un->sun_len
1236 				    - offsetof(struct sockaddr_un,sun_path));
1237 			}
1238 			break;
1239 		    }
1240 		}
1241 		break;
1242 	    }
1243 
1244 	case DTYPE_PIPE: {
1245 		struct pipe *pipe = (struct pipe *)fp->f_data;
1246 
1247 		if (show_pointers)
1248 			kf->pipe_peer = PTRTOINT64(pipe->pipe_peer);
1249 		kf->pipe_state = pipe->pipe_state;
1250 		break;
1251 	    }
1252 
1253 	case DTYPE_KQUEUE: {
1254 		struct kqueue *kqi = (struct kqueue *)fp->f_data;
1255 
1256 		kf->kq_count = kqi->kq_count;
1257 		kf->kq_state = kqi->kq_state;
1258 		break;
1259 	    }
1260 	}
1261 
1262 	/* per-process information for KERN_FILE_BY[PU]ID */
1263 	if (pr != NULL) {
1264 		kf->p_pid = pr->ps_pid;
1265 		kf->p_uid = pr->ps_ucred->cr_uid;
1266 		kf->p_gid = pr->ps_ucred->cr_gid;
1267 		kf->p_tid = -1;
1268 		strlcpy(kf->p_comm, pr->ps_comm, sizeof(kf->p_comm));
1269 	}
1270 	if (fdp != NULL) {
1271 		fdplock(fdp);
1272 		kf->fd_ofileflags = fdp->fd_ofileflags[fd];
1273 		fdpunlock(fdp);
1274 	}
1275 }
1276 
/*
 * Get file structures.
 *
 * name[0] selects the operation (KERN_FILE_BYFILE, KERN_FILE_BYPID or
 * KERN_FILE_BYUID), name[1] is its argument (file type, pid or uid),
 * name[2] is the element size the caller expects and name[3] the maximum
 * number of elements to return.  Records are written to where, spaced
 * elem_size bytes apart; *sizep holds the buffer size on entry and, on
 * success, is set to the number of bytes needed for every matching entry.
 *
 * Returns ENOTDIR if more than four name components are given, EINVAL for
 * too few components, an element size outside 1..sizeof(struct kinfo_file),
 * an unknown operation or a pid argument below -1, ESRCH if no process
 * matched in KERN_FILE_BYPID, ENOMEM if the buffer was too small for all
 * entries, or an error from copyout().
 */
int
sysctl_file(int *name, u_int namelen, char *where, size_t *sizep,
    struct proc *p)
{
	struct kinfo_file *kf;
	struct filedesc *fdp;
	struct file *fp;
	struct process *pr;
	size_t buflen, elem_size, elem_count, outsize;
	char *dp = where;
	int arg, i, error = 0, needed = 0, matched;
	u_int op;
	int show_pointers;

	if (namelen > 4)
		return (ENOTDIR);
	/*
	 * name[2] is compared against a size_t, so a negative value is
	 * converted to a large unsigned one and rejected here as well.
	 */
	if (namelen < 4 || name[2] > sizeof(*kf))
		return (EINVAL);

	/* without an output buffer only the required size is computed */
	buflen = where != NULL ? *sizep : 0;
	op = name[0];
	arg = name[1];
	elem_size = name[2];
	elem_count = name[3];
	/* never copy out more than one kinfo_file per element */
	outsize = MIN(sizeof(*kf), elem_size);

	if (elem_size < 1)
		return (EINVAL);

	/* kernel addresses are only exported to the superuser */
	show_pointers = suser(curproc) == 0;

	kf = malloc(sizeof(*kf), M_TEMP, M_WAITOK);

/*
 * Emit one record if buffer space and element budget allow, and account
 * for it in needed.  The break on a copyout() failure only leaves the
 * macro's own do/while: the enclosing loop keeps running, the failed
 * element is not counted in needed, and a later successful copyout()
 * overwrites error.
 */
#define FILLIT2(fp, fdp, i, vp, pr, so) do {				\
	if (buflen >= elem_size && elem_count > 0) {			\
		fill_file(kf, fp, fdp, i, vp, pr, p, so, show_pointers);\
		error = copyout(kf, dp, outsize);			\
		if (error)						\
			break;						\
		dp += elem_size;					\
		buflen -= elem_size;					\
		elem_count--;						\
	}								\
	needed += elem_size;						\
} while (0)

/* FILLIT2 for entries that are not described by a bare socket. */
#define FILLIT(fp, fdp, i, vp, pr) \
	FILLIT2(fp, fdp, i, vp, pr, NULL)

/*
 * Emit a record for the socket attached to an inpcb, if there is one.
 * NOTE(review): this runs FILLIT2, and therefore copyout(), with inp_mtx
 * held (and inpcbtable_mtx held by the caller below) -- confirm that
 * copyout() is safe in that context.
 */
#define FILLINPCB(inp) do {						\
	mtx_enter(&inp->inp_mtx);					\
	if (inp->inp_socket != NULL)					\
		FILLIT2(NULL, NULL, 0, NULL, NULL, inp->inp_socket);	\
	mtx_leave(&inp->inp_mtx);					\
} while (0)

	switch (op) {
	case KERN_FILE_BYFILE:
		/* use the inp-tables to pick up closed connections, too */
		if (arg == DTYPE_SOCKET) {
			struct inpcb *inp;

			/*
			 * The inpcb and socket fields are accessed and read
			 * without net lock.  This may result in inconsistent
			 * data provided to userland.  The fix will be to
			 * protect the socket fields with the inpcb mutex.
			 * XXXSMP
			 */
			mtx_enter(&inpcbtable_mtx);
			TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue)
				FILLINPCB(inp);
			TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue)
				FILLINPCB(inp);
			TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue)
				FILLINPCB(inp);
#ifdef INET6
			TAILQ_FOREACH(inp, &rawin6pcbtable.inpt_queue,
			    inp_queue)
				FILLINPCB(inp);
#endif
			mtx_leave(&inpcbtable_mtx);
		}
		/* then walk the open files, filtered by type if arg != 0 */
		fp = NULL;
		while ((fp = fd_iterfile(fp, p)) != NULL) {
			if ((arg == 0 || fp->f_type == arg)) {
				int af, skip = 0;
				/*
				 * Internet sockets were already reported
				 * from the inpcb tables above; skip them so
				 * they are not listed twice.
				 */
				if (arg == DTYPE_SOCKET && fp->f_type == arg) {
					af = ((struct socket *)fp->f_data)->
					    so_proto->pr_domain->dom_family;
					if (af == AF_INET || af == AF_INET6)
						skip = 1;
				}
				if (!skip)
					FILLIT(fp, NULL, 0, NULL, NULL);
			}
		}
		break;
	case KERN_FILE_BYPID:
		/* A arg of -1 indicates all processes */
		if (arg < -1) {
			error = EINVAL;
			break;
		}
		matched = 0;
		LIST_FOREACH(pr, &allprocess, ps_list) {
			/*
			 * skip system, exiting, embryonic and undead
			 * processes
			 */
			if (pr->ps_flags & (PS_SYSTEM | PS_EMBRYO | PS_EXITING))
				continue;
			/* only a positive arg filters; 0 and -1 match all */
			if (arg > 0 && pr->ps_pid != (pid_t)arg) {
				/* not the pid we are looking for */
				continue;
			}
			matched = 1;
			fdp = pr->ps_fd;
			/* pseudo entries: text, cwd, root and ktrace vnodes */
			if (pr->ps_textvp)
				FILLIT(NULL, NULL, KERN_FILE_TEXT, pr->ps_textvp, pr);
			if (fdp->fd_cdir)
				FILLIT(NULL, NULL, KERN_FILE_CDIR, fdp->fd_cdir, pr);
			if (fdp->fd_rdir)
				FILLIT(NULL, NULL, KERN_FILE_RDIR, fdp->fd_rdir, pr);
			if (pr->ps_tracevp)
				FILLIT(NULL, NULL, KERN_FILE_TRACE, pr->ps_tracevp, pr);
			/* then every open descriptor of the process */
			for (i = 0; i < fdp->fd_nfiles; i++) {
				if ((fp = fd_getfile(fdp, i)) == NULL)
					continue;
				FILLIT(fp, fdp, i, NULL, pr);
				FRELE(fp, p);
			}
		}
		if (!matched)
			error = ESRCH;
		break;
	case KERN_FILE_BYUID:
		LIST_FOREACH(pr, &allprocess, ps_list) {
			/*
			 * skip system, exiting, embryonic and undead
			 * processes
			 */
			if (pr->ps_flags & (PS_SYSTEM | PS_EMBRYO | PS_EXITING))
				continue;
			/* a negative arg matches every uid */
			if (arg >= 0 && pr->ps_ucred->cr_uid != (uid_t)arg) {
				/* not the uid we are looking for */
				continue;
			}
			fdp = pr->ps_fd;
			/* unlike BYPID, no text vnode entry is emitted here */
			if (fdp->fd_cdir)
				FILLIT(NULL, NULL, KERN_FILE_CDIR, fdp->fd_cdir, pr);
			if (fdp->fd_rdir)
				FILLIT(NULL, NULL, KERN_FILE_RDIR, fdp->fd_rdir, pr);
			if (pr->ps_tracevp)
				FILLIT(NULL, NULL, KERN_FILE_TRACE, pr->ps_tracevp, pr);
			for (i = 0; i < fdp->fd_nfiles; i++) {
				if ((fp = fd_getfile(fdp, i)) == NULL)
					continue;
				FILLIT(fp, fdp, i, NULL, pr);
				FRELE(fp, p);
			}
		}
		break;
	default:
		error = EINVAL;
		break;
	}
	free(kf, M_TEMP, sizeof(*kf));

	if (!error) {
		/*
		 * A pure size query is padded by KERN_FILESLOP elements;
		 * otherwise report ENOMEM if not everything fit.
		 */
		if (where == NULL)
			needed += KERN_FILESLOP * elem_size;
		else if (*sizep < needed)
			error = ENOMEM;
		*sizep = needed;
	}

	return (error);
}
1459 
1460 /*
1461  * try over estimating by 5 procs
1462  */
1463 #define KERN_PROCSLOP	5
1464 
1465 int
1466 sysctl_doproc(int *name, u_int namelen, char *where, size_t *sizep)
1467 {
1468 	struct kinfo_proc *kproc = NULL;
1469 	struct proc *p;
1470 	struct process *pr;
1471 	char *dp;
1472 	int arg, buflen, doingzomb, elem_size, elem_count;
1473 	int error, needed, op;
1474 	int dothreads = 0;
1475 	int show_pointers;
1476 
1477 	dp = where;
1478 	buflen = where != NULL ? *sizep : 0;
1479 	needed = error = 0;
1480 
1481 	if (namelen != 4 || name[2] < 0 || name[3] < 0 ||
1482 	    name[2] > sizeof(*kproc))
1483 		return (EINVAL);
1484 	op = name[0];
1485 	arg = name[1];
1486 	elem_size = name[2];
1487 	elem_count = name[3];
1488 
1489 	dothreads = op & KERN_PROC_SHOW_THREADS;
1490 	op &= ~KERN_PROC_SHOW_THREADS;
1491 
1492 	show_pointers = suser(curproc) == 0;
1493 
1494 	if (where != NULL)
1495 		kproc = malloc(sizeof(*kproc), M_TEMP, M_WAITOK);
1496 
1497 	pr = LIST_FIRST(&allprocess);
1498 	doingzomb = 0;
1499 again:
1500 	for (; pr != NULL; pr = LIST_NEXT(pr, ps_list)) {
1501 		/* XXX skip processes in the middle of being zapped */
1502 		if (pr->ps_pgrp == NULL)
1503 			continue;
1504 
1505 		/*
1506 		 * Skip embryonic processes.
1507 		 */
1508 		if (pr->ps_flags & PS_EMBRYO)
1509 			continue;
1510 
1511 		/*
1512 		 * TODO - make more efficient (see notes below).
1513 		 */
1514 		switch (op) {
1515 
1516 		case KERN_PROC_PID:
1517 			/* could do this with just a lookup */
1518 			if (pr->ps_pid != (pid_t)arg)
1519 				continue;
1520 			break;
1521 
1522 		case KERN_PROC_PGRP:
1523 			/* could do this by traversing pgrp */
1524 			if (pr->ps_pgrp->pg_id != (pid_t)arg)
1525 				continue;
1526 			break;
1527 
1528 		case KERN_PROC_SESSION:
1529 			if (pr->ps_session->s_leader == NULL ||
1530 			    pr->ps_session->s_leader->ps_pid != (pid_t)arg)
1531 				continue;
1532 			break;
1533 
1534 		case KERN_PROC_TTY:
1535 			if ((pr->ps_flags & PS_CONTROLT) == 0 ||
1536 			    pr->ps_session->s_ttyp == NULL ||
1537 			    pr->ps_session->s_ttyp->t_dev != (dev_t)arg)
1538 				continue;
1539 			break;
1540 
1541 		case KERN_PROC_UID:
1542 			if (pr->ps_ucred->cr_uid != (uid_t)arg)
1543 				continue;
1544 			break;
1545 
1546 		case KERN_PROC_RUID:
1547 			if (pr->ps_ucred->cr_ruid != (uid_t)arg)
1548 				continue;
1549 			break;
1550 
1551 		case KERN_PROC_ALL:
1552 			if (pr->ps_flags & PS_SYSTEM)
1553 				continue;
1554 			break;
1555 
1556 		case KERN_PROC_KTHREAD:
1557 			/* no filtering */
1558 			break;
1559 
1560 		default:
1561 			error = EINVAL;
1562 			goto err;
1563 		}
1564 
1565 		if (buflen >= elem_size && elem_count > 0) {
1566 			fill_kproc(pr, kproc, NULL, show_pointers);
1567 			error = copyout(kproc, dp, elem_size);
1568 			if (error)
1569 				goto err;
1570 			dp += elem_size;
1571 			buflen -= elem_size;
1572 			elem_count--;
1573 		}
1574 		needed += elem_size;
1575 
1576 		/* Skip per-thread entries if not required by op */
1577 		if (!dothreads)
1578 			continue;
1579 
1580 		TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
1581 			if (buflen >= elem_size && elem_count > 0) {
1582 				fill_kproc(pr, kproc, p, show_pointers);
1583 				error = copyout(kproc, dp, elem_size);
1584 				if (error)
1585 					goto err;
1586 				dp += elem_size;
1587 				buflen -= elem_size;
1588 				elem_count--;
1589 			}
1590 			needed += elem_size;
1591 		}
1592 	}
1593 	if (doingzomb == 0) {
1594 		pr = LIST_FIRST(&zombprocess);
1595 		doingzomb++;
1596 		goto again;
1597 	}
1598 	if (where != NULL) {
1599 		*sizep = dp - where;
1600 		if (needed > *sizep) {
1601 			error = ENOMEM;
1602 			goto err;
1603 		}
1604 	} else {
1605 		needed += KERN_PROCSLOP * elem_size;
1606 		*sizep = needed;
1607 	}
1608 err:
1609 	if (kproc)
1610 		free(kproc, M_TEMP, sizeof(*kproc));
1611 	return (error);
1612 }
1613 
1614 /*
1615  * Fill in a kproc structure for the specified process.
1616  */
1617 void
1618 fill_kproc(struct process *pr, struct kinfo_proc *ki, struct proc *p,
1619     int show_pointers)
1620 {
1621 	struct session *s = pr->ps_session;
1622 	struct tty *tp;
1623 	struct vmspace *vm = pr->ps_vmspace;
1624 	struct timespec ut, st;
1625 	int isthread;
1626 
1627 	isthread = p != NULL;
1628 	if (!isthread)
1629 		p = pr->ps_mainproc;		/* XXX */
1630 
1631 	FILL_KPROC(ki, strlcpy, p, pr, pr->ps_ucred, pr->ps_pgrp,
1632 	    p, pr, s, vm, pr->ps_limit, pr->ps_sigacts, isthread,
1633 	    show_pointers);
1634 
1635 	/* stuff that's too painful to generalize into the macros */
1636 	if (pr->ps_pptr)
1637 		ki->p_ppid = pr->ps_pptr->ps_pid;
1638 	if (s->s_leader)
1639 		ki->p_sid = s->s_leader->ps_pid;
1640 
1641 	if ((pr->ps_flags & PS_CONTROLT) && (tp = s->s_ttyp)) {
1642 		ki->p_tdev = tp->t_dev;
1643 		ki->p_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : -1;
1644 		if (show_pointers)
1645 			ki->p_tsess = PTRTOINT64(tp->t_session);
1646 	} else {
1647 		ki->p_tdev = NODEV;
1648 		ki->p_tpgid = -1;
1649 	}
1650 
1651 	/* fixups that can only be done in the kernel */
1652 	if ((pr->ps_flags & PS_ZOMBIE) == 0) {
1653 		if ((pr->ps_flags & PS_EMBRYO) == 0 && vm != NULL)
1654 			ki->p_vm_rssize = vm_resident_count(vm);
1655 		calctsru(isthread ? &p->p_tu : &pr->ps_tu, &ut, &st, NULL);
1656 		ki->p_uutime_sec = ut.tv_sec;
1657 		ki->p_uutime_usec = ut.tv_nsec/1000;
1658 		ki->p_ustime_sec = st.tv_sec;
1659 		ki->p_ustime_usec = st.tv_nsec/1000;
1660 
1661 #ifdef MULTIPROCESSOR
1662 		if (p->p_cpu != NULL)
1663 			ki->p_cpuid = CPU_INFO_UNIT(p->p_cpu);
1664 #endif
1665 	}
1666 
1667 	/* get %cpu and schedule state: just one thread or sum of all? */
1668 	if (isthread) {
1669 		ki->p_pctcpu = p->p_pctcpu;
1670 		ki->p_stat   = p->p_stat;
1671 	} else {
1672 		ki->p_pctcpu = 0;
1673 		ki->p_stat = (pr->ps_flags & PS_ZOMBIE) ? SDEAD : SIDL;
1674 		TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
1675 			ki->p_pctcpu += p->p_pctcpu;
1676 			/* find best state: ONPROC > RUN > STOP > SLEEP > .. */
1677 			if (p->p_stat == SONPROC || ki->p_stat == SONPROC)
1678 				ki->p_stat = SONPROC;
1679 			else if (p->p_stat == SRUN || ki->p_stat == SRUN)
1680 				ki->p_stat = SRUN;
1681 			else if (p->p_stat == SSTOP || ki->p_stat == SSTOP)
1682 				ki->p_stat = SSTOP;
1683 			else if (p->p_stat == SSLEEP)
1684 				ki->p_stat = SSLEEP;
1685 		}
1686 	}
1687 }
1688 
/*
 * Copy the argument or environment vector of another process to the
 * caller.
 *
 * name[0] is the pid of the process to inspect (the "victim"), name[1]
 * one of KERN_PROC_ARGV, KERN_PROC_NARGV, KERN_PROC_ENV or KERN_PROC_NENV.
 * The N variants return the string count as an int.  The other two lay
 * out, in the reader's buffer oldp, an array of pointers (terminated by a
 * NULL pointer) followed by the NUL-terminated strings those pointers
 * refer to.  With oldp == NULL only a size is stored in *oldlenp.
 *
 * Returns ENOTDIR/EINVAL for a malformed name, EOPNOTSUPP for an unknown
 * operation, ESRCH if the pid does not exist, EINVAL for system or exiting
 * processes, EBUSY while the victim is in exec, the suser() error when the
 * environment of a foreign process is requested by a non-root caller,
 * ENOMEM if the reader's buffer is too small, or an error from uvm_io()
 * or copyout().
 */
int
sysctl_proc_args(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    struct proc *cp)
{
	struct process *vpr;
	pid_t pid;
	struct ps_strings pss;
	struct iovec iov;
	struct uio uio;
	int error, cnt, op;
	size_t limit;
	char **rargv, **vargv;		/* reader vs. victim */
	char *rarg, *varg, *buf;
	struct vmspace *vm;
	vaddr_t ps_strings;

	if (namelen > 2)
		return (ENOTDIR);
	if (namelen < 2)
		return (EINVAL);

	pid = name[0];
	op = name[1];

	switch (op) {
	case KERN_PROC_ARGV:
	case KERN_PROC_NARGV:
	case KERN_PROC_ENV:
	case KERN_PROC_NENV:
		break;
	default:
		return (EOPNOTSUPP);
	}

	if ((vpr = prfind(pid)) == NULL)
		return (ESRCH);

	/* size query: answered before any permission or state check */
	if (oldp == NULL) {
		if (op == KERN_PROC_NARGV || op == KERN_PROC_NENV)
			*oldlenp = sizeof(int);
		else
			*oldlenp = ARG_MAX;	/* XXX XXX XXX */
		return (0);
	}

	/* Either system process or exiting/zombie */
	if (vpr->ps_flags & (PS_SYSTEM | PS_EXITING))
		return (EINVAL);

	/* Execing - danger. */
	if ((vpr->ps_flags & PS_INEXEC))
		return (EBUSY);

	/* Only owner or root can get env */
	if ((op == KERN_PROC_NENV || op == KERN_PROC_ENV) &&
	    (vpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
	    (error = suser(cp)) != 0))
		return (error);

	/*
	 * Take a reference on the victim's vmspace and stop using the
	 * process pointer; the reference is dropped at "out".
	 */
	ps_strings = vpr->ps_strings;
	vm = vpr->ps_vmspace;
	vm->vm_refcnt++;
	vpr = NULL;

	/* bounce buffer for reading the victim's strings a page at a time */
	buf = malloc(PAGE_SIZE, M_TEMP, M_WAITOK);

	/* read the victim's struct ps_strings */
	iov.iov_base = &pss;
	iov.iov_len = sizeof(pss);
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = (off_t)ps_strings;
	uio.uio_resid = sizeof(pss);
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_procp = cp;

	if ((error = uvm_io(&vm->vm_map, &uio, 0)) != 0)
		goto out;

	if (op == KERN_PROC_NARGV) {
		error = sysctl_rdint(oldp, oldlenp, NULL, pss.ps_nargvstr);
		goto out;
	}
	if (op == KERN_PROC_NENV) {
		error = sysctl_rdint(oldp, oldlenp, NULL, pss.ps_nenvstr);
		goto out;
	}

	/* cnt and vargv come from the victim's memory, not from the kernel */
	if (op == KERN_PROC_ARGV) {
		cnt = pss.ps_nargvstr;
		vargv = pss.ps_argvstr;
	} else {
		cnt = pss.ps_nenvstr;
		vargv = pss.ps_envstr;
	}

	/* -1 to have space for a terminating NUL */
	limit = *oldlenp - 1;
	*oldlenp = 0;

	rargv = oldp;

	/*
	 * *oldlenp - number of bytes copied out into readers buffer.
	 * limit - maximal number of bytes allowed into readers buffer.
	 * rarg - pointer into readers buffer where next arg will be stored.
	 * rargv - pointer into readers buffer where the next rarg pointer
	 *  will be stored.
	 * vargv - pointer into victim address space where the next argument
	 *  will be read.
	 */

	/* space for cnt pointers and a NULL */
	rarg = (char *)(rargv + cnt + 1);
	*oldlenp += (cnt + 1) * sizeof(char **);

	while (cnt > 0 && *oldlenp < limit) {
		size_t len, vstrlen;

		/* Write to readers argv */
		if ((error = copyout(&rarg, rargv, sizeof(rarg))) != 0)
			goto out;

		/* read the victim argv */
		iov.iov_base = &varg;
		iov.iov_len = sizeof(varg);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = (off_t)(vaddr_t)vargv;
		uio.uio_resid = sizeof(varg);
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_rw = UIO_READ;
		uio.uio_procp = cp;
		if ((error = uvm_io(&vm->vm_map, &uio, 0)) != 0)
			goto out;

		/* a NULL pointer ends the victim's vector early */
		if (varg == NULL)
			break;

		/*
		 * read the victim arg. We must jump through hoops to avoid
		 * crossing a page boundary too much and returning an error.
		 */
more:
		/* read from varg up to the end of the page it lies in */
		len = PAGE_SIZE - (((vaddr_t)varg) & PAGE_MASK);
		iov.iov_base = buf;
		iov.iov_len = len;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = (off_t)(vaddr_t)varg;
		uio.uio_resid = len;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_rw = UIO_READ;
		uio.uio_procp = cp;
		if ((error = uvm_io(&vm->vm_map, &uio, 0)) != 0)
			goto out;

		/* length of the string fragment; len if no NUL was found */
		for (vstrlen = 0; vstrlen < len; vstrlen++) {
			if (buf[vstrlen] == '\0')
				break;
		}

		/* Don't overflow readers buffer. */
		if (*oldlenp + vstrlen + 1 >= limit) {
			error = ENOMEM;
			goto out;
		}

		if ((error = copyout(buf, rarg, vstrlen)) != 0)
			goto out;

		*oldlenp += vstrlen;
		rarg += vstrlen;

		/* The string didn't end in this page? */
		if (vstrlen == len) {
			varg += vstrlen;
			goto more;
		}

		/* End of string. Terminate it with a NUL */
		buf[0] = '\0';
		if ((error = copyout(buf, rarg, 1)) != 0)
			goto out;
		*oldlenp += 1;
		rarg += 1;

		vargv++;
		rargv++;
		cnt--;
	}

	if (*oldlenp >= limit) {
		error = ENOMEM;
		goto out;
	}

	/* Write the terminating null */
	rarg = NULL;
	error = copyout(&rarg, rargv, sizeof(rarg));

out:
	/* drop the vmspace reference taken above and the bounce buffer */
	uvmspace_free(vm);
	free(buf, M_TEMP, PAGE_SIZE);
	return (error);
}
1896 
1897 int
1898 sysctl_proc_cwd(int *name, u_int namelen, void *oldp, size_t *oldlenp,
1899     struct proc *cp)
1900 {
1901 	struct process *findpr;
1902 	struct vnode *vp;
1903 	pid_t pid;
1904 	int error;
1905 	size_t lenused, len;
1906 	char *path, *bp, *bend;
1907 
1908 	if (namelen > 1)
1909 		return (ENOTDIR);
1910 	if (namelen < 1)
1911 		return (EINVAL);
1912 
1913 	pid = name[0];
1914 	if ((findpr = prfind(pid)) == NULL)
1915 		return (ESRCH);
1916 
1917 	if (oldp == NULL) {
1918 		*oldlenp = MAXPATHLEN * 4;
1919 		return (0);
1920 	}
1921 
1922 	/* Either system process or exiting/zombie */
1923 	if (findpr->ps_flags & (PS_SYSTEM | PS_EXITING))
1924 		return (EINVAL);
1925 
1926 	/* Only owner or root can get cwd */
1927 	if (findpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
1928 	    (error = suser(cp)) != 0)
1929 		return (error);
1930 
1931 	len = *oldlenp;
1932 	if (len > MAXPATHLEN * 4)
1933 		len = MAXPATHLEN * 4;
1934 	else if (len < 2)
1935 		return (ERANGE);
1936 	*oldlenp = 0;
1937 
1938 	/* snag a reference to the vnode before we can sleep */
1939 	vp = findpr->ps_fd->fd_cdir;
1940 	vref(vp);
1941 
1942 	path = malloc(len, M_TEMP, M_WAITOK);
1943 
1944 	bp = &path[len];
1945 	bend = bp;
1946 	*(--bp) = '\0';
1947 
1948 	/* Same as sys__getcwd */
1949 	error = vfs_getcwd_common(vp, NULL,
1950 	    &bp, path, len / 2, GETCWD_CHECK_ACCESS, cp);
1951 	if (error == 0) {
1952 		*oldlenp = lenused = bend - bp;
1953 		error = copyout(bp, oldp, lenused);
1954 	}
1955 
1956 	vrele(vp);
1957 	free(path, M_TEMP, len);
1958 
1959 	return (error);
1960 }
1961 
1962 int
1963 sysctl_proc_nobroadcastkill(int *name, u_int namelen, void *newp, size_t newlen,
1964     void *oldp, size_t *oldlenp, struct proc *cp)
1965 {
1966 	struct process *findpr;
1967 	pid_t pid;
1968 	int error, flag;
1969 
1970 	if (namelen > 1)
1971 		return (ENOTDIR);
1972 	if (namelen < 1)
1973 		return (EINVAL);
1974 
1975 	pid = name[0];
1976 	if ((findpr = prfind(pid)) == NULL)
1977 		return (ESRCH);
1978 
1979 	/* Either system process or exiting/zombie */
1980 	if (findpr->ps_flags & (PS_SYSTEM | PS_EXITING))
1981 		return (EINVAL);
1982 
1983 	/* Only root can change PS_NOBROADCASTKILL */
1984 	if (newp != 0 && (error = suser(cp)) != 0)
1985 		return (error);
1986 
1987 	/* get the PS_NOBROADCASTKILL flag */
1988 	flag = findpr->ps_flags & PS_NOBROADCASTKILL ? 1 : 0;
1989 
1990 	error = sysctl_int(oldp, oldlenp, newp, newlen, &flag);
1991 	if (error == 0 && newp) {
1992 		if (flag)
1993 			atomic_setbits_int(&findpr->ps_flags,
1994 			    PS_NOBROADCASTKILL);
1995 		else
1996 			atomic_clearbits_int(&findpr->ps_flags,
1997 			    PS_NOBROADCASTKILL);
1998 	}
1999 
2000 	return (error);
2001 }
2002 
2003 /* Arbitrary but reasonable limit for one iteration. */
2004 #define	VMMAP_MAXLEN	MAXPHYS
2005 
2006 int
2007 sysctl_proc_vmmap(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2008     struct proc *cp)
2009 {
2010 	struct process *findpr;
2011 	pid_t pid;
2012 	int error;
2013 	size_t oldlen, len;
2014 	struct kinfo_vmentry *kve, *ukve;
2015 	u_long *ustart, start;
2016 
2017 	if (namelen > 1)
2018 		return (ENOTDIR);
2019 	if (namelen < 1)
2020 		return (EINVAL);
2021 
2022 	/* Provide max buffer length as hint. */
2023 	if (oldp == NULL) {
2024 		if (oldlenp == NULL)
2025 			return (EINVAL);
2026 		else {
2027 			*oldlenp = VMMAP_MAXLEN;
2028 			return (0);
2029 		}
2030 	}
2031 
2032 	pid = name[0];
2033 	if (pid == cp->p_p->ps_pid) {
2034 		/* Self process mapping. */
2035 		findpr = cp->p_p;
2036 	} else if (pid > 0) {
2037 		if ((findpr = prfind(pid)) == NULL)
2038 			return (ESRCH);
2039 
2040 		/* Either system process or exiting/zombie */
2041 		if (findpr->ps_flags & (PS_SYSTEM | PS_EXITING))
2042 			return (EINVAL);
2043 
2044 #if 1
2045 		/* XXX Allow only root for now */
2046 		if ((error = suser(cp)) != 0)
2047 			return (error);
2048 #else
2049 		/* Only owner or root can get vmmap */
2050 		if (findpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
2051 		    (error = suser(cp)) != 0)
2052 			return (error);
2053 #endif
2054 	} else {
2055 		/* Only root can get kernel_map */
2056 		if ((error = suser(cp)) != 0)
2057 			return (error);
2058 		findpr = NULL;
2059 	}
2060 
2061 	/* Check the given size. */
2062 	oldlen = *oldlenp;
2063 	if (oldlen == 0 || oldlen % sizeof(*kve) != 0)
2064 		return (EINVAL);
2065 
2066 	/* Deny huge allocation. */
2067 	if (oldlen > VMMAP_MAXLEN)
2068 		return (EINVAL);
2069 
2070 	/*
2071 	 * Iterate from the given address passed as the first element's
2072 	 * kve_start via oldp.
2073 	 */
2074 	ukve = (struct kinfo_vmentry *)oldp;
2075 	ustart = &ukve->kve_start;
2076 	error = copyin(ustart, &start, sizeof(start));
2077 	if (error != 0)
2078 		return (error);
2079 
2080 	/* Allocate wired memory to not block. */
2081 	kve = malloc(oldlen, M_TEMP, M_WAITOK);
2082 
2083 	/* Set the base address and read entries. */
2084 	kve[0].kve_start = start;
2085 	len = oldlen;
2086 	error = fill_vmmap(findpr, kve, &len);
2087 	if (error != 0 && error != ENOMEM)
2088 		goto done;
2089 	if (len == 0)
2090 		goto done;
2091 
2092 	KASSERT(len <= oldlen);
2093 	KASSERT((len % sizeof(struct kinfo_vmentry)) == 0);
2094 
2095 	error = copyout(kve, oldp, len);
2096 
2097 done:
2098 	*oldlenp = len;
2099 
2100 	free(kve, M_TEMP, oldlen);
2101 
2102 	return (error);
2103 }
2104 #endif
2105 
2106 /*
2107  * Initialize disknames/diskstats for export by sysctl. If update is set,
2108  * then we simply update the disk statistics information.
2109  */
2110 int
2111 sysctl_diskinit(int update, struct proc *p)
2112 {
2113 	struct diskstats *sdk;
2114 	struct disk *dk;
2115 	const char *duid;
2116 	int i, tlen, l;
2117 
2118 	if ((i = rw_enter(&sysctl_disklock, RW_WRITE|RW_INTR)) != 0)
2119 		return i;
2120 
2121 	if (disk_change) {
2122 		for (dk = TAILQ_FIRST(&disklist), tlen = 0; dk;
2123 		    dk = TAILQ_NEXT(dk, dk_link)) {
2124 			if (dk->dk_name)
2125 				tlen += strlen(dk->dk_name);
2126 			tlen += 18;	/* label uid + separators */
2127 		}
2128 		tlen++;
2129 
2130 		if (disknames)
2131 			free(disknames, M_SYSCTL, disknameslen);
2132 		if (diskstats)
2133 			free(diskstats, M_SYSCTL, diskstatslen);
2134 		diskstats = NULL;
2135 		disknames = NULL;
2136 		diskstats = mallocarray(disk_count, sizeof(struct diskstats),
2137 		    M_SYSCTL, M_WAITOK|M_ZERO);
2138 		diskstatslen = disk_count * sizeof(struct diskstats);
2139 		disknames = malloc(tlen, M_SYSCTL, M_WAITOK|M_ZERO);
2140 		disknameslen = tlen;
2141 		disknames[0] = '\0';
2142 
2143 		for (dk = TAILQ_FIRST(&disklist), i = 0, l = 0; dk;
2144 		    dk = TAILQ_NEXT(dk, dk_link), i++) {
2145 			duid = NULL;
2146 			if (dk->dk_label && !duid_iszero(dk->dk_label->d_uid))
2147 				duid = duid_format(dk->dk_label->d_uid);
2148 			snprintf(disknames + l, tlen - l, "%s:%s,",
2149 			    dk->dk_name ? dk->dk_name : "",
2150 			    duid ? duid : "");
2151 			l += strlen(disknames + l);
2152 			sdk = diskstats + i;
2153 			strlcpy(sdk->ds_name, dk->dk_name,
2154 			    sizeof(sdk->ds_name));
2155 			mtx_enter(&dk->dk_mtx);
2156 			sdk->ds_busy = dk->dk_busy;
2157 			sdk->ds_rxfer = dk->dk_rxfer;
2158 			sdk->ds_wxfer = dk->dk_wxfer;
2159 			sdk->ds_seek = dk->dk_seek;
2160 			sdk->ds_rbytes = dk->dk_rbytes;
2161 			sdk->ds_wbytes = dk->dk_wbytes;
2162 			sdk->ds_attachtime = dk->dk_attachtime;
2163 			sdk->ds_timestamp = dk->dk_timestamp;
2164 			sdk->ds_time = dk->dk_time;
2165 			mtx_leave(&dk->dk_mtx);
2166 		}
2167 
2168 		/* Eliminate trailing comma */
2169 		if (l != 0)
2170 			disknames[l - 1] = '\0';
2171 		disk_change = 0;
2172 	} else if (update) {
2173 		/* Just update, number of drives hasn't changed */
2174 		for (dk = TAILQ_FIRST(&disklist), i = 0; dk;
2175 		    dk = TAILQ_NEXT(dk, dk_link), i++) {
2176 			sdk = diskstats + i;
2177 			strlcpy(sdk->ds_name, dk->dk_name,
2178 			    sizeof(sdk->ds_name));
2179 			mtx_enter(&dk->dk_mtx);
2180 			sdk->ds_busy = dk->dk_busy;
2181 			sdk->ds_rxfer = dk->dk_rxfer;
2182 			sdk->ds_wxfer = dk->dk_wxfer;
2183 			sdk->ds_seek = dk->dk_seek;
2184 			sdk->ds_rbytes = dk->dk_rbytes;
2185 			sdk->ds_wbytes = dk->dk_wbytes;
2186 			sdk->ds_attachtime = dk->dk_attachtime;
2187 			sdk->ds_timestamp = dk->dk_timestamp;
2188 			sdk->ds_time = dk->dk_time;
2189 			mtx_leave(&dk->dk_mtx);
2190 		}
2191 	}
2192 	rw_exit_write(&sysctl_disklock);
2193 	return 0;
2194 }
2195 
2196 #if defined(SYSVMSG) || defined(SYSVSEM) || defined(SYSVSHM)
2197 int
2198 sysctl_sysvipc(int *name, u_int namelen, void *where, size_t *sizep)
2199 {
2200 #ifdef SYSVSEM
2201 	struct sem_sysctl_info *semsi;
2202 #endif
2203 #ifdef SYSVSHM
2204 	struct shm_sysctl_info *shmsi;
2205 #endif
2206 	size_t infosize, dssize, tsize, buflen, bufsiz;
2207 	int i, nds, error, ret;
2208 	void *buf;
2209 
2210 	if (namelen != 1)
2211 		return (EINVAL);
2212 
2213 	buflen = *sizep;
2214 
2215 	switch (*name) {
2216 	case KERN_SYSVIPC_MSG_INFO:
2217 #ifdef SYSVMSG
2218 		return (sysctl_sysvmsg(name, namelen, where, sizep));
2219 #else
2220 		return (EOPNOTSUPP);
2221 #endif
2222 	case KERN_SYSVIPC_SEM_INFO:
2223 #ifdef SYSVSEM
2224 		infosize = sizeof(semsi->seminfo);
2225 		nds = seminfo.semmni;
2226 		dssize = sizeof(semsi->semids[0]);
2227 		break;
2228 #else
2229 		return (EOPNOTSUPP);
2230 #endif
2231 	case KERN_SYSVIPC_SHM_INFO:
2232 #ifdef SYSVSHM
2233 		infosize = sizeof(shmsi->shminfo);
2234 		nds = shminfo.shmmni;
2235 		dssize = sizeof(shmsi->shmids[0]);
2236 		break;
2237 #else
2238 		return (EOPNOTSUPP);
2239 #endif
2240 	default:
2241 		return (EINVAL);
2242 	}
2243 	tsize = infosize + (nds * dssize);
2244 
2245 	/* Return just the total size required. */
2246 	if (where == NULL) {
2247 		*sizep = tsize;
2248 		return (0);
2249 	}
2250 
2251 	/* Not enough room for even the info struct. */
2252 	if (buflen < infosize) {
2253 		*sizep = 0;
2254 		return (ENOMEM);
2255 	}
2256 	bufsiz = min(tsize, buflen);
2257 	buf = malloc(bufsiz, M_TEMP, M_WAITOK|M_ZERO);
2258 
2259 	switch (*name) {
2260 #ifdef SYSVSEM
2261 	case KERN_SYSVIPC_SEM_INFO:
2262 		semsi = (struct sem_sysctl_info *)buf;
2263 		semsi->seminfo = seminfo;
2264 		break;
2265 #endif
2266 #ifdef SYSVSHM
2267 	case KERN_SYSVIPC_SHM_INFO:
2268 		shmsi = (struct shm_sysctl_info *)buf;
2269 		shmsi->shminfo = shminfo;
2270 		break;
2271 #endif
2272 	}
2273 	buflen -= infosize;
2274 
2275 	ret = 0;
2276 	if (buflen > 0) {
2277 		/* Fill in the IPC data structures.  */
2278 		for (i = 0; i < nds; i++) {
2279 			if (buflen < dssize) {
2280 				ret = ENOMEM;
2281 				break;
2282 			}
2283 			switch (*name) {
2284 #ifdef SYSVSEM
2285 			case KERN_SYSVIPC_SEM_INFO:
2286 				if (sema[i] != NULL)
2287 					memcpy(&semsi->semids[i], sema[i],
2288 					    dssize);
2289 				else
2290 					memset(&semsi->semids[i], 0, dssize);
2291 				break;
2292 #endif
2293 #ifdef SYSVSHM
2294 			case KERN_SYSVIPC_SHM_INFO:
2295 				if (shmsegs[i] != NULL)
2296 					memcpy(&shmsi->shmids[i], shmsegs[i],
2297 					    dssize);
2298 				else
2299 					memset(&shmsi->shmids[i], 0, dssize);
2300 				break;
2301 #endif
2302 			}
2303 			buflen -= dssize;
2304 		}
2305 	}
2306 	*sizep -= buflen;
2307 	error = copyout(buf, where, *sizep);
2308 	free(buf, M_TEMP, bufsiz);
2309 	/* If copyout succeeded, use return code set earlier. */
2310 	return (error ? error : ret);
2311 }
2312 #endif /* SYSVMSG || SYSVSEM || SYSVSHM */
2313 
2314 #ifndef	SMALL_KERNEL
2315 
2316 int
2317 sysctl_intrcnt(int *name, u_int namelen, void *oldp, size_t *oldlenp)
2318 {
2319 	return (evcount_sysctl(name, namelen, oldp, oldlenp, NULL, 0));
2320 }
2321 
2322 
2323 int
2324 sysctl_sensors(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2325     void *newp, size_t newlen)
2326 {
2327 	struct ksensor *ks;
2328 	struct sensor *us;
2329 	struct ksensordev *ksd;
2330 	struct sensordev *usd;
2331 	int dev, numt, ret;
2332 	enum sensor_type type;
2333 
2334 	if (namelen != 1 && namelen != 3)
2335 		return (ENOTDIR);
2336 
2337 	dev = name[0];
2338 	if (namelen == 1) {
2339 		ret = sensordev_get(dev, &ksd);
2340 		if (ret)
2341 			return (ret);
2342 
2343 		/* Grab a copy, to clear the kernel pointers */
2344 		usd = malloc(sizeof(*usd), M_TEMP, M_WAITOK|M_ZERO);
2345 		usd->num = ksd->num;
2346 		strlcpy(usd->xname, ksd->xname, sizeof(usd->xname));
2347 		memcpy(usd->maxnumt, ksd->maxnumt, sizeof(usd->maxnumt));
2348 		usd->sensors_count = ksd->sensors_count;
2349 
2350 		ret = sysctl_rdstruct(oldp, oldlenp, newp, usd,
2351 		    sizeof(struct sensordev));
2352 
2353 		free(usd, M_TEMP, sizeof(*usd));
2354 		return (ret);
2355 	}
2356 
2357 	type = name[1];
2358 	numt = name[2];
2359 
2360 	ret = sensor_find(dev, type, numt, &ks);
2361 	if (ret)
2362 		return (ret);
2363 
2364 	/* Grab a copy, to clear the kernel pointers */
2365 	us = malloc(sizeof(*us), M_TEMP, M_WAITOK|M_ZERO);
2366 	memcpy(us->desc, ks->desc, sizeof(us->desc));
2367 	us->tv = ks->tv;
2368 	us->value = ks->value;
2369 	us->type = ks->type;
2370 	us->status = ks->status;
2371 	us->numt = ks->numt;
2372 	us->flags = ks->flags;
2373 
2374 	ret = sysctl_rdstruct(oldp, oldlenp, newp, us,
2375 	    sizeof(struct sensor));
2376 	free(us, M_TEMP, sizeof(*us));
2377 	return (ret);
2378 }
2379 #endif	/* SMALL_KERNEL */
2380 
2381 int
2382 sysctl_cptime2(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2383     void *newp, size_t newlen)
2384 {
2385 	CPU_INFO_ITERATOR cii;
2386 	struct cpu_info *ci;
2387 	int found = 0;
2388 
2389 	if (namelen != 1)
2390 		return (ENOTDIR);
2391 
2392 	CPU_INFO_FOREACH(cii, ci) {
2393 		if (name[0] == CPU_INFO_UNIT(ci)) {
2394 			found = 1;
2395 			break;
2396 		}
2397 	}
2398 	if (!found)
2399 		return (ENOENT);
2400 	if (!cpu_is_online(ci))
2401 		return (ENODEV);
2402 
2403 	return (sysctl_rdstruct(oldp, oldlenp, newp,
2404 	    &ci->ci_schedstate.spc_cp_time,
2405 	    sizeof(ci->ci_schedstate.spc_cp_time)));
2406 }
2407 
2408 #if NAUDIO > 0
2409 int
2410 sysctl_audio(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2411     void *newp, size_t newlen)
2412 {
2413 	if (namelen != 1)
2414 		return (ENOTDIR);
2415 
2416 	if (name[0] != KERN_AUDIO_RECORD)
2417 		return (ENOENT);
2418 
2419 	return (sysctl_int(oldp, oldlenp, newp, newlen, &audio_record_enable));
2420 }
2421 #endif
2422