xref: /netbsd-src/sys/kern/kern_sysctl.c (revision 5b84b3983f71fd20a534cfa5d1556623a8aaa717)
1 /*	$NetBSD: kern_sysctl.c,v 1.186 2005/08/21 13:14:54 yamt Exp $	*/
2 
3 /*-
4  * Copyright (c) 2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Andrew Brown.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *      This product includes software developed by the NetBSD
21  *      Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*-
40  * Copyright (c) 1982, 1986, 1989, 1993
41  *	The Regents of the University of California.  All rights reserved.
42  *
43  * This code is derived from software contributed to Berkeley by
44  * Mike Karels at Berkeley Software Design, Inc.
45  *
46  * Redistribution and use in source and binary forms, with or without
47  * modification, are permitted provided that the following conditions
48  * are met:
49  * 1. Redistributions of source code must retain the above copyright
50  *    notice, this list of conditions and the following disclaimer.
51  * 2. Redistributions in binary form must reproduce the above copyright
52  *    notice, this list of conditions and the following disclaimer in the
53  *    documentation and/or other materials provided with the distribution.
54  * 3. Neither the name of the University nor the names of its contributors
55  *    may be used to endorse or promote products derived from this software
56  *    without specific prior written permission.
57  *
58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68  * SUCH DAMAGE.
69  *
70  *	@(#)kern_sysctl.c	8.9 (Berkeley) 5/20/95
71  */
72 
73 /*
74  * sysctl system call.
75  */
76 
77 #include <sys/cdefs.h>
78 __KERNEL_RCSID(0, "$NetBSD: kern_sysctl.c,v 1.186 2005/08/21 13:14:54 yamt Exp $");
79 
80 #include "opt_defcorename.h"
81 #include "opt_insecure.h"
82 #include "ksyms.h"
83 
84 #include <sys/param.h>
85 #define __COMPAT_SYSCTL
86 #include <sys/sysctl.h>
87 #include <sys/systm.h>
88 #include <sys/buf.h>
89 #include <sys/ksyms.h>
90 #include <sys/malloc.h>
91 #include <sys/mount.h>
92 #include <sys/sa.h>
93 #include <sys/syscallargs.h>
94 #include <machine/stdarg.h>
95 
96 MALLOC_DEFINE(M_SYSCTLNODE, "sysctlnode", "sysctl node structures");
97 MALLOC_DEFINE(M_SYSCTLDATA, "sysctldata", "misc sysctl data");
98 
/*
 * Forward declarations of helpers that are private (static) to this
 * file.
 */
/* handler that sysctl_dispatch() selects for CTL_MMAP requests */
static int sysctl_mmap(SYSCTLFN_PROTO);
/* used by sysctl_create() to obtain and to grow a parent's child array */
static int sysctl_alloc(struct sysctlnode *, int);
static int sysctl_realloc(struct sysctlnode *);

/*
 * used by sysctl_query() and sysctl_create() to take in a node
 * description supplied by the caller, and to hand a node back out
 */
static int sysctl_cvt_in(struct lwp *, int *, const void *, size_t,
			 struct sysctlnode *);
static int sysctl_cvt_out(struct lwp *, int, const struct sysctlnode *,
			  void *, size_t, size_t *);

/*
 * NOTE(review): presumably the bookkeeping routines for struct
 * sysctllog; their callers are not visible from here -- confirm.
 */
static int sysctl_log_add(struct sysctllog **, const struct sysctlnode *);
static int sysctl_log_realloc(struct sysctllog *);
110 
/*
 * Record handed to sysctl_log_add() and sysctl_log_realloc().
 *
 * NOTE(review): the field descriptions below are inferred from the
 * field names only; confirm against the code that fills them in.
 */
struct sysctllog {
	const struct sysctlnode *log_root;	/* presumably the tree the log refers to */
	int *log_num;				/* presumably the buffer of logged numbers */
	int log_size, log_left;			/* presumably buffer size / space remaining */
};
116 
117 /*
118  * the "root" of the new sysctl tree
119  */
struct sysctlnode sysctl_root = {
	/*
	 * a read-write, node-type entry flagged as the root of a tree.
	 * sysctl_init() ORs in CTLFLAG_PERMANENT once the boot-time
	 * setup functions have run.
	 */
	.sysctl_flags = SYSCTL_VERSION|
	    CTLFLAG_ROOT|CTLFLAG_READWRITE|
	    CTLTYPE_NODE,
	/*
	 * sysctl_init() replaces this with CREATE_BASE; after that the
	 * field serves as the counter from which sysctl_create() takes
	 * dynamically assigned mib numbers.
	 */
	.sysctl_num = 0,
	/*
	 * XXX once all ports are on gcc3, we can get rid of this
	 * ugliness and simply make it into
	 *
	 *	.sysctl_size = sizeof(struct sysctlnode),
	 */
	sysc_init_field(_sysctl_size, sizeof(struct sysctlnode)),
	.sysctl_name = "(root)",
};
134 
135 /*
136  * link set of functions that add nodes at boot time (see also
137  * sysctl_buildtree())
138  */
139 __link_set_decl(sysctl_funcs, sysctl_setup_func);
140 
141 /*
142  * The `sysctl_lock' is intended to serialize access to the sysctl
143  * tree.  Given that it is now (a) dynamic, and (b) most consumers of
144  * sysctl are going to be copying data out, the old `sysctl_memlock'
145  * has been `upgraded' to simply guard the whole tree.
146  *
147  * The two new data here are to keep track of the locked chunk of
148  * memory, if there is one, so that it can be released more easily
149  * from anywhere.
150  */
/*
 * initialized by sysctl_init(); taken exclusively by sysctl_lock() and
 * released by sysctl_unlock()
 */
struct lock sysctl_treelock;
/*
 * address and size of the caller's buffer wired down by sysctl_lock();
 * sysctl_unlock() unwires it and resets the size to zero, so a
 * non-zero size means "there is something to unwire"
 */
caddr_t sysctl_memaddr;
size_t sysctl_memsize;
154 
155 /*
156  * Attributes stored in the kernel.
157  */
/*
 * host name buffer; hostnamelen is presumably the length of the
 * string currently stored in it -- TODO confirm against the setter
 */
char hostname[MAXHOSTNAMELEN];
int hostnamelen;

/*
 * domain name buffer (same size limit as the host name);
 * domainnamelen presumably tracks the stored length -- TODO confirm
 */
char domainname[MAXHOSTNAMELEN];
int domainnamelen;

long hostid;

/*
 * initial securelevel: -1 for kernels built with the INSECURE option,
 * 0 otherwise.  sysctl_create() refuses process requests once this is
 * greater than zero.
 */
#ifdef INSECURE
int securelevel = -1;
#else
int securelevel = 0;
#endif

/*
 * default core file name pattern; "%n.core" unless the kernel
 * configuration supplies its own DEFCORENAME
 */
#ifndef DEFCORENAME
#define	DEFCORENAME	"%n.core"
#endif
char defcorename[MAXPATHLEN] = DEFCORENAME;
176 
177 /*
178  * ********************************************************************
179  * Section 0: Some simple glue
180  * ********************************************************************
181  * By wrapping copyin(), copyout(), and copyinstr() like this, we can
182  * stop caring about who's calling us and simplify some code a bunch.
183  * ********************************************************************
184  */
static inline int
sysctl_copyin(const struct lwp *l, const void *uaddr, void *kaddr, size_t len)
{
	/*
	 * no lwp means the request originates inside the kernel, so
	 * the source is a kernel address and a plain kcopy() will do
	 */
	if (l == NULL)
		return (kcopy(uaddr, kaddr, len));

	/* otherwise the source lives in the calling process */
	return (copyin(uaddr, kaddr, len));
}
194 
static inline int
sysctl_copyout(const struct lwp *l, const void *kaddr, void *uaddr, size_t len)
{
	/*
	 * no lwp means the request originates inside the kernel, so
	 * the destination is a kernel address and kcopy() will do
	 */
	if (l == NULL)
		return (kcopy(kaddr, uaddr, len));

	/* otherwise the destination lives in the calling process */
	return (copyout(kaddr, uaddr, len));
}
204 
static inline int
sysctl_copyinstr(const struct lwp *l, const void *uaddr, void *kaddr,
		 size_t len, size_t *done)
{
	/*
	 * no lwp means the request originates inside the kernel, so
	 * the string is already in kernel space: copystr() suffices
	 */
	if (l == NULL)
		return (copystr(uaddr, kaddr, len, done));

	/* otherwise fetch the string from the calling process */
	return (copyinstr(uaddr, kaddr, len, done));
}
215 
216 /*
217  * ********************************************************************
218  * Initialize sysctl subsystem.
219  * ********************************************************************
220  */
221 void
222 sysctl_init(void)
223 {
224 	sysctl_setup_func * const *sysctl_setup, f;
225 
226 	lockinit(&sysctl_treelock, PRIBIO|PCATCH, "sysctl", 0, 0);
227 
228 	/*
229 	 * dynamic mib numbers start here
230 	 */
231 	sysctl_root.sysctl_num = CREATE_BASE;
232 
233         __link_set_foreach(sysctl_setup, sysctl_funcs) {
234 		/*
235 		 * XXX - why do i have to coerce the pointers like this?
236 		 */
237 		f = (void*)*sysctl_setup;
238 		(*f)(NULL);
239 	}
240 
241 	/*
242 	 * setting this means no more permanent nodes can be added,
243 	 * trees that claim to be readonly at the root now are, and if
244 	 * the main tree is readonly, *everything* is.
245 	 */
246 	sysctl_root.sysctl_flags |= CTLFLAG_PERMANENT;
247 
248 }
249 
250 /*
251  * ********************************************************************
252  * The main native sysctl system call itself.
253  * ********************************************************************
254  */
255 int
256 sys___sysctl(struct lwp *l, void *v, register_t *retval)
257 {
258 	struct sys___sysctl_args /* {
259 		syscallarg(int *) name;
260 		syscallarg(u_int) namelen;
261 		syscallarg(void *) old;
262 		syscallarg(size_t *) oldlenp;
263 		syscallarg(void *) new;
264 		syscallarg(size_t) newlen;
265 	} */ *uap = v;
266 	int error, nerror, name[CTL_MAXNAME];
267 	size_t oldlen, savelen, *oldlenp;
268 
269 	/*
270 	 * get oldlen
271 	 */
272 	oldlen = 0;
273 	oldlenp = SCARG(uap, oldlenp);
274 	if (oldlenp != NULL) {
275 		error = copyin(oldlenp, &oldlen, sizeof(oldlen));
276 		if (error)
277 			return (error);
278 	}
279 	savelen = oldlen;
280 
281 	/*
282 	 * top-level sysctl names may or may not be non-terminal, but
283 	 * we don't care
284 	 */
285 	if (SCARG(uap, namelen) > CTL_MAXNAME || SCARG(uap, namelen) < 1)
286 		return (EINVAL);
287 	error = copyin(SCARG(uap, name), &name,
288 		       SCARG(uap, namelen) * sizeof(int));
289 	if (error)
290 		return (error);
291 
292 	/*
293 	 * wire old so that copyout() is less likely to fail?
294 	 */
295 	error = sysctl_lock(l, SCARG(uap, old), savelen);
296 	if (error)
297 		return (error);
298 
299 	/*
300 	 * do sysctl work (NULL means main built-in default tree)
301 	 */
302 	error = sysctl_dispatch(&name[0], SCARG(uap, namelen),
303 				SCARG(uap, old), &oldlen,
304 				SCARG(uap, new), SCARG(uap, newlen),
305 				&name[0], l, NULL);
306 
307 	/*
308 	 * release the sysctl lock
309 	 */
310 	sysctl_unlock(l);
311 
312 	/*
313 	 * set caller's oldlen to new value even in the face of an
314 	 * error (if this gets an error and they didn't have one, they
315 	 * get this one)
316 	 */
317 	if (oldlenp) {
318 		nerror = copyout(&oldlen, oldlenp, sizeof(oldlen));
319 		if (error == 0)
320 			error = nerror;
321 	}
322 
323 	/*
324 	 * if the only problem is that we weren't given enough space,
325 	 * that's an ENOMEM error
326 	 */
327 	if (error == 0 && SCARG(uap, old) != NULL && savelen < oldlen)
328 		error = ENOMEM;
329 
330 	return (error);
331 }
332 
333 /*
334  * ********************************************************************
335  * Section 1: How the tree is used
336  * ********************************************************************
337  * Implementations of sysctl for emulations should typically need only
338  * these three functions in this order: lock the tree, dispatch
339  * request into it, unlock the tree.
340  * ********************************************************************
341  */
342 int
343 sysctl_lock(struct lwp *l, void *oldp, size_t savelen)
344 {
345 	int error = 0;
346 
347 	error = lockmgr(&sysctl_treelock, LK_EXCLUSIVE, NULL);
348 	if (error)
349 		return (error);
350 
351 	if (l != NULL && oldp != NULL && savelen) {
352 		error = uvm_vslock(l->l_proc, oldp, savelen, VM_PROT_WRITE);
353 		if (error) {
354 			(void) lockmgr(&sysctl_treelock, LK_RELEASE, NULL);
355 			return (error);
356 		}
357 		sysctl_memaddr = oldp;
358 		sysctl_memsize = savelen;
359 	}
360 
361 	return (0);
362 }
363 
364 /*
365  * ********************************************************************
366  * the main sysctl dispatch routine.  scans the given tree and picks a
367  * function to call based on what it finds.
368  * ********************************************************************
369  */
370 int
371 sysctl_dispatch(SYSCTLFN_ARGS)
372 {
373 	int error;
374 	sysctlfn fn;
375 	int ni;
376 
377 	if (rnode && SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
378 		printf("sysctl_dispatch: rnode %p wrong version\n", rnode);
379 		return (EINVAL);
380 	}
381 
382 	fn = NULL;
383 	error = sysctl_locate(l, name, namelen, &rnode, &ni);
384 
385 	/*
386 	 * the node we ended up at has a function, so call it.  it can
387 	 * hand off to query or create if it wants to.
388 	 */
389 	if (rnode->sysctl_func != NULL)
390 		fn = rnode->sysctl_func;
391 
392 	/*
393 	 * we found the node they were looking for, so do a lookup.
394 	 */
395 	else if (error == 0)
396 		fn = (sysctlfn)sysctl_lookup; /* XXX may write to rnode */
397 
398 	/*
399 	 * prospective parent node found, but the terminal node was
400 	 * not.  generic operations associate with the parent.
401 	 */
402 	else if (error == ENOENT && (ni + 1) == namelen && name[ni] < 0) {
403 		switch (name[ni]) {
404 		case CTL_QUERY:
405 			fn = sysctl_query;
406 			break;
407 		case CTL_CREATE:
408 #if NKSYMS > 0
409 		case CTL_CREATESYM:
410 #endif /* NKSYMS > 0 */
411 			fn = (sysctlfn)sysctl_create; /* we own the rnode */
412 			break;
413 		case CTL_DESTROY:
414 			fn = (sysctlfn)sysctl_destroy; /* we own the rnode */
415 			break;
416 		case CTL_MMAP:
417 			fn = (sysctlfn)sysctl_mmap; /* we own the rnode */
418 			break;
419 		case CTL_DESCRIBE:
420 			fn = sysctl_describe;
421 			break;
422 		default:
423 			error = EOPNOTSUPP;
424 			break;
425 		}
426 	}
427 
428 	/*
429 	 * after all of that, maybe we found someone who knows how to
430 	 * get us what we want?
431 	 */
432 	if (fn != NULL)
433 		error = (*fn)(name + ni, namelen - ni, oldp, oldlenp,
434 			      newp, newlen, name, l, rnode);
435 
436 	else if (error == 0)
437 		error = EOPNOTSUPP;
438 
439 	return (error);
440 }
441 
442 /*
443  * ********************************************************************
444  * Releases the tree lock.  Note that if uvm_vslock() was called when
445  * the lock was taken, we release that memory now.  By keeping track
446  * of where and how much by ourselves, the lock can be released much
447  * more easily from anywhere.
448  * ********************************************************************
449  */
450 void
451 sysctl_unlock(struct lwp *l)
452 {
453 
454 	if (l != NULL && sysctl_memsize != 0) {
455 		uvm_vsunlock(l->l_proc, sysctl_memaddr, sysctl_memsize);
456 		sysctl_memsize = 0;
457 	}
458 
459 	(void) lockmgr(&sysctl_treelock, LK_RELEASE, NULL);
460 }
461 
462 /*
463  * ********************************************************************
464  * Section 2: The main tree interfaces
465  * ********************************************************************
466  * This is how sysctl_dispatch() does its work, and you can too, by
467  * calling these routines from helpers (though typically only
468  * sysctl_lookup() will be used).  The tree MUST BE LOCKED when these
469  * are called.
470  * ********************************************************************
471  */
472 
473 /*
474  * sysctl_locate -- Finds the node matching the given mib under the
475  * given tree (via rv).  If no tree is given, we fall back to the
476  * native tree.  The current process (via l) is used for access
477  * control on the tree (some nodes may be traversable only by root) and
478  * on return, nip will show how many numbers in the mib were consumed.
479  */
480 int
481 sysctl_locate(struct lwp *l, const int *name, u_int namelen,
482 	      const struct sysctlnode **rnode, int *nip)
483 {
484 	const struct sysctlnode *node, *pnode;
485 	int tn, si, ni, error, alias;
486 
487 	/*
488 	 * basic checks and setup
489 	 */
490 	if (*rnode == NULL)
491 		*rnode = &sysctl_root;
492 	if (nip)
493 		*nip = 0;
494 	if (namelen < 0)
495 		return (EINVAL);
496 	if (namelen == 0)
497 		return (0);
498 
499 	/*
500 	 * search starts from "root"
501 	 */
502 	pnode = *rnode;
503 	if (SYSCTL_VERS(pnode->sysctl_flags) != SYSCTL_VERSION) {
504 		printf("sysctl_locate: pnode %p wrong version\n", pnode);
505 		return (EINVAL);
506 	}
507 	node = pnode->sysctl_child;
508 	error = 0;
509 
510 	/*
511 	 * scan for node to which new node should be attached
512 	 */
513 	for (ni = 0; ni < namelen; ni++) {
514 		/*
515 		 * walked off bottom of tree
516 		 */
517 		if (node == NULL) {
518 			if (SYSCTL_TYPE(pnode->sysctl_flags) == CTLTYPE_NODE)
519 				error = ENOENT;
520 			else
521 				error = ENOTDIR;
522 			break;
523 		}
524 		/*
525 		 * can anyone traverse this node or only root?
526 		 */
527 		if (l != NULL && (pnode->sysctl_flags & CTLFLAG_PRIVATE) &&
528 		    (error = suser(l->l_proc->p_ucred, &l->l_proc->p_acflag))
529 		    != 0)
530 			return (error);
531 		/*
532 		 * find a child node with the right number
533 		 */
534 		tn = name[ni];
535 		alias = 0;
536 
537 		si = 0;
538 		/*
539 		 * Note: ANYNUMBER only matches positive integers.
540 		 * Since ANYNUMBER is only permitted on single-node
541 		 * sub-trees (eg proc), check before the loop and skip
542 		 * it if we can.
543 		 */
544 		if ((node[si].sysctl_flags & CTLFLAG_ANYNUMBER) && (tn >= 0))
545 			goto foundit;
546 		for (; si < pnode->sysctl_clen; si++) {
547 			if (node[si].sysctl_num == tn) {
548 				if (node[si].sysctl_flags & CTLFLAG_ALIAS) {
549 					if (alias++ == 4)
550 						break;
551 					else {
552 						tn = node[si].sysctl_alias;
553 						si = -1;
554 					}
555 				}
556 				else
557 					goto foundit;
558 			}
559 		}
560 		/*
561 		 * if we ran off the end, it obviously doesn't exist
562 		 */
563 		error = ENOENT;
564 		break;
565 
566 		/*
567 		 * so far so good, move on down the line
568 		 */
569 	  foundit:
570 		pnode = &node[si];
571 		if (SYSCTL_TYPE(pnode->sysctl_flags) == CTLTYPE_NODE)
572 			node = node[si].sysctl_child;
573 		else
574 			node = NULL;
575 	}
576 
577 	*rnode = pnode;
578 	if (nip)
579 		*nip = ni;
580 
581 	return (error);
582 }
583 
584 /*
585  * sysctl_query -- The auto-discovery engine.  Copies out the structs
586  * describing nodes under the given node and handles overlay trees.
587  */
588 int
589 sysctl_query(SYSCTLFN_ARGS)
590 {
591 	int error, ni, elim, v;
592 	size_t out, left, t;
593 	const struct sysctlnode *enode, *onode;
594 	struct sysctlnode qnode;
595 
596 	if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
597 		printf("sysctl_query: rnode %p wrong version\n", rnode);
598 		return (EINVAL);
599 	}
600 
601 	if (SYSCTL_TYPE(rnode->sysctl_flags) != CTLTYPE_NODE)
602 		return (ENOTDIR);
603 	if (namelen != 1 || name[0] != CTL_QUERY)
604 		return (EINVAL);
605 
606 	error = 0;
607 	out = 0;
608 	left = *oldlenp;
609 	elim = 0;
610 	enode = NULL;
611 
612 	/*
613 	 * translate the given request to a current node
614 	 */
615 	error = sysctl_cvt_in(l, &v, newp, newlen, &qnode);
616 	if (error)
617 		return (error);
618 
619 	/*
620 	 * if the request specifies a version, check it
621 	 */
622 	if (qnode.sysctl_ver != 0) {
623 		enode = rnode;
624 		if (qnode.sysctl_ver != enode->sysctl_ver &&
625 		    qnode.sysctl_ver != sysctl_rootof(enode)->sysctl_ver)
626 			return (EINVAL);
627 	}
628 
629 	/*
630 	 * process has overlay tree
631 	 */
632 	if (l && l->l_proc->p_emul->e_sysctlovly) {
633 		enode = l->l_proc->p_emul->e_sysctlovly;
634 		elim = (name - oname);
635 		error = sysctl_locate(l, oname, elim, &enode, NULL);
636 		if (error == 0) {
637 			/* ah, found parent in overlay */
638 			elim = enode->sysctl_clen;
639 			enode = enode->sysctl_child;
640 		}
641 		else {
642 			error = 0;
643 			elim = 0;
644 			enode = NULL;
645 		}
646 	}
647 
648 	for (ni = 0; ni < rnode->sysctl_clen; ni++) {
649 		onode = &rnode->sysctl_child[ni];
650 		if (enode && enode->sysctl_num == onode->sysctl_num) {
651 			if (SYSCTL_TYPE(enode->sysctl_flags) != CTLTYPE_NODE)
652 				onode = enode;
653 			if (--elim > 0)
654 				enode++;
655 			else
656 				enode = NULL;
657 		}
658 		error = sysctl_cvt_out(l, v, onode, oldp, left, &t);
659 		if (error)
660 			return (error);
661 		if (oldp != NULL)
662 			oldp = (char*)oldp + t;
663 		out += t;
664 		left -= MIN(left, t);
665 	}
666 
667 	/*
668 	 * overlay trees *MUST* be entirely consumed
669 	 */
670 	KASSERT(enode == NULL);
671 
672 	*oldlenp = out;
673 
674 	return (error);
675 }
676 
677 #ifdef SYSCTL_DEBUG_CREATE
678 #undef sysctl_create
679 #endif /* SYSCTL_DEBUG_CREATE */
680 
681 /*
682  * sysctl_create -- Adds a node (the description of which is taken
683  * from newp) to the tree, returning a copy of it in the space pointed
684  * to by oldp.  In the event that the requested slot is already taken
685  * (either by name or by number), the offending node is returned
686  * instead.  Yes, this is complex, but we want to make sure everything
687  * is proper.
688  */
689 int
690 sysctl_create(SYSCTLFN_ARGS)
691 {
692 	struct sysctlnode nnode, *node, *pnode;
693 	int error, ni, at, nm, type, sz, flags, anum, v;
694 	void *own;
695 
696 	error = 0;
697 	own = NULL;
698 	anum = -1;
699 
700 	if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
701 		printf("sysctl_create: rnode %p wrong version\n", rnode);
702 		return (EINVAL);
703 	}
704 
705 	if (namelen != 1 || (name[namelen - 1] != CTL_CREATE
706 #if NKSYMS > 0
707 			     && name[namelen - 1] != CTL_CREATESYM
708 #endif /* NKSYMS > 0 */
709 			     ))
710 		return (EINVAL);
711 
712 	/*
713 	 * processes can only add nodes at securelevel 0, must be
714 	 * root, and can't add nodes to a parent that's not writeable
715 	 */
716 	if (l != NULL) {
717 #ifndef SYSCTL_DISALLOW_CREATE
718 		if (securelevel > 0)
719 			return (EPERM);
720 		error = suser(l->l_proc->p_ucred, &l->l_proc->p_acflag);
721 		if (error)
722 			return (error);
723 		if (!(rnode->sysctl_flags & CTLFLAG_READWRITE))
724 #endif /* SYSCTL_DISALLOW_CREATE */
725 			return (EPERM);
726 	}
727 
728 	/*
729 	 * nothing can add a node if:
730 	 * we've finished initial set up and
731 	 * the tree itself is not writeable or
732 	 * the entire sysctl system is not writeable
733 	 */
734 	if ((sysctl_root.sysctl_flags & CTLFLAG_PERMANENT) &&
735 	    (!(sysctl_rootof(rnode)->sysctl_flags & CTLFLAG_READWRITE) ||
736 	     !(sysctl_root.sysctl_flags & CTLFLAG_READWRITE)))
737 		return (EPERM);
738 
739 	/*
740 	 * it must be a "node", not a "int" or something
741 	 */
742 	if (SYSCTL_TYPE(rnode->sysctl_flags) != CTLTYPE_NODE)
743 		return (ENOTDIR);
744 	if (rnode->sysctl_flags & CTLFLAG_ALIAS) {
745 		printf("sysctl_create: attempt to add node to aliased "
746 		       "node %p\n", rnode);
747 		return (EINVAL);
748 	}
749 	pnode = __UNCONST(rnode); /* we are adding children to this node */
750 
751 	if (newp == NULL)
752 		return (EINVAL);
753 	error = sysctl_cvt_in(l, &v, newp, newlen, &nnode);
754 	if (error)
755 		return (error);
756 
757 	/*
758 	 * nodes passed in don't *have* parents
759 	 */
760 	if (nnode.sysctl_parent != NULL)
761 		return (EINVAL);
762 
763 	/*
764 	 * if we are indeed adding it, it should be a "good" name and
765 	 * number
766 	 */
767 	nm = nnode.sysctl_num;
768 #if NKSYMS > 0
769 	if (nm == CTL_CREATESYM)
770 		nm = CTL_CREATE;
771 #endif /* NKSYMS > 0 */
772 	if (nm < 0 && nm != CTL_CREATE)
773 		return (EINVAL);
774 	sz = 0;
775 
776 	/*
777 	 * the name can't start with a digit
778 	 */
779 	if (nnode.sysctl_name[sz] >= '0' &&
780 	    nnode.sysctl_name[sz] <= '9')
781 		return (EINVAL);
782 
783 	/*
784 	 * the name must be only alphanumerics or - or _, longer than
785 	 * 0 bytes and less that SYSCTL_NAMELEN
786 	 */
787 	while (sz < SYSCTL_NAMELEN && nnode.sysctl_name[sz] != '\0') {
788 		if ((nnode.sysctl_name[sz] >= '0' &&
789 		     nnode.sysctl_name[sz] <= '9') ||
790 		    (nnode.sysctl_name[sz] >= 'A' &&
791 		     nnode.sysctl_name[sz] <= 'Z') ||
792 		    (nnode.sysctl_name[sz] >= 'a' &&
793 		     nnode.sysctl_name[sz] <= 'z') ||
794 		    nnode.sysctl_name[sz] == '-' ||
795 		    nnode.sysctl_name[sz] == '_')
796 			sz++;
797 		else
798 			return (EINVAL);
799 	}
800 	if (sz == 0 || sz == SYSCTL_NAMELEN)
801 		return (EINVAL);
802 
803 	/*
804 	 * various checks revolve around size vs type, etc
805 	 */
806 	type = SYSCTL_TYPE(nnode.sysctl_flags);
807 	flags = SYSCTL_FLAGS(nnode.sysctl_flags);
808 	sz = nnode.sysctl_size;
809 
810 	/*
811 	 * find out if there's a collision, and if so, let the caller
812 	 * know what they collided with
813 	 */
814 	node = pnode->sysctl_child;
815 	if (((flags & CTLFLAG_ANYNUMBER) && node) ||
816 	    (node && node->sysctl_flags & CTLFLAG_ANYNUMBER))
817 		return (EINVAL);
818 	for (ni = at = 0; ni < pnode->sysctl_clen; ni++) {
819 		if (nm == node[ni].sysctl_num ||
820 		    strcmp(nnode.sysctl_name, node[ni].sysctl_name) == 0) {
821 			/*
822 			 * ignore error here, since we
823 			 * are already fixed on EEXIST
824 			 */
825 			(void)sysctl_cvt_out(l, v, &node[ni], oldp,
826 					     *oldlenp, oldlenp);
827 			return (EEXIST);
828 		}
829 		if (nm > node[ni].sysctl_num)
830 			at++;
831 	}
832 
833 	/*
834 	 * use sysctl_ver to add to the tree iff it hasn't changed
835 	 */
836 	if (nnode.sysctl_ver != 0) {
837 		/*
838 		 * a specified value must match either the parent
839 		 * node's version or the root node's version
840 		 */
841 		if (nnode.sysctl_ver != sysctl_rootof(rnode)->sysctl_ver &&
842 		    nnode.sysctl_ver != rnode->sysctl_ver) {
843 			return (EINVAL);
844 		}
845 	}
846 
847 	/*
848 	 * only the kernel can assign functions to entries
849 	 */
850 	if (l != NULL && nnode.sysctl_func != NULL)
851 		return (EPERM);
852 
853 	/*
854 	 * only the kernel can create permanent entries, and only then
855 	 * before the kernel is finished setting itself up
856 	 */
857 	if (l != NULL && (flags & ~SYSCTL_USERFLAGS))
858 		return (EPERM);
859 	if ((flags & CTLFLAG_PERMANENT) &
860 	    (sysctl_root.sysctl_flags & CTLFLAG_PERMANENT))
861 		return (EPERM);
862 	if ((flags & (CTLFLAG_OWNDATA | CTLFLAG_IMMEDIATE)) ==
863 	    (CTLFLAG_OWNDATA | CTLFLAG_IMMEDIATE))
864 		return (EINVAL);
865 	if ((flags & CTLFLAG_IMMEDIATE) &&
866 	    type != CTLTYPE_INT && type != CTLTYPE_QUAD)
867 		return (EINVAL);
868 
869 	/*
870 	 * check size, or set it if unset and we can figure it out.
871 	 * kernel created nodes are allowed to have a function instead
872 	 * of a size (or a data pointer).
873 	 */
874 	switch (type) {
875 	case CTLTYPE_NODE:
876 		/*
877 		 * only *i* can assert the size of a node
878 		 */
879 		if (flags & CTLFLAG_ALIAS) {
880 			anum = nnode.sysctl_alias;
881 			if (anum < 0)
882 				return (EINVAL);
883 			nnode.sysctl_alias = 0;
884 		}
885 		if (sz != 0 || nnode.sysctl_data != NULL)
886 			return (EINVAL);
887 		if (nnode.sysctl_csize != 0 ||
888 		    nnode.sysctl_clen != 0 ||
889 		    nnode.sysctl_child != 0)
890 			return (EINVAL);
891 		if (flags & CTLFLAG_OWNDATA)
892 			return (EINVAL);
893 		sz = sizeof(struct sysctlnode);
894 		break;
895 	case CTLTYPE_INT:
896 		/*
897 		 * since an int is an int, if the size is not given or
898 		 * is wrong, we can "int-uit" it.
899 		 */
900 		if (sz != 0 && sz != sizeof(int))
901 			return (EINVAL);
902 		sz = sizeof(int);
903 		break;
904 	case CTLTYPE_STRING:
905 		/*
906 		 * strings are a little more tricky
907 		 */
908 		if (sz == 0) {
909 			if (l == NULL) {
910 				if (nnode.sysctl_func == NULL) {
911 					if (nnode.sysctl_data == NULL)
912 						return (EINVAL);
913 					else
914 						sz = strlen(nnode.sysctl_data) +
915 						    1;
916 				}
917 			}
918 			else if (nnode.sysctl_data == NULL &&
919 				 flags & CTLFLAG_OWNDATA) {
920 				return (EINVAL);
921 			}
922 			else {
923 				char *vp, *e;
924 				size_t s;
925 
926 				/*
927 				 * we want a rough idea of what the
928 				 * size is now
929 				 */
930 				vp = malloc(PAGE_SIZE, M_SYSCTLDATA,
931 					     M_WAITOK|M_CANFAIL);
932 				if (vp == NULL)
933 					return (ENOMEM);
934 				e = nnode.sysctl_data;
935 				do {
936 					error = copyinstr(e, vp, PAGE_SIZE, &s);
937 					if (error) {
938 						if (error != ENAMETOOLONG) {
939 							free(vp, M_SYSCTLDATA);
940 							return (error);
941 						}
942 						e += PAGE_SIZE;
943 						if ((e - 32 * PAGE_SIZE) >
944 						    (char*)nnode.sysctl_data) {
945 							free(vp, M_SYSCTLDATA);
946 							return (ERANGE);
947 						}
948 					}
949 				} while (error != 0);
950 				sz = s + (e - (char*)nnode.sysctl_data);
951 				free(vp, M_SYSCTLDATA);
952 			}
953 		}
954 		break;
955 	case CTLTYPE_QUAD:
956 		if (sz != 0 && sz != sizeof(u_quad_t))
957 			return (EINVAL);
958 		sz = sizeof(u_quad_t);
959 		break;
960 	case CTLTYPE_STRUCT:
961 		if (sz == 0) {
962 			if (l != NULL || nnode.sysctl_func == NULL)
963 				return (EINVAL);
964 			if (flags & CTLFLAG_OWNDATA)
965 				return (EINVAL);
966 		}
967 		break;
968 	default:
969 		return (EINVAL);
970 	}
971 
972 	/*
973 	 * at this point, if sz is zero, we *must* have a
974 	 * function to go with it and we can't own it.
975 	 */
976 
977 	/*
978 	 *  l  ptr own
979 	 *  0   0   0  -> EINVAL (if no func)
980 	 *  0   0   1  -> own
981 	 *  0   1   0  -> kptr
982 	 *  0   1   1  -> kptr
983 	 *  1   0   0  -> EINVAL
984 	 *  1   0   1  -> own
985 	 *  1   1   0  -> kptr, no own (fault on lookup)
986 	 *  1   1   1  -> uptr, own
987 	 */
988 	if (type != CTLTYPE_NODE) {
989 		if (sz != 0) {
990 			if (flags & CTLFLAG_OWNDATA) {
991 				own = malloc(sz, M_SYSCTLDATA,
992 					     M_WAITOK|M_CANFAIL);
993 				if (nnode.sysctl_data == NULL)
994 					memset(own, 0, sz);
995 				else {
996 					error = sysctl_copyin(l,
997 					    nnode.sysctl_data, own, sz);
998 					if (error != 0) {
999 						FREE(own, M_SYSCTLDATA);
1000 						return (error);
1001 					}
1002 				}
1003 			}
1004 			else if ((nnode.sysctl_data != NULL) &&
1005 				 !(flags & CTLFLAG_IMMEDIATE)) {
1006 #if NKSYMS > 0
1007 				if (name[namelen - 1] == CTL_CREATESYM) {
1008 					char symname[128]; /* XXX enough? */
1009 					u_long symaddr;
1010 					size_t symlen;
1011 
1012 					error = sysctl_copyinstr(l,
1013 					    nnode.sysctl_data, symname,
1014 					    sizeof(symname), &symlen);
1015 					if (error)
1016 						return (error);
1017 					error = ksyms_getval(NULL, symname,
1018 					    &symaddr, KSYMS_EXTERN);
1019 					if (error)
1020 						return (error); /* EINVAL? */
1021 					nnode.sysctl_data = (void*)symaddr;
1022 				}
1023 #endif /* NKSYMS > 0 */
1024 				/*
1025 				 * Ideally, we'd like to verify here
1026 				 * that this address is acceptable,
1027 				 * but...
1028 				 *
1029 				 * - it might be valid now, only to
1030 				 *   become invalid later
1031 				 *
1032 				 * - it might be invalid only for the
1033 				 *   moment and valid later
1034 				 *
1035 				 * - or something else.
1036 				 *
1037 				 * Since we can't get a good answer,
1038 				 * we'll just accept the address as
1039 				 * given, and fault on individual
1040 				 * lookups.
1041 				 */
1042 			}
1043 		}
1044 		else if (nnode.sysctl_func == NULL)
1045 			return (EINVAL);
1046 	}
1047 
1048 	/*
1049 	 * a process can't assign a function to a node, and the kernel
1050 	 * can't create a node that has no function or data.
1051 	 * (XXX somewhat redundant check)
1052 	 */
1053 	if (l != NULL || nnode.sysctl_func == NULL) {
1054 		if (type != CTLTYPE_NODE &&
1055 		    nnode.sysctl_data == NULL &&
1056 		    !(flags & CTLFLAG_IMMEDIATE) &&
1057 		    own == NULL)
1058 			return (EINVAL);
1059 	}
1060 
1061 #ifdef SYSCTL_DISALLOW_KWRITE
1062 	/*
1063 	 * a process can't create a writable node unless it refers to
1064 	 * new data.
1065 	 */
1066 	if (l != NULL && own == NULL && type != CTLTYPE_NODE &&
1067 	    (flags & CTLFLAG_READWRITE) != CTLFLAG_READONLY &&
1068 	    !(flags & CTLFLAG_IMMEDIATE))
1069 		return (EPERM);
1070 #endif /* SYSCTL_DISALLOW_KWRITE */
1071 
1072 	/*
1073 	 * make sure there's somewhere to put the new stuff.
1074 	 */
1075 	if (pnode->sysctl_child == NULL) {
1076 		if (flags & CTLFLAG_ANYNUMBER)
1077 			error = sysctl_alloc(pnode, 1);
1078 		else
1079 			error = sysctl_alloc(pnode, 0);
1080 		if (error)
1081 			return (error);
1082 	}
1083 	node = pnode->sysctl_child;
1084 
1085 	/*
1086 	 * no collisions, so pick a good dynamic number if we need to.
1087 	 */
1088 	if (nm == CTL_CREATE) {
1089 		nm = ++sysctl_root.sysctl_num;
1090 		for (ni = 0; ni < pnode->sysctl_clen; ni++) {
1091 			if (nm == node[ni].sysctl_num) {
1092 				nm++;
1093 				ni = -1;
1094 			}
1095 			else if (nm > node[ni].sysctl_num)
1096 				at = ni + 1;
1097 		}
1098 	}
1099 
1100 	/*
1101 	 * oops...ran out of space
1102 	 */
1103 	if (pnode->sysctl_clen == pnode->sysctl_csize) {
1104 		error = sysctl_realloc(pnode);
1105 		if (error)
1106 			return (error);
1107 		node = pnode->sysctl_child;
1108 	}
1109 
1110 	/*
1111 	 * insert new node data
1112 	 */
1113 	if (at < pnode->sysctl_clen) {
1114 		int t;
1115 
1116 		/*
1117 		 * move the nodes that should come after the new one
1118 		 */
1119 		memmove(&node[at + 1], &node[at],
1120 			(pnode->sysctl_clen - at) * sizeof(struct sysctlnode));
1121 		memset(&node[at], 0, sizeof(struct sysctlnode));
1122 		node[at].sysctl_parent = pnode;
1123 		/*
1124 		 * and...reparent any children of any moved nodes
1125 		 */
1126 		for (ni = at; ni <= pnode->sysctl_clen; ni++)
1127 			if (SYSCTL_TYPE(node[ni].sysctl_flags) == CTLTYPE_NODE)
1128 				for (t = 0; t < node[ni].sysctl_clen; t++)
1129 					node[ni].sysctl_child[t].sysctl_parent =
1130 						&node[ni];
1131 	}
1132 	node = &node[at];
1133 	pnode->sysctl_clen++;
1134 
1135 	strlcpy(node->sysctl_name, nnode.sysctl_name,
1136 		sizeof(node->sysctl_name));
1137 	node->sysctl_num = nm;
1138 	node->sysctl_size = sz;
1139 	node->sysctl_flags = SYSCTL_VERSION|type|flags; /* XXX other trees */
1140 	node->sysctl_csize = 0;
1141 	node->sysctl_clen = 0;
1142 	if (own) {
1143 		node->sysctl_data = own;
1144 		node->sysctl_flags |= CTLFLAG_OWNDATA;
1145 	}
1146 	else if (flags & CTLFLAG_ALIAS) {
1147 		node->sysctl_alias = anum;
1148 	}
1149 	else if (flags & CTLFLAG_IMMEDIATE) {
1150 		switch (type) {
1151 		case CTLTYPE_INT:
1152 			node->sysctl_idata = nnode.sysctl_idata;
1153 			break;
1154 		case CTLTYPE_QUAD:
1155 			node->sysctl_qdata = nnode.sysctl_qdata;
1156 			break;
1157 		}
1158 	}
1159 	else {
1160 		node->sysctl_data = nnode.sysctl_data;
1161 		node->sysctl_flags &= ~CTLFLAG_OWNDATA;
1162 	}
1163         node->sysctl_func = nnode.sysctl_func;
1164         node->sysctl_child = NULL;
1165 	/* node->sysctl_parent should already be done */
1166 
1167 	/*
1168 	 * update "version" on path to "root"
1169 	 */
1170 	for (; rnode->sysctl_parent != NULL; rnode = rnode->sysctl_parent)
1171 		;
1172 	pnode = node;
1173 	for (nm = rnode->sysctl_ver + 1; pnode != NULL;
1174 	     pnode = pnode->sysctl_parent)
1175 		pnode->sysctl_ver = nm;
1176 
1177 	error = sysctl_cvt_out(l, v, node, oldp, *oldlenp, oldlenp);
1178 
1179 	return (error);
1180 }
1181 
1182 /*
1183  * ********************************************************************
1184  * A wrapper around sysctl_create() that prints the thing we're trying
1185  * to add.
1186  * ********************************************************************
1187  */
#ifdef SYSCTL_DEBUG_CREATE
int _sysctl_create(SYSCTLFN_PROTO);

/*
 * _sysctl_create -- tracing shim for sysctl_create().  Prints the
 * complete mib name together with the requested node (name, flags,
 * type and size) before forwarding the call, then prints the same
 * mib name and the value sysctl_create() returned.  The #define that
 * follows routes later references to sysctl_create() in this file
 * through the shim.
 */
int
_sysctl_create(SYSCTLFN_ARGS)
{
	const struct sysctlnode *req = newp;
	int idx, num, result;
	int fullen = namelen + (name - oname);

	/* remembered now so it can be shown again after the call */
	num = req->sysctl_num;

	printf("namelen %d (", fullen);
	for (idx = 0; idx < fullen - 1; idx++)
		printf(" %d", oname[idx]);
	printf(" %d )\t[%s]\tflags %08x (%08x %d %zu)\n",
	       num,
	       req->sysctl_name,
	       req->sysctl_flags,
	       SYSCTL_FLAGS(req->sysctl_flags),
	       SYSCTL_TYPE(req->sysctl_flags),
	       req->sysctl_size);

	result = sysctl_create(SYSCTLFN_CALL(rnode));

	printf("sysctl_create(");
	for (idx = 0; idx < fullen - 1; idx++)
		printf(" %d", oname[idx]);
	printf(" %d ) returned %d\n", num, result);

	return (result);
}
#define sysctl_create _sysctl_create
#endif /* SYSCTL_DEBUG_CREATE */
1221 
1222 /*
1223  * sysctl_destroy -- Removes a node (as described by newp) from the
1224  * given tree, returning (if successful) a copy of the dead node in
1225  * oldp.  Since we're removing stuff, there's not much to check.
1226  */
1227 int
1228 sysctl_destroy(SYSCTLFN_ARGS)
1229 {
1230 	struct sysctlnode *node, *pnode, onode, nnode;
1231 	int ni, error, v;
1232 
1233 	if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
1234 		printf("sysctl_destroy: rnode %p wrong version\n", rnode);
1235 		return (EINVAL);
1236 	}
1237 
1238 	error = 0;
1239 
1240 	if (namelen != 1 || name[namelen - 1] != CTL_DESTROY)
1241 		return (EINVAL);
1242 
1243 	/*
1244 	 * processes can only destroy nodes at securelevel 0, must be
1245 	 * root, and can't remove nodes from a parent that's not
1246 	 * writeable
1247 	 */
1248 	if (l != NULL) {
1249 #ifndef SYSCTL_DISALLOW_CREATE
1250 		if (securelevel > 0)
1251 			return (EPERM);
1252 		error = suser(l->l_proc->p_ucred, &l->l_proc->p_acflag);
1253 		if (error)
1254 			return (error);
1255 		if (!(rnode->sysctl_flags & CTLFLAG_READWRITE))
1256 #endif /* SYSCTL_DISALLOW_CREATE */
1257 			return (EPERM);
1258 	}
1259 
1260 	/*
1261 	 * nothing can remove a node if:
1262 	 * the node is permanent (checked later) or
1263 	 * the tree itself is not writeable or
1264 	 * the entire sysctl system is not writeable
1265 	 *
1266 	 * note that we ignore whether setup is complete or not,
1267 	 * because these rules always apply.
1268 	 */
1269 	if (!(sysctl_rootof(rnode)->sysctl_flags & CTLFLAG_READWRITE) ||
1270 	    !(sysctl_root.sysctl_flags & CTLFLAG_READWRITE))
1271 		return (EPERM);
1272 
1273 	if (newp == NULL)
1274 		return (EINVAL);
1275 	error = sysctl_cvt_in(l, &v, newp, newlen, &nnode);
1276 	if (error)
1277 		return (error);
1278 	memset(&onode, 0, sizeof(struct sysctlnode));
1279 
1280 	node = rnode->sysctl_child;
1281 	for (ni = 0; ni < rnode->sysctl_clen; ni++) {
1282 		if (nnode.sysctl_num == node[ni].sysctl_num) {
1283 			/*
1284 			 * if name specified, must match
1285 			 */
1286 			if (nnode.sysctl_name[0] != '\0' &&
1287 			    strcmp(nnode.sysctl_name, node[ni].sysctl_name))
1288 				continue;
1289 			/*
1290 			 * if version specified, must match
1291 			 */
1292 			if (nnode.sysctl_ver != 0 &&
1293 			    nnode.sysctl_ver != node[ni].sysctl_ver)
1294 				continue;
1295 			/*
1296 			 * this must be the one
1297 			 */
1298 			break;
1299 		}
1300 	}
1301 	if (ni == rnode->sysctl_clen)
1302 		return (ENOENT);
1303 	node = &node[ni];
1304 	pnode = node->sysctl_parent;
1305 
1306 	/*
1307 	 * if the kernel says permanent, it is, so there.  nyah.
1308 	 */
1309 	if (SYSCTL_FLAGS(node->sysctl_flags) & CTLFLAG_PERMANENT)
1310 		return (EPERM);
1311 
1312 	/*
1313 	 * can't delete non-empty nodes
1314 	 */
1315 	if (SYSCTL_TYPE(node->sysctl_flags) == CTLTYPE_NODE &&
1316 	    node->sysctl_clen != 0)
1317 		return (ENOTEMPTY);
1318 
1319 	/*
1320 	 * if the node "owns" data, release it now
1321 	 */
1322 	if (node->sysctl_flags & CTLFLAG_OWNDATA) {
1323 		if (node->sysctl_data != NULL)
1324 			FREE(node->sysctl_data, M_SYSCTLDATA);
1325 		node->sysctl_data = NULL;
1326 	}
1327 	if (node->sysctl_flags & CTLFLAG_OWNDESC) {
1328 		if (node->sysctl_desc != NULL)
1329 			/*XXXUNCONST*/
1330 			FREE(__UNCONST(node->sysctl_desc), M_SYSCTLDATA);
1331 		node->sysctl_desc = NULL;
1332 	}
1333 
1334 	/*
1335 	 * if the node to be removed is not the last one on the list,
1336 	 * move the remaining nodes up, and reparent any grandchildren
1337 	 */
1338 	onode = *node;
1339 	if (ni < pnode->sysctl_clen - 1) {
1340 		int t;
1341 
1342 		memmove(&pnode->sysctl_child[ni], &pnode->sysctl_child[ni + 1],
1343 			(pnode->sysctl_clen - ni - 1) *
1344 			sizeof(struct sysctlnode));
1345 		for (; ni < pnode->sysctl_clen - 1; ni++)
1346 			if (SYSCTL_TYPE(pnode->sysctl_child[ni].sysctl_flags) ==
1347 			    CTLTYPE_NODE)
1348 				for (t = 0;
1349 				     t < pnode->sysctl_child[ni].sysctl_clen;
1350 				     t++)
1351 					pnode->sysctl_child[ni].sysctl_child[t].
1352 						sysctl_parent =
1353 						&pnode->sysctl_child[ni];
1354 		ni = pnode->sysctl_clen - 1;
1355 		node = &pnode->sysctl_child[ni];
1356 	}
1357 
1358 	/*
1359 	 * reset the space we just vacated
1360 	 */
1361 	memset(node, 0, sizeof(struct sysctlnode));
1362 	node->sysctl_parent = pnode;
1363 	pnode->sysctl_clen--;
1364 
1365 	/*
1366 	 * if this parent just lost its last child, nuke the creche
1367 	 */
1368 	if (pnode->sysctl_clen == 0) {
1369 		FREE(pnode->sysctl_child, M_SYSCTLNODE);
1370 		pnode->sysctl_csize = 0;
1371 		pnode->sysctl_child = NULL;
1372 	}
1373 
1374 	/*
1375 	 * update "version" on path to "root"
1376 	 */
1377         for (; rnode->sysctl_parent != NULL; rnode = rnode->sysctl_parent)
1378                 ;
1379 	for (ni = rnode->sysctl_ver + 1; pnode != NULL;
1380 	     pnode = pnode->sysctl_parent)
1381 		pnode->sysctl_ver = ni;
1382 
1383 	error = sysctl_cvt_out(l, v, &onode, oldp, *oldlenp, oldlenp);
1384 
1385 	return (error);
1386 }
1387 
1388 /*
1389  * sysctl_lookup -- Handles copyin/copyout of new and old values.
1390  * Partial reads are globally allowed.  Only root can write to things
1391  * unless the node says otherwise.
1392  */
1393 int
1394 sysctl_lookup(SYSCTLFN_ARGS)
1395 {
1396 	int error, rw;
1397 	size_t sz, len;
1398 	void *d;
1399 
1400 	if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
1401 		printf("sysctl_lookup: rnode %p wrong version\n", rnode);
1402 		return (EINVAL);
1403 	}
1404 
1405 	error = 0;
1406 
1407 	/*
1408 	 * you can't "look up" a node.  you can "query" it, but you
1409 	 * can't "look it up".
1410 	 */
1411 	if (SYSCTL_TYPE(rnode->sysctl_flags) == CTLTYPE_NODE || namelen != 0)
1412 		return (EINVAL);
1413 
1414 	/*
1415 	 * some nodes are private, so only root can look into them.
1416 	 */
1417 	if (l != NULL && (rnode->sysctl_flags & CTLFLAG_PRIVATE) &&
1418 	    (error = suser(l->l_proc->p_ucred, &l->l_proc->p_acflag)) != 0)
1419 		return (error);
1420 
1421 	/*
1422 	 * if a node wants to be writable according to different rules
1423 	 * other than "only root can write to stuff unless a flag is
1424 	 * set", then it needs its own function which should have been
1425 	 * called and not us.
1426 	 */
1427 	if (l != NULL && newp != NULL &&
1428 	    !(rnode->sysctl_flags & CTLFLAG_ANYWRITE) &&
1429 	    (error = suser(l->l_proc->p_ucred, &l->l_proc->p_acflag)) != 0)
1430 		return (error);
1431 
1432 	/*
1433 	 * is this node supposedly writable?
1434 	 */
1435 	rw = 0;
1436 	switch (rnode->sysctl_flags & CTLFLAG_READWRITE) {
1437 	    case CTLFLAG_READONLY1:
1438 		rw = (securelevel < 1) ? 1 : 0;
1439 		break;
1440 	    case CTLFLAG_READONLY2:
1441 		rw = (securelevel < 2) ? 1 : 0;
1442 		break;
1443 	    case CTLFLAG_READWRITE:
1444 		rw = 1;
1445 		break;
1446 	}
1447 
1448 	/*
1449 	 * it appears not to be writable at this time, so if someone
1450 	 * tried to write to it, we must tell them to go away
1451 	 */
1452 	if (!rw && newp != NULL)
1453 		return (EPERM);
1454 
1455 	/*
1456 	 * step one, copy out the stuff we have presently
1457 	 */
1458 	if (rnode->sysctl_flags & CTLFLAG_IMMEDIATE) {
1459 		/*
1460 		 * note that we discard const here because we are
1461 		 * modifying the contents of the node (which is okay
1462 		 * because it's ours)
1463 		 */
1464 		switch (SYSCTL_TYPE(rnode->sysctl_flags)) {
1465 		case CTLTYPE_INT:
1466 			d = __UNCONST(&rnode->sysctl_idata);
1467 			break;
1468 		case CTLTYPE_QUAD:
1469 			d = __UNCONST(&rnode->sysctl_qdata);
1470 			break;
1471 		default:
1472 			return (EINVAL);
1473 		}
1474 	}
1475 	else
1476 		d = rnode->sysctl_data;
1477 	if (SYSCTL_TYPE(rnode->sysctl_flags) == CTLTYPE_STRING)
1478 		sz = strlen(d) + 1; /* XXX@@@ possible fault here */
1479 	else
1480 		sz = rnode->sysctl_size;
1481 	if (oldp != NULL)
1482 		error = sysctl_copyout(l, d, oldp, MIN(sz, *oldlenp));
1483 	if (error)
1484 		return (error);
1485 	*oldlenp = sz;
1486 
1487 	/*
1488 	 * are we done?
1489 	 */
1490 	if (newp == NULL || newlen == 0)
1491 		return (0);
1492 
1493 	/*
1494 	 * hmm...not done.  must now "copy in" new value.  re-adjust
1495 	 * sz to maximum value (strings are "weird").
1496 	 */
1497 	sz = rnode->sysctl_size;
1498 	switch (SYSCTL_TYPE(rnode->sysctl_flags)) {
1499 	case CTLTYPE_INT:
1500 	case CTLTYPE_QUAD:
1501 	case CTLTYPE_STRUCT:
1502 		/*
1503 		 * these data must be *exactly* the same size coming
1504 		 * in.
1505 		 */
1506 		if (newlen != sz)
1507 			return (EINVAL);
1508 		error = sysctl_copyin(l, newp, d, sz);
1509 		break;
1510 	case CTLTYPE_STRING: {
1511 		/*
1512 		 * strings, on the other hand, can be shorter, and we
1513 		 * let userland be sloppy about the trailing nul.
1514 		 */
1515 		char *newbuf;
1516 
1517 		/*
1518 		 * too much new string?
1519 		 */
1520 		if (newlen > sz)
1521 			return (EINVAL);
1522 
1523 		/*
1524 		 * temporary copy of new inbound string
1525 		 */
1526 		len = MIN(sz, newlen);
1527 		newbuf = malloc(len, M_SYSCTLDATA, M_WAITOK|M_CANFAIL);
1528 		if (newbuf == NULL)
1529 			return (ENOMEM);
1530 		error = sysctl_copyin(l, newp, newbuf, len);
1531 		if (error) {
1532 			FREE(newbuf, M_SYSCTLDATA);
1533 			return (error);
1534 		}
1535 
1536 		/*
1537 		 * did they null terminate it, or do we have space
1538 		 * left to do it ourselves?
1539 		 */
1540 		if (newbuf[len - 1] != '\0' && len == sz) {
1541 			FREE(newbuf, M_SYSCTLDATA);
1542 			return (EINVAL);
1543 		}
1544 
1545 		/*
1546 		 * looks good, so pop it into place and zero the rest.
1547 		 */
1548 		if (len > 0)
1549 			memcpy(rnode->sysctl_data, newbuf, len);
1550 		if (sz != len)
1551 			memset((char*)rnode->sysctl_data + len, 0, sz - len);
1552 		FREE(newbuf, M_SYSCTLDATA);
1553 		break;
1554 	}
1555 	default:
1556 		return (EINVAL);
1557 	}
1558 
1559 	return (error);
1560 }
1561 
1562 /*
1563  * sysctl_mmap -- Dispatches sysctl mmap requests to those nodes that
1564  * purport to handle it.  This interface isn't fully fleshed out yet,
1565  * unfortunately.
1566  */
1567 static int
1568 sysctl_mmap(SYSCTLFN_ARGS)
1569 {
1570 	const struct sysctlnode *node;
1571 	struct sysctlnode nnode;
1572 	int error;
1573 
1574 	if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
1575 		printf("sysctl_mmap: rnode %p wrong version\n", rnode);
1576 		return (EINVAL);
1577 	}
1578 
1579 	/*
1580 	 * let's just pretend that didn't happen, m'kay?
1581 	 */
1582 	if (l == NULL)
1583 		return (EPERM);
1584 
1585 	/*
1586 	 * is this a sysctlnode description of an mmap request?
1587 	 */
1588 	if (newp == NULL || newlen != sizeof(struct sysctlnode))
1589 		return (EINVAL);
1590 	error = sysctl_copyin(l, newp, &nnode, sizeof(nnode));
1591 	if (error)
1592 		return (error);
1593 
1594 	/*
1595 	 * does the node they asked for exist?
1596 	 */
1597 	if (namelen != 1)
1598 		return (EOPNOTSUPP);
1599 	node = rnode;
1600         error = sysctl_locate(l, &nnode.sysctl_num, 1, &node, NULL);
1601 	if (error)
1602 		return (error);
1603 
1604 	/*
1605 	 * does this node that we have found purport to handle mmap?
1606 	 */
1607 	if (node->sysctl_func == NULL ||
1608 	    !(node->sysctl_flags & CTLFLAG_MMAP))
1609 		return (EOPNOTSUPP);
1610 
1611 	/*
1612 	 * well...okay, they asked for it.
1613 	 */
1614 	return ((*node->sysctl_func)(SYSCTLFN_CALL(node)));
1615 }
1616 
1617 int
1618 sysctl_describe(SYSCTLFN_ARGS)
1619 {
1620 	struct sysctldesc *d;
1621 	char bf[1024];
1622 	size_t sz, left, tot;
1623 	int i, error, v = -1;
1624 	struct sysctlnode *node;
1625 	struct sysctlnode dnode;
1626 
1627 	if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
1628 		printf("sysctl_query: rnode %p wrong version\n", rnode);
1629 		return (EINVAL);
1630 	}
1631 
1632 	if (SYSCTL_TYPE(rnode->sysctl_flags) != CTLTYPE_NODE)
1633 		return (ENOTDIR);
1634 	if (namelen != 1 || name[0] != CTL_DESCRIBE)
1635 		return (EINVAL);
1636 
1637 	/*
1638 	 * get ready...
1639 	 */
1640 	error = 0;
1641 	d = (void*)bf;
1642 	tot = 0;
1643 	node = rnode->sysctl_child;
1644 	left = *oldlenp;
1645 
1646 	/*
1647 	 * no request -> all descriptions at this level
1648 	 * request with desc unset -> just this node
1649 	 * request with desc set -> set descr for this node
1650 	 */
1651 	if (newp != NULL) {
1652 		error = sysctl_cvt_in(l, &v, newp, newlen, &dnode);
1653 		if (error)
1654 			return (error);
1655 		if (dnode.sysctl_desc != NULL) {
1656 			/*
1657 			 * processes cannot set descriptions above
1658 			 * securelevel 0.  and must be root.  blah
1659 			 * blah blah.  a couple more checks are made
1660 			 * once we find the node we want.
1661 			 */
1662 			if (l != NULL) {
1663 #ifndef SYSCTL_DISALLOW_CREATE
1664 				if (securelevel > 0)
1665 					return (EPERM);
1666 				error = suser(l->l_proc->p_ucred,
1667 					      &l->l_proc->p_acflag);
1668 				if (error)
1669 					return (error);
1670 #else /* SYSCTL_DISALLOW_CREATE */
1671 				return (EPERM);
1672 #endif /* SYSCTL_DISALLOW_CREATE */
1673 			}
1674 
1675 			/*
1676 			 * find node and try to set the description on it
1677 			 */
1678 			for (i = 0; i < rnode->sysctl_clen; i++)
1679 				if (node[i].sysctl_num == dnode.sysctl_num)
1680 					break;
1681 			if (i == rnode->sysctl_clen)
1682 				return (ENOENT);
1683 			node = &node[i];
1684 
1685 			/*
1686 			 * did the caller specify a node version?
1687 			 */
1688 			if (dnode.sysctl_ver != 0 &&
1689 			    dnode.sysctl_ver != node->sysctl_ver)
1690 				return (EINVAL);
1691 
1692 			/*
1693 			 * okay...some rules:
1694 			 * (1) if setup is done and the tree is
1695 			 *     read-only or the whole system is
1696 			 *     read-only
1697 			 * (2) no one can set a description on a
1698 			 *     permanent node (it must be set when
1699 			 *     using createv)
1700 			 * (3) processes cannot *change* a description
1701 			 * (4) processes *can*, however, set a
1702 			 *     description on a read-only node so that
1703 			 *     one can be created and then described
1704 			 *     in two steps
1705 			 * anything else come to mind?
1706 			 */
1707 			if ((sysctl_root.sysctl_flags & CTLFLAG_PERMANENT) &&
1708 			    (!(sysctl_rootof(node)->sysctl_flags &
1709 			       CTLFLAG_READWRITE) ||
1710 			     !(sysctl_root.sysctl_flags & CTLFLAG_READWRITE)))
1711 				return (EPERM);
1712 			if (node->sysctl_flags & CTLFLAG_PERMANENT)
1713 				return (EPERM);
1714 			if (l != NULL && node->sysctl_desc != NULL)
1715 				return (EPERM);
1716 
1717 			/*
1718 			 * right, let's go ahead.  the first step is
1719 			 * making the description into something the
1720 			 * node can "own", if need be.
1721 			 */
1722 			if (l != NULL ||
1723 			    dnode.sysctl_flags & CTLFLAG_OWNDESC) {
1724 				char *nd, k[1024];
1725 
1726 				error = sysctl_copyinstr(l, dnode.sysctl_desc,
1727 							 &k[0], sizeof(k), &sz);
1728 				if (error)
1729 					return (error);
1730 				nd = malloc(sz, M_SYSCTLDATA,
1731 					    M_WAITOK|M_CANFAIL);
1732 				if (nd == NULL)
1733 					return (ENOMEM);
1734 				memcpy(nd, k, sz);
1735 				dnode.sysctl_flags |= CTLFLAG_OWNDESC;
1736 				dnode.sysctl_desc = nd;
1737 			}
1738 
1739 			/*
1740 			 * now "release" the old description and
1741 			 * attach the new one.  ta-da.
1742 			 */
1743 			if ((node->sysctl_flags & CTLFLAG_OWNDESC) &&
1744 			    node->sysctl_desc != NULL)
1745 				/*XXXUNCONST*/
1746 				free(__UNCONST(node->sysctl_desc), M_SYSCTLDATA);
1747 			node->sysctl_desc = dnode.sysctl_desc;
1748 			node->sysctl_flags |=
1749 				(dnode.sysctl_flags & CTLFLAG_OWNDESC);
1750 
1751 			/*
1752 			 * now we "fall out" and into the loop which
1753 			 * will copy the new description back out for
1754 			 * those interested parties
1755 			 */
1756 		}
1757 	}
1758 
1759 	/*
1760 	 * scan for one description or just retrieve all descriptions
1761 	 */
1762 	for (i = 0; i < rnode->sysctl_clen; i++) {
1763 		/*
1764 		 * did they ask for the description of only one node?
1765 		 */
1766 		if (v != -1 && node[i].sysctl_num != dnode.sysctl_num)
1767 			continue;
1768 
1769 		/*
1770 		 * don't describe "private" nodes to non-suser users
1771 		 */
1772 		if ((node[i].sysctl_flags & CTLFLAG_PRIVATE) && (l != NULL) &&
1773 		    !(suser(l->l_proc->p_ucred, &l->l_proc->p_acflag)))
1774 			continue;
1775 
1776 		/*
1777 		 * is this description "valid"?
1778 		 */
1779 		memset(bf, 0, sizeof(bf));
1780 		if (node[i].sysctl_desc == NULL)
1781 			sz = 1;
1782 		else if (copystr(node[i].sysctl_desc, &d->descr_str[0],
1783 				 sizeof(bf) - sizeof(*d), &sz) != 0) {
1784 			/*
1785 			 * erase possible partial description
1786 			 */
1787 			memset(bf, 0, sizeof(bf));
1788 			sz = 1;
1789 		}
1790 
1791 		/*
1792 		 * we've got it, stuff it into the caller's buffer
1793 		 */
1794 		d->descr_num = node[i].sysctl_num;
1795 		d->descr_ver = node[i].sysctl_ver;
1796 		d->descr_len = sz; /* includes trailing nul */
1797 		sz = (caddr_t)NEXT_DESCR(d) - (caddr_t)d;
1798 		if (oldp != NULL && left >= sz) {
1799 			error = sysctl_copyout(l, d, oldp, sz);
1800 			if (error)
1801 				return (error);
1802 			left -= sz;
1803 			oldp = (void *)__sysc_desc_adv(oldp, d->descr_len);
1804 		}
1805 		tot += sz;
1806 
1807 		/*
1808 		 * if we get this far with v not "unset", they asked
1809 		 * for a specific node and we found it
1810 		 */
1811 		if (v != -1)
1812 			break;
1813 	}
1814 
1815 	/*
1816 	 * did we find it after all?
1817 	 */
1818 	if (v != -1 && tot == 0)
1819 		error = ENOENT;
1820 	else
1821 		*oldlenp = tot;
1822 
1823 	return (error);
1824 }
1825 
1826 /*
1827  * ********************************************************************
1828  * Section 3: Create and destroy from inside the kernel
1829  * ********************************************************************
1830  * sysctl_createv() and sysctl_destroyv() are simpler-to-use
1831  * interfaces for the kernel to fling new entries into the mib and rip
1832  * them out later.  In the case of sysctl_createv(), the returned copy
1833  * of the node (see sysctl_create()) will be translated back into a
1834  * pointer to the actual node.
1835  *
1836  * Note that sysctl_createv() will return 0 if the create request
1837  * matches an existing node (ala mkdir -p), and that sysctl_destroyv()
1838  * will return 0 if the node to be destroyed already does not exist
1839  * (aka rm -f) or if it is a parent of other nodes.
1840  *
1841  * This allows two (or more) different subsystems to assert sub-tree
1842  * existence before populating their own nodes, and to remove their
1843  * own nodes without orphaning the others when they are done.
1844  * ********************************************************************
1845  */
1846 int
1847 sysctl_createv(struct sysctllog **log, int cflags,
1848 	       const struct sysctlnode **rnode, const struct sysctlnode **cnode,
1849 	       int flags, int type, const char *namep, const char *descr,
1850 	       sysctlfn func, u_quad_t qv, void *newp, size_t newlen,
1851 	       ...)
1852 {
1853 	va_list ap;
1854 	int error, ni, namelen, name[CTL_MAXNAME];
1855 	const struct sysctlnode *root, *pnode;
1856 	struct sysctlnode nnode, onode, *dnode;
1857 	size_t sz;
1858 
1859 	/*
1860 	 * where are we putting this?
1861 	 */
1862 	if (rnode != NULL && *rnode == NULL) {
1863 		printf("sysctl_createv: rnode NULL\n");
1864 		return (EINVAL);
1865 	}
1866 	root = rnode ? *rnode : NULL;
1867 	if (cnode != NULL)
1868 		*cnode = NULL;
1869 	if (cflags != 0)
1870 		return (EINVAL);
1871 
1872 	/*
1873 	 * what is it?
1874 	 */
1875 	flags = SYSCTL_VERSION|SYSCTL_TYPE(type)|SYSCTL_FLAGS(flags);
1876 	if (log != NULL)
1877 		flags &= ~CTLFLAG_PERMANENT;
1878 
1879 	/*
1880 	 * where do we put it?
1881 	 */
1882 	va_start(ap, newlen);
1883 	namelen = 0;
1884 	ni = -1;
1885 	do {
1886 		if (++ni == CTL_MAXNAME)
1887 			return (ENAMETOOLONG);
1888 		name[ni] = va_arg(ap, int);
1889 		/*
1890 		 * sorry, this is not supported from here
1891 		 */
1892 		if (name[ni] == CTL_CREATESYM)
1893 			return (EINVAL);
1894 	} while (name[ni] != CTL_EOL && name[ni] != CTL_CREATE);
1895 	namelen = ni + (name[ni] == CTL_CREATE ? 1 : 0);
1896 	va_end(ap);
1897 
1898 	/*
1899 	 * what's it called
1900 	 */
1901 	if (strlcpy(nnode.sysctl_name, namep, sizeof(nnode.sysctl_name)) >=
1902 	    sizeof(nnode.sysctl_name))
1903 		return (ENAMETOOLONG);
1904 
1905 	/*
1906 	 * cons up the description of the new node
1907 	 */
1908 	nnode.sysctl_num = name[namelen - 1];
1909 	name[namelen - 1] = CTL_CREATE;
1910 	nnode.sysctl_size = newlen;
1911 	nnode.sysctl_flags = flags;
1912 	if (type == CTLTYPE_NODE) {
1913 		nnode.sysctl_csize = 0;
1914 		nnode.sysctl_clen = 0;
1915 		nnode.sysctl_child = NULL;
1916 		if (flags & CTLFLAG_ALIAS)
1917 			nnode.sysctl_alias = qv;
1918 	}
1919 	else if (flags & CTLFLAG_IMMEDIATE) {
1920 		switch (type) {
1921 		case CTLTYPE_INT:
1922 			nnode.sysctl_idata = qv;
1923 			break;
1924 		case CTLTYPE_QUAD:
1925 			nnode.sysctl_qdata = qv;
1926 			break;
1927 		default:
1928 			return (EINVAL);
1929 		}
1930 	}
1931 	else {
1932 		nnode.sysctl_data = newp;
1933 	}
1934 	nnode.sysctl_func = func;
1935 	nnode.sysctl_parent = NULL;
1936 	nnode.sysctl_ver = 0;
1937 
1938 	/*
1939 	 * initialize lock state -- we need locks if the main tree has
1940 	 * been marked as complete, but since we could be called from
1941 	 * either there, or from a device driver (say, at device
1942 	 * insertion), or from an lkm (at lkm load time, say), we
1943 	 * don't really want to "wait"...
1944 	 */
1945 	error = sysctl_lock(NULL, NULL, 0);
1946 	if (error)
1947 		return (error);
1948 
1949 	/*
1950 	 * locate the prospective parent of the new node, and if we
1951 	 * find it, add the new node.
1952 	 */
1953 	sz = sizeof(onode);
1954 	pnode = root;
1955 	error = sysctl_locate(NULL, &name[0], namelen - 1, &pnode, &ni);
1956 	if (error) {
1957 		printf("sysctl_createv: sysctl_locate(%s) returned %d\n",
1958 		       nnode.sysctl_name, error);
1959 		sysctl_unlock(NULL);
1960 		return (error);
1961 	}
1962 	error = sysctl_create(&name[ni], namelen - ni, &onode, &sz,
1963 			      &nnode, sizeof(nnode), &name[0], NULL,
1964 			      pnode);
1965 
1966 	/*
1967 	 * unfortunately the node we wanted to create is already
1968 	 * there.  if the node that's already there is a reasonable
1969 	 * facsimile of the node we wanted to create, just pretend
1970 	 * (for the caller's benefit) that we managed to create the
1971 	 * node they wanted.
1972 	 */
1973 	if (error == EEXIST) {
1974 		/* name is the same as requested... */
1975 		if (strcmp(nnode.sysctl_name, onode.sysctl_name) == 0 &&
1976 		    /* they want the same function... */
1977 		    nnode.sysctl_func == onode.sysctl_func &&
1978 		    /* number is the same as requested, or... */
1979 		    (nnode.sysctl_num == onode.sysctl_num ||
1980 		     /* they didn't pick a number... */
1981 		     nnode.sysctl_num == CTL_CREATE)) {
1982 			/*
1983 			 * collision here from trying to create
1984 			 * something that already existed; let's give
1985 			 * our customers a hand and tell them they got
1986 			 * what they wanted.
1987 			 */
1988 #ifdef SYSCTL_DEBUG_CREATE
1989 			printf("cleared\n");
1990 #endif /* SYSCTL_DEBUG_CREATE */
1991 			error = 0;
1992 		}
1993 	}
1994 
1995 	if (error == 0 &&
1996 	    (cnode != NULL || log != NULL || descr != NULL)) {
1997 		/*
1998 		 * sysctl_create() gave us back a copy of the node,
1999 		 * but we need to know where it actually is...
2000 		 */
2001 		pnode = root;
2002 		error = sysctl_locate(NULL, &name[0], namelen - 1, &pnode, &ni);
2003 
2004 		/*
2005 		 * manual scan of last layer so that aliased nodes
2006 		 * aren't followed.
2007 		 */
2008 		if (error == 0) {
2009 			for (ni = 0; ni < pnode->sysctl_clen; ni++)
2010 				if (pnode->sysctl_child[ni].sysctl_num ==
2011 				    onode.sysctl_num)
2012 					break;
2013 			if (ni < pnode->sysctl_clen)
2014 				pnode = &pnode->sysctl_child[ni];
2015 			else
2016 				error = ENOENT;
2017 		}
2018 
2019 		/*
2020 		 * not expecting an error here, but...
2021 		 */
2022 		if (error == 0) {
2023 			if (log != NULL)
2024 				sysctl_log_add(log, pnode);
2025 			if (cnode != NULL)
2026 				*cnode = pnode;
2027 			if (descr != NULL) {
2028 				/*
2029 				 * allow first caller to *set* a
2030 				 * description actually to set it
2031 				 *
2032 				 * discard const here so we can attach
2033 				 * the description
2034 				 */
2035 				dnode = __UNCONST(pnode);
2036 				if (pnode->sysctl_desc != NULL)
2037 					/* skip it...we've got one */;
2038 				else if (flags & CTLFLAG_OWNDESC) {
2039 					size_t l = strlen(descr) + 1;
2040 					char *d = malloc(l, M_SYSCTLDATA,
2041 							 M_WAITOK|M_CANFAIL);
2042 					if (d != NULL) {
2043 						memcpy(d, descr, l);
2044 						dnode->sysctl_desc = d;
2045 						dnode->sysctl_flags |=
2046 						    CTLFLAG_OWNDESC;
2047 					}
2048 				}
2049 				else
2050 					dnode->sysctl_desc = descr;
2051 			}
2052 		}
2053 		else {
2054 			printf("sysctl_create succeeded but node not found?!\n");
2055 			/*
2056 			 *  confusing, but the create said it
2057 			 * succeeded, so...
2058 			 */
2059 			error = 0;
2060 		}
2061 	}
2062 
2063 	/*
2064 	 * now it should be safe to release the lock state.  note that
2065 	 * the pointer to the newly created node being passed back may
2066 	 * not be "good" for very long.
2067 	 */
2068 	sysctl_unlock(NULL);
2069 
2070 	if (error != 0) {
2071 		printf("sysctl_createv: sysctl_create(%s) returned %d\n",
2072 		       nnode.sysctl_name, error);
2073 #if 0
2074 		if (error != ENOENT)
2075 			sysctl_dump(&onode);
2076 #endif
2077 	}
2078 
2079 	return (error);
2080 }
2081 
2082 int
2083 sysctl_destroyv(struct sysctlnode *rnode, ...)
2084 {
2085 	va_list ap;
2086 	int error, name[CTL_MAXNAME], namelen, ni;
2087 	const struct sysctlnode *pnode, *node;
2088 	struct sysctlnode dnode, *onode;
2089 	size_t sz;
2090 
2091 	va_start(ap, rnode);
2092 	namelen = 0;
2093 	ni = 0;
2094 	do {
2095 		if (ni == CTL_MAXNAME)
2096 			return (ENAMETOOLONG);
2097 		name[ni] = va_arg(ap, int);
2098 	} while (name[ni++] != CTL_EOL);
2099 	namelen = ni - 1;
2100 	va_end(ap);
2101 
2102 	/*
2103 	 * i can't imagine why we'd be destroying a node when the tree
2104 	 * wasn't complete, but who knows?
2105 	 */
2106 	error = sysctl_lock(NULL, NULL, 0);
2107 	if (error)
2108 		return (error);
2109 
2110 	/*
2111 	 * where is it?
2112 	 */
2113 	node = rnode;
2114 	error = sysctl_locate(NULL, &name[0], namelen - 1, &node, &ni);
2115 	if (error) {
2116 		/* they want it gone and it's not there, so... */
2117 		sysctl_unlock(NULL);
2118 		return (error == ENOENT ? 0 : error);
2119 	}
2120 
2121 	/*
2122 	 * set up the deletion
2123 	 */
2124 	pnode = node;
2125 	node = &dnode;
2126 	memset(&dnode, 0, sizeof(dnode));
2127 	dnode.sysctl_flags = SYSCTL_VERSION;
2128 	dnode.sysctl_num = name[namelen - 1];
2129 
2130 	/*
2131 	 * we found it, now let's nuke it
2132 	 */
2133 	name[namelen - 1] = CTL_DESTROY;
2134 	sz = 0;
2135 	error = sysctl_destroy(&name[namelen - 1], 1, NULL, &sz,
2136 			       node, sizeof(*node), &name[0], NULL,
2137 			       pnode);
2138 	if (error == ENOTEMPTY) {
2139 		/*
2140 		 * think of trying to delete "foo" when "foo.bar"
2141 		 * (which someone else put there) is still in
2142 		 * existence
2143 		 */
2144 		error = 0;
2145 
2146 		/*
2147 		 * dunno who put the description there, but if this
2148 		 * node can ever be removed, we need to make sure the
2149 		 * string doesn't go out of context.  that means we
2150 		 * need to find the node that's still there (don't use
2151 		 * sysctl_locate() because that follows aliasing).
2152 		 */
2153 		node = pnode->sysctl_child;
2154 		for (ni = 0; ni < pnode->sysctl_clen; ni++)
2155 			if (node[ni].sysctl_num == dnode.sysctl_num)
2156 				break;
2157 		node = (ni < pnode->sysctl_clen) ? &node[ni] : NULL;
2158 
2159 		/*
2160 		 * if we found it, and this node has a description,
2161 		 * and this node can be released, and it doesn't
2162 		 * already own its own description...sigh.  :)
2163 		 */
2164 		if (node != NULL && node->sysctl_desc != NULL &&
2165 		    !(node->sysctl_flags & CTLFLAG_PERMANENT) &&
2166 		    !(node->sysctl_flags & CTLFLAG_OWNDESC)) {
2167 			char *d;
2168 
2169 			sz = strlen(node->sysctl_desc) + 1;
2170 			d = malloc(sz, M_SYSCTLDATA, M_WAITOK|M_CANFAIL);
2171 			if (d != NULL) {
2172 				/*
2173 				 * discard const so that we can
2174 				 * re-attach the description
2175 				 */
2176 				memcpy(d, node->sysctl_desc, sz);
2177 				onode = __UNCONST(node);
2178 				onode->sysctl_desc = d;
2179 				onode->sysctl_flags |= CTLFLAG_OWNDESC;
2180 			}
2181 			else {
2182 				/*
2183 				 * XXX drop the description?  be
2184 				 * afraid?  don't care?
2185 				 */
2186 			}
2187 		}
2188 	}
2189 
2190         sysctl_unlock(NULL);
2191 
2192 	return (error);
2193 }
2194 
2195 #if 0
2196 /*
2197  * ********************************************************************
2198  * the dump routine.  i haven't yet decided how (if at all) i'll call
2199  * this from userland when it's in the kernel.
2200  * ********************************************************************
2201  */
2202 static const char *
2203 sf(int f)
2204 {
2205 	static char s[256];
2206 	char *c;
2207 
2208 	s[0] = '\0';
2209 	c = "";
2210 
2211 #define print_flag(_f, _s, _c, _q, _x) \
2212 	if (((_f) & (__CONCAT(CTLFLAG_,_x))) == (__CONCAT(CTLFLAG_,_q))) { \
2213 		strlcat((_s), (_c), sizeof(_s)); \
2214 		strlcat((_s), __STRING(_q), sizeof(_s)); \
2215 		(_c) = ","; \
2216 		(_f) &= ~__CONCAT(CTLFLAG_,_x); \
2217 	}
2218 
2219 	print_flag(f, s, c, READONLY,  READWRITE);
2220 	print_flag(f, s, c, READONLY1, READWRITE);
2221 	print_flag(f, s, c, READONLY2, READWRITE);
2222 	print_flag(f, s, c, READWRITE, READWRITE);
2223 	print_flag(f, s, c, ANYWRITE,  ANYWRITE);
2224 	print_flag(f, s, c, PRIVATE,   PRIVATE);
2225 	print_flag(f, s, c, PERMANENT, PERMANENT);
2226 	print_flag(f, s, c, OWNDATA,   OWNDATA);
2227 	print_flag(f, s, c, IMMEDIATE, IMMEDIATE);
2228 	print_flag(f, s, c, HEX,       HEX);
2229 	print_flag(f, s, c, ROOT,      ROOT);
2230 	print_flag(f, s, c, ANYNUMBER, ANYNUMBER);
2231 	print_flag(f, s, c, HIDDEN,    HIDDEN);
2232 	print_flag(f, s, c, ALIAS,     ALIAS);
2233 #undef print_flag
2234 
2235 	if (f) {
2236 		char foo[9];
2237 		snprintf(foo, sizeof(foo), "%x", f);
2238 		strlcat(s, c, sizeof(s));
2239 		strlcat(s, foo, sizeof(s));
2240 	}
2241 
2242 	return (s);
2243 }
2244 
2245 static const char *
2246 st(int t)
2247 {
2248 
2249 	switch (t) {
2250 	case CTLTYPE_NODE:
2251 		return "NODE";
2252 	case CTLTYPE_INT:
2253 		return "INT";
2254 	case CTLTYPE_STRING:
2255 		return "STRING";
2256 	case CTLTYPE_QUAD:
2257 		return "QUAD";
2258 	case CTLTYPE_STRUCT:
2259 		return "STRUCT";
2260 	}
2261 
2262 	return "???";
2263 }
2264 
2265 void
2266 sysctl_dump(const struct sysctlnode *d)
2267 {
2268 	static char nmib[64], smib[256];
2269 	static int indent;
2270 	struct sysctlnode *n;
2271 	char *np, *sp, tmp[20];
2272 	int i;
2273 
2274 	if (d == NULL)
2275 		return;
2276 
2277 	np = &nmib[strlen(nmib)];
2278 	sp = &smib[strlen(smib)];
2279 
2280 	if (!(d->sysctl_flags & CTLFLAG_ROOT)) {
2281 		snprintf(tmp, sizeof(tmp), "%d", d->sysctl_num);
2282 		strcat(nmib, ".");
2283 		strcat(smib, ".");
2284 		strcat(nmib, tmp);
2285 		strcat(smib, d->sysctl_name);
2286 		printf("%s -> %s (%d)\n", &nmib[1], &smib[1],
2287 		       SYSCTL_TYPE(d->sysctl_flags));
2288 	}
2289 
2290 	if (1) {
2291 		printf("%*s%p:\tsysctl_name  [%s]\n", indent, "",
2292 		       d, d->sysctl_name);
2293 		printf("%*s\t\tsysctl_num    %d\n",   indent, "",
2294 		       d->sysctl_num);
2295 		printf("%*s\t\tsysctl_flags  %x (flags=%x<%s> type=%d<%s> "
2296 		       "size=%zu)\n",
2297 		       indent, "", d->sysctl_flags,
2298 		       SYSCTL_FLAGS(d->sysctl_flags),
2299 		       sf(SYSCTL_FLAGS(d->sysctl_flags)),
2300 		       SYSCTL_TYPE(d->sysctl_flags),
2301 		       st(SYSCTL_TYPE(d->sysctl_flags)),
2302 		       d->sysctl_size);
2303 		if (SYSCTL_TYPE(d->sysctl_flags) == CTLTYPE_NODE) {
2304 			printf("%*s\t\tsysctl_csize  %d\n",   indent, "",
2305 			       d->sysctl_csize);
2306 			printf("%*s\t\tsysctl_clen   %d\n",   indent, "",
2307 			       d->sysctl_clen);
2308 			printf("%*s\t\tsysctl_child  %p\n",   indent, "",
2309 			       d->sysctl_child);
2310 		}
2311 		else
2312 			printf("%*s\t\tsysctl_data   %p\n",   indent, "",
2313 			       d->sysctl_data);
2314 		printf("%*s\t\tsysctl_func   %p\n",   indent, "",
2315 		       d->sysctl_func);
2316 		printf("%*s\t\tsysctl_parent %p\n",   indent, "",
2317 		       d->sysctl_parent);
2318 		printf("%*s\t\tsysctl_ver    %d\n",   indent, "",
2319 		       d->sysctl_ver);
2320 	}
2321 
2322 	if (SYSCTL_TYPE(d->sysctl_flags) == CTLTYPE_NODE) {
2323 		indent += 8;
2324 		n = d->sysctl_child;
2325 		for (i = 0; i < d->sysctl_clen; i++) {
2326 			sysctl_dump(&n[i]);
2327 		}
2328 		indent -= 8;
2329 	}
2330 
2331 	np[0] = '\0';
2332 	sp[0] = '\0';
2333 }
2334 #endif /* 0 */
2335 
2336 /*
2337  * ********************************************************************
2338  * Deletes an entire n-ary tree.  Not recommended unless you know why
2339  * you're doing it.  Personally, I don't know why you'd even think
2340  * about it.
2341  * ********************************************************************
2342  */
2343 void
2344 sysctl_free(struct sysctlnode *rnode)
2345 {
2346 	struct sysctlnode *node, *pnode;
2347 
2348 	if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
2349 		printf("sysctl_free: rnode %p wrong version\n", rnode);
2350 		return;
2351 	}
2352 
2353 	if (rnode == NULL)
2354 		rnode = &sysctl_root;
2355 	pnode = rnode;
2356 
2357 	node = pnode->sysctl_child;
2358 	do {
2359 		while (node != NULL && pnode->sysctl_csize > 0) {
2360 			while (node <
2361 			       &pnode->sysctl_child[pnode->sysctl_clen] &&
2362 			       (SYSCTL_TYPE(node->sysctl_flags) !=
2363 				CTLTYPE_NODE ||
2364 				node->sysctl_csize == 0)) {
2365 				if (SYSCTL_FLAGS(node->sysctl_flags) &
2366 				    CTLFLAG_OWNDATA) {
2367 					if (node->sysctl_data != NULL) {
2368 						FREE(node->sysctl_data,
2369 						     M_SYSCTLDATA);
2370 						node->sysctl_data = NULL;
2371 					}
2372 				}
2373 				if (SYSCTL_FLAGS(node->sysctl_flags) &
2374 				    CTLFLAG_OWNDESC) {
2375 					if (node->sysctl_desc != NULL) {
2376 						/*XXXUNCONST*/
2377 						FREE(__UNCONST(node->sysctl_desc),
2378 						     M_SYSCTLDATA);
2379 						node->sysctl_desc = NULL;
2380 					}
2381 				}
2382 				node++;
2383 			}
2384 			if (node < &pnode->sysctl_child[pnode->sysctl_clen]) {
2385 				pnode = node;
2386 				node = node->sysctl_child;
2387 			}
2388 			else
2389 				break;
2390 		}
2391 		if (pnode->sysctl_child != NULL)
2392 			FREE(pnode->sysctl_child, M_SYSCTLNODE);
2393 		pnode->sysctl_clen = 0;
2394 		pnode->sysctl_csize = 0;
2395 		pnode->sysctl_child = NULL;
2396 		node = pnode;
2397 		pnode = node->sysctl_parent;
2398 	} while (pnode != NULL && node != rnode);
2399 }
2400 
2401 int
2402 sysctl_log_add(struct sysctllog **logp, const struct sysctlnode *node)
2403 {
2404 	int name[CTL_MAXNAME], namelen, i;
2405 	const struct sysctlnode *pnode;
2406 	struct sysctllog *log;
2407 
2408 	if (node->sysctl_flags & CTLFLAG_PERMANENT)
2409 		return (0);
2410 
2411 	if (logp == NULL)
2412 		return (0);
2413 
2414 	if (*logp == NULL) {
2415 		MALLOC(log, struct sysctllog *, sizeof(struct sysctllog),
2416 		       M_SYSCTLDATA, M_WAITOK|M_CANFAIL);
2417 		if (log == NULL) {
2418 			/* XXX print error message? */
2419 			return (-1);
2420 		}
2421 		MALLOC(log->log_num, int *, 16 * sizeof(int),
2422 		       M_SYSCTLDATA, M_WAITOK|M_CANFAIL);
2423 		if (log->log_num == NULL) {
2424 			/* XXX print error message? */
2425 			free(log, M_SYSCTLDATA);
2426 			return (-1);
2427 		}
2428 		memset(log->log_num, 0, 16 * sizeof(int));
2429 		log->log_root = NULL;
2430 		log->log_size = 16;
2431 		log->log_left = 16;
2432 		*logp = log;
2433 	}
2434 	else
2435 		log = *logp;
2436 
2437 	/*
2438 	 * check that the root is proper.  it's okay to record the
2439 	 * address of the root of a tree.  it's the only thing that's
2440 	 * guaranteed not to shift around as nodes come and go.
2441 	 */
2442 	if (log->log_root == NULL)
2443 		log->log_root = sysctl_rootof(node);
2444 	else if (log->log_root != sysctl_rootof(node)) {
2445 		printf("sysctl: log %p root mismatch (%p)\n",
2446 		       log->log_root, sysctl_rootof(node));
2447 		return (-1);
2448 	}
2449 
2450 	/*
2451 	 * we will copy out name in reverse order
2452 	 */
2453 	for (pnode = node, namelen = 0;
2454 	     pnode != NULL && !(pnode->sysctl_flags & CTLFLAG_ROOT);
2455 	     pnode = pnode->sysctl_parent)
2456 		name[namelen++] = pnode->sysctl_num;
2457 
2458 	/*
2459 	 * do we have space?
2460 	 */
2461 	if (log->log_left < (namelen + 3))
2462 		sysctl_log_realloc(log);
2463 	if (log->log_left < (namelen + 3))
2464 		return (-1);
2465 
2466 	/*
2467 	 * stuff name in, then namelen, then node type, and finally,
2468 	 * the version for non-node nodes.
2469 	 */
2470 	for (i = 0; i < namelen; i++)
2471 		log->log_num[--log->log_left] = name[i];
2472 	log->log_num[--log->log_left] = namelen;
2473 	log->log_num[--log->log_left] = SYSCTL_TYPE(node->sysctl_flags);
2474 	if (log->log_num[log->log_left] != CTLTYPE_NODE)
2475 		log->log_num[--log->log_left] = node->sysctl_ver;
2476 	else
2477 		log->log_num[--log->log_left] = 0;
2478 
2479 	return (0);
2480 }
2481 
2482 void
2483 sysctl_teardown(struct sysctllog **logp)
2484 {
2485 	const struct sysctlnode *rnode;
2486 	struct sysctlnode node;
2487 	struct sysctllog *log;
2488 	uint namelen;
2489 	int *name, t, v, error, ni;
2490 	size_t sz;
2491 
2492 	if (logp == NULL || *logp == NULL)
2493 		return;
2494 	log = *logp;
2495 
2496 	error = sysctl_lock(NULL, NULL, 0);
2497 	if (error)
2498 		return;
2499 
2500 	memset(&node, 0, sizeof(node));
2501 
2502 	while (log->log_left < log->log_size) {
2503 		KASSERT((log->log_left + 3 < log->log_size) &&
2504 			(log->log_left + log->log_num[log->log_left + 2] <=
2505 			 log->log_size));
2506 		v = log->log_num[log->log_left++];
2507 		t = log->log_num[log->log_left++];
2508 		namelen = log->log_num[log->log_left++];
2509 		name = &log->log_num[log->log_left];
2510 
2511 		node.sysctl_num = name[namelen - 1];
2512 		node.sysctl_flags = SYSCTL_VERSION|t;
2513 		node.sysctl_ver = v;
2514 
2515 		rnode = log->log_root;
2516 		error = sysctl_locate(NULL, &name[0], namelen, &rnode, &ni);
2517 		if (error == 0) {
2518 			name[namelen - 1] = CTL_DESTROY;
2519 			rnode = rnode->sysctl_parent;
2520 			sz = 0;
2521 			(void)sysctl_destroy(&name[namelen - 1], 1, NULL,
2522 					     &sz, &node, sizeof(node),
2523 					     &name[0], NULL, rnode);
2524 		}
2525 
2526 		log->log_left += namelen;
2527 	}
2528 
2529 	KASSERT(log->log_size == log->log_left);
2530 	free(log->log_num, M_SYSCTLDATA);
2531 	free(log, M_SYSCTLDATA);
2532 	*logp = NULL;
2533 
2534 	sysctl_unlock(NULL);
2535 }
2536 
2537 /*
2538  * ********************************************************************
2539  * old_sysctl -- A routine to bridge old-style internal calls to the
2540  * new infrastructure.
2541  * ********************************************************************
2542  */
2543 int
2544 old_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2545 	   void *newp, size_t newlen, struct lwp *l)
2546 {
2547 	int error;
2548 	size_t oldlen = 0;
2549 	size_t savelen;
2550 
2551 	if (oldlenp) {
2552 		oldlen = *oldlenp;
2553 	}
2554 	savelen = oldlen;
2555 
2556 	error = sysctl_lock(l, oldp, savelen);
2557 	if (error)
2558 		return (error);
2559 	error = sysctl_dispatch(name, namelen, oldp, &oldlen,
2560 				newp, newlen, name, l, NULL);
2561 	sysctl_unlock(l);
2562 	if (error == 0 && oldp != NULL && savelen < oldlen)
2563 		error = ENOMEM;
2564 
2565 	if (oldlenp) {
2566 		*oldlenp = oldlen;
2567 	}
2568 
2569 	return (error);
2570 }
2571 
2572 /*
2573  * ********************************************************************
2574  * Section 4: Generic helper routines
2575  * ********************************************************************
2576  * "helper" routines that can do more finely grained access control,
2577  * construct structures from disparate information, create the
2578  * appearance of more nodes and sub-trees, etc.  for example, if
2579  * CTL_PROC wanted a helper function, it could respond to a CTL_QUERY
2580  * with a dynamically created list of nodes that represented the
2581  * currently running processes at that instant.
2582  * ********************************************************************
2583  */
2584 
2585 /*
2586  * first, a few generic helpers that provide:
2587  *
2588  * sysctl_needfunc()		a readonly interface that emits a warning
2589  * sysctl_notavail()		returns EOPNOTSUPP (generic error)
2590  * sysctl_null()		an empty return buffer with no error
2591  */
2592 int
2593 sysctl_needfunc(SYSCTLFN_ARGS)
2594 {
2595 	int error;
2596 
2597 	printf("!!SYSCTL_NEEDFUNC!!\n");
2598 
2599 	if (newp != NULL || namelen != 0)
2600 		return (EOPNOTSUPP);
2601 
2602 	error = 0;
2603 	if (oldp != NULL)
2604 		error = sysctl_copyout(l, rnode->sysctl_data, oldp,
2605 				       MIN(rnode->sysctl_size, *oldlenp));
2606 	*oldlenp = rnode->sysctl_size;
2607 
2608 	return (error);
2609 }
2610 
2611 int
2612 sysctl_notavail(SYSCTLFN_ARGS)
2613 {
2614 
2615 	if (namelen == 1 && name[0] == CTL_QUERY)
2616 		return (sysctl_query(SYSCTLFN_CALL(rnode)));
2617 
2618 	return (EOPNOTSUPP);
2619 }
2620 
2621 int
2622 sysctl_null(SYSCTLFN_ARGS)
2623 {
2624 
2625 	*oldlenp = 0;
2626 
2627 	return (0);
2628 }
2629 
2630 /*
2631  * ********************************************************************
2632  * Section 5: The machinery that makes it all go
2633  * ********************************************************************
2634  * Memory "manglement" routines.  Not much to this, eh?
2635  * ********************************************************************
2636  */
2637 static int
2638 sysctl_alloc(struct sysctlnode *p, int x)
2639 {
2640 	int i;
2641 	struct sysctlnode *n;
2642 
2643 	assert(p->sysctl_child == NULL);
2644 
2645 	if (x == 1)
2646 		MALLOC(n, struct sysctlnode *,
2647 		       sizeof(struct sysctlnode),
2648 		       M_SYSCTLNODE, M_WAITOK|M_CANFAIL);
2649 	else
2650 		MALLOC(n, struct sysctlnode *,
2651 		       SYSCTL_DEFSIZE * sizeof(struct sysctlnode),
2652 		       M_SYSCTLNODE, M_WAITOK|M_CANFAIL);
2653 	if (n == NULL)
2654 		return (ENOMEM);
2655 
2656 	if (x == 1) {
2657 		memset(n, 0, sizeof(struct sysctlnode));
2658 		p->sysctl_csize = 1;
2659 	}
2660 	else {
2661 		memset(n, 0, SYSCTL_DEFSIZE * sizeof(struct sysctlnode));
2662 		p->sysctl_csize = SYSCTL_DEFSIZE;
2663 	}
2664 	p->sysctl_clen = 0;
2665 
2666 	for (i = 0; i < p->sysctl_csize; i++)
2667 		n[i].sysctl_parent = p;
2668 
2669 	p->sysctl_child = n;
2670 	return (0);
2671 }
2672 
2673 static int
2674 sysctl_realloc(struct sysctlnode *p)
2675 {
2676 	int i, j;
2677 	struct sysctlnode *n;
2678 
2679 	assert(p->sysctl_csize == p->sysctl_clen);
2680 
2681 	/*
2682 	 * how many do we have...how many should we make?
2683 	 */
2684 	i = p->sysctl_clen;
2685 	n = malloc(2 * i * sizeof(struct sysctlnode), M_SYSCTLNODE,
2686 		   M_WAITOK|M_CANFAIL);
2687 	if (n == NULL)
2688 		return (ENOMEM);
2689 
2690 	/*
2691 	 * move old children over...initialize new children
2692 	 */
2693 	memcpy(n, p->sysctl_child, i * sizeof(struct sysctlnode));
2694 	memset(&n[i], 0, i * sizeof(struct sysctlnode));
2695 	p->sysctl_csize = 2 * i;
2696 
2697 	/*
2698 	 * reattach moved (and new) children to parent; if a moved
2699 	 * child node has children, reattach the parent pointers of
2700 	 * grandchildren
2701 	 */
2702         for (i = 0; i < p->sysctl_csize; i++) {
2703                 n[i].sysctl_parent = p;
2704 		if (n[i].sysctl_child != NULL) {
2705 			for (j = 0; j < n[i].sysctl_csize; j++)
2706 				n[i].sysctl_child[j].sysctl_parent = &n[i];
2707 		}
2708 	}
2709 
2710 	/*
2711 	 * get out with the old and in with the new
2712 	 */
2713 	FREE(p->sysctl_child, M_SYSCTLNODE);
2714 	p->sysctl_child = n;
2715 
2716 	return (0);
2717 }
2718 
2719 static int
2720 sysctl_log_realloc(struct sysctllog *log)
2721 {
2722 	int *n, s, d;
2723 
2724 	s = log->log_size * 2;
2725 	d = log->log_size;
2726 
2727 	n = malloc(s * sizeof(int), M_SYSCTLDATA, M_WAITOK|M_CANFAIL);
2728 	if (n == NULL)
2729 		return (-1);
2730 
2731 	memset(n, 0, s * sizeof(int));
2732 	memcpy(&n[d], log->log_num, d * sizeof(int));
2733 	free(log->log_num, M_SYSCTLDATA);
2734 	log->log_num = n;
2735 	if (d)
2736 		log->log_left += d;
2737 	else
2738 		log->log_left = s;
2739 	log->log_size = s;
2740 
2741 	return (0);
2742 }
2743 
2744 /*
2745  * ********************************************************************
2746  * Section 6: Conversion between API versions wrt the sysctlnode
2747  * ********************************************************************
2748  */
2749 static int
2750 sysctl_cvt_in(struct lwp *l, int *vp, const void *i, size_t sz,
2751 	      struct sysctlnode *node)
2752 {
2753 	int error, flags;
2754 
2755 	if (i == NULL || sz < sizeof(flags))
2756 		return (EINVAL);
2757 
2758 	error = sysctl_copyin(l, i, &flags, sizeof(flags));
2759 	if (error)
2760 		return (error);
2761 
2762 #if (SYSCTL_VERSION != SYSCTL_VERS_1)
2763 #error sysctl_cvt_in: no support for SYSCTL_VERSION
2764 #endif /*  (SYSCTL_VERSION != SYSCTL_VERS_1) */
2765 
2766 	if (sz == sizeof(*node) &&
2767 	    SYSCTL_VERS(flags) == SYSCTL_VERSION) {
2768 		error = sysctl_copyin(l, i, node, sizeof(*node));
2769 		if (error)
2770 			return (error);
2771 		*vp = SYSCTL_VERSION;
2772 		return (0);
2773 	}
2774 
2775 	return (EINVAL);
2776 }
2777 
2778 static int
2779 sysctl_cvt_out(struct lwp *l, int v, const struct sysctlnode *i,
2780 	       void *ovp, size_t left, size_t *szp)
2781 {
2782 	size_t sz = sizeof(*i);
2783 	const void *src = i;
2784 	int error;
2785 
2786 	switch (v) {
2787 	case SYSCTL_VERS_0:
2788 		return (EINVAL);
2789 
2790 #if (SYSCTL_VERSION != SYSCTL_VERS_1)
2791 #error sysctl_cvt_out: no support for SYSCTL_VERSION
2792 #endif /*  (SYSCTL_VERSION != SYSCTL_VERS_1) */
2793 
2794 	case SYSCTL_VERSION:
2795 		/* nothing more to do here */
2796 		break;
2797 	}
2798 
2799 	if (ovp != NULL && left >= sz) {
2800 		error = sysctl_copyout(l, src, ovp, sz);
2801 		if (error)
2802 			return (error);
2803 	}
2804 
2805 	if (szp != NULL)
2806 		*szp = sz;
2807 
2808 	return (0);
2809 }
2810