xref: /netbsd-src/sys/kern/kern_sysctl.c (revision e5548b402ae4c44fb816de42c7bba9581ce23ef5)
1 /*	$NetBSD: kern_sysctl.c,v 1.188 2005/11/27 12:07:24 yamt Exp $	*/
2 
3 /*-
4  * Copyright (c) 2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Andrew Brown.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *      This product includes software developed by the NetBSD
21  *      Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*-
40  * Copyright (c) 1982, 1986, 1989, 1993
41  *	The Regents of the University of California.  All rights reserved.
42  *
43  * This code is derived from software contributed to Berkeley by
44  * Mike Karels at Berkeley Software Design, Inc.
45  *
46  * Redistribution and use in source and binary forms, with or without
47  * modification, are permitted provided that the following conditions
48  * are met:
49  * 1. Redistributions of source code must retain the above copyright
50  *    notice, this list of conditions and the following disclaimer.
51  * 2. Redistributions in binary form must reproduce the above copyright
52  *    notice, this list of conditions and the following disclaimer in the
53  *    documentation and/or other materials provided with the distribution.
54  * 3. Neither the name of the University nor the names of its contributors
55  *    may be used to endorse or promote products derived from this software
56  *    without specific prior written permission.
57  *
58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68  * SUCH DAMAGE.
69  *
70  *	@(#)kern_sysctl.c	8.9 (Berkeley) 5/20/95
71  */
72 
73 /*
74  * sysctl system call.
75  */
76 
77 #include <sys/cdefs.h>
78 __KERNEL_RCSID(0, "$NetBSD: kern_sysctl.c,v 1.188 2005/11/27 12:07:24 yamt Exp $");
79 
80 #include "opt_defcorename.h"
81 #include "opt_insecure.h"
82 #include "ksyms.h"
83 
84 #include <sys/param.h>
85 #define __COMPAT_SYSCTL
86 #include <sys/sysctl.h>
87 #include <sys/systm.h>
88 #include <sys/buf.h>
89 #include <sys/ksyms.h>
90 #include <sys/malloc.h>
91 #include <sys/mount.h>
92 #include <sys/sa.h>
93 #include <sys/syscallargs.h>
94 #include <machine/stdarg.h>
95 
96 MALLOC_DEFINE(M_SYSCTLNODE, "sysctlnode", "sysctl node structures");
97 MALLOC_DEFINE(M_SYSCTLDATA, "sysctldata", "misc sysctl data");
98 
99 static int sysctl_mmap(SYSCTLFN_PROTO);
100 static int sysctl_alloc(struct sysctlnode *, int);
101 static int sysctl_realloc(struct sysctlnode *);
102 
103 static int sysctl_cvt_in(struct lwp *, int *, const void *, size_t,
104 			 struct sysctlnode *);
105 static int sysctl_cvt_out(struct lwp *, int, const struct sysctlnode *,
106 			  void *, size_t, size_t *);
107 
108 static int sysctl_log_add(struct sysctllog **, const struct sysctlnode *);
109 static int sysctl_log_realloc(struct sysctllog *);
110 
/*
 * Bookkeeping record handed to sysctl_log_add() and
 * sysctl_log_realloc().
 *
 * NOTE(review): the per-field notes below are inferred from the field
 * names only; the code that fills them in is not visible here --
 * confirm against sysctl_log_add().
 */
struct sysctllog {
	/* presumably the root of the tree the logged nodes belong to */
	const struct sysctlnode *log_root;
	/* presumably the buffer of recorded mib numbers */
	int *log_num;
	/* presumably the capacity of log_num */
	int log_size;
	/* presumably how much of log_num is still unused */
	int log_left;
};
116 
/*
 * the "root" of the new sysctl tree
 *
 * It is statically set up as a read-write CTLTYPE_NODE carrying the
 * CTLFLAG_ROOT marker.  sysctl_num starts out as 0 here, is reset to
 * CREATE_BASE by sysctl_init(), and is incremented by sysctl_create()
 * each time it has to pick a dynamic mib number.  sysctl_locate()
 * falls back to this node when the caller does not supply a tree.
 */
struct sysctlnode sysctl_root = {
	.sysctl_flags = SYSCTL_VERSION|
	    CTLFLAG_ROOT|CTLFLAG_READWRITE|
	    CTLTYPE_NODE,
	.sysctl_num = 0,
	/*
	 * XXX once all ports are on gcc3, we can get rid of this
	 * ugliness and simply make it into
	 *
	 *	.sysctl_size = sizeof(struct sysctlnode),
	 */
	sysc_init_field(_sysctl_size, sizeof(struct sysctlnode)),
	.sysctl_name = "(root)",
};
134 
/*
 * link set of functions that add nodes at boot time (see also
 * sysctl_buildtree())
 *
 * sysctl_init() walks this set with __link_set_foreach() and calls
 * each entry once, passing NULL as its only argument.
 */
__link_set_decl(sysctl_funcs, sysctl_setup_func);
140 
/*
 * `sysctl_treelock' is intended to serialize access to the sysctl
 * tree.  Given that it is now (a) dynamic, and (b) most consumers of
 * sysctl are going to be copying data out, the old `sysctl_memlock'
 * has been `upgraded' to simply guard the whole tree.
 *
 * sysctl_memaddr and sysctl_memsize keep track of the chunk of memory
 * that sysctl_lock() wired with uvm_vslock(), if there is one, so
 * that sysctl_unlock() can release it easily from anywhere.  A
 * sysctl_memsize of zero means that nothing is currently recorded.
 */
struct lock sysctl_treelock;
caddr_t sysctl_memaddr;
size_t sysctl_memsize;
154 
/*
 * Attributes stored in the kernel.
 *
 * NOTE(review): hostnamelen and domainnamelen presumably hold the
 * current lengths of the strings in hostname[] and domainname[]; the
 * code maintaining them is not in this part of the file -- confirm.
 */
char hostname[MAXHOSTNAMELEN];
int hostnamelen;

char domainname[MAXHOSTNAMELEN];
int domainnamelen;

long hostid;

/*
 * securelevel starts out at -1 when INSECURE is defined and at 0
 * otherwise.  sysctl_create() refuses requests from processes once
 * it is greater than 0.
 */
#ifdef INSECURE
int securelevel = -1;
#else
int securelevel = 0;
#endif

/*
 * initial contents of defcorename; "%n.core" is used unless
 * DEFCORENAME has already been defined (presumably through
 * opt_defcorename.h).
 */
#ifndef DEFCORENAME
#define	DEFCORENAME	"%n.core"
#endif
char defcorename[MAXPATHLEN] = DEFCORENAME;
176 
177 /*
178  * ********************************************************************
179  * Section 0: Some simple glue
180  * ********************************************************************
181  * By wrapping copyin(), copyout(), and copyinstr() like this, we can
182  * stop caring about who's calling us and simplify some code a bunch.
183  * ********************************************************************
184  */
185 static inline int
186 sysctl_copyin(const struct lwp *l, const void *uaddr, void *kaddr, size_t len)
187 {
188 
189 	if (l != NULL)
190 		return (copyin(uaddr, kaddr, len));
191 	else
192 		return (kcopy(uaddr, kaddr, len));
193 }
194 
195 static inline int
196 sysctl_copyout(const struct lwp *l, const void *kaddr, void *uaddr, size_t len)
197 {
198 
199 	if (l != NULL)
200 		return (copyout(kaddr, uaddr, len));
201 	else
202 		return (kcopy(kaddr, uaddr, len));
203 }
204 
205 static inline int
206 sysctl_copyinstr(const struct lwp *l, const void *uaddr, void *kaddr,
207 		 size_t len, size_t *done)
208 {
209 
210 	if (l != NULL)
211 		return (copyinstr(uaddr, kaddr, len, done));
212 	else
213 		return (copystr(uaddr, kaddr, len, done));
214 }
215 
216 /*
217  * ********************************************************************
218  * Initialize sysctl subsystem.
219  * ********************************************************************
220  */
221 void
222 sysctl_init(void)
223 {
224 	sysctl_setup_func * const *sysctl_setup, f;
225 
226 	lockinit(&sysctl_treelock, PRIBIO|PCATCH, "sysctl", 0, 0);
227 
228 	/*
229 	 * dynamic mib numbers start here
230 	 */
231 	sysctl_root.sysctl_num = CREATE_BASE;
232 
233         __link_set_foreach(sysctl_setup, sysctl_funcs) {
234 		/*
235 		 * XXX - why do i have to coerce the pointers like this?
236 		 */
237 		f = (void*)*sysctl_setup;
238 		(*f)(NULL);
239 	}
240 
241 	/*
242 	 * setting this means no more permanent nodes can be added,
243 	 * trees that claim to be readonly at the root now are, and if
244 	 * the main tree is readonly, *everything* is.
245 	 */
246 	sysctl_root.sysctl_flags |= CTLFLAG_PERMANENT;
247 
248 }
249 
250 /*
251  * ********************************************************************
252  * The main native sysctl system call itself.
253  * ********************************************************************
254  */
255 int
256 sys___sysctl(struct lwp *l, void *v, register_t *retval)
257 {
258 	struct sys___sysctl_args /* {
259 		syscallarg(int *) name;
260 		syscallarg(u_int) namelen;
261 		syscallarg(void *) old;
262 		syscallarg(size_t *) oldlenp;
263 		syscallarg(void *) new;
264 		syscallarg(size_t) newlen;
265 	} */ *uap = v;
266 	int error, nerror, name[CTL_MAXNAME];
267 	size_t oldlen, savelen, *oldlenp;
268 
269 	/*
270 	 * get oldlen
271 	 */
272 	oldlen = 0;
273 	oldlenp = SCARG(uap, oldlenp);
274 	if (oldlenp != NULL) {
275 		error = copyin(oldlenp, &oldlen, sizeof(oldlen));
276 		if (error)
277 			return (error);
278 	}
279 	savelen = oldlen;
280 
281 	/*
282 	 * top-level sysctl names may or may not be non-terminal, but
283 	 * we don't care
284 	 */
285 	if (SCARG(uap, namelen) > CTL_MAXNAME || SCARG(uap, namelen) < 1)
286 		return (EINVAL);
287 	error = copyin(SCARG(uap, name), &name,
288 		       SCARG(uap, namelen) * sizeof(int));
289 	if (error)
290 		return (error);
291 
292 	/*
293 	 * wire old so that copyout() is less likely to fail?
294 	 */
295 	error = sysctl_lock(l, SCARG(uap, old), savelen);
296 	if (error)
297 		return (error);
298 
299 	/*
300 	 * do sysctl work (NULL means main built-in default tree)
301 	 */
302 	error = sysctl_dispatch(&name[0], SCARG(uap, namelen),
303 				SCARG(uap, old), &oldlen,
304 				SCARG(uap, new), SCARG(uap, newlen),
305 				&name[0], l, NULL);
306 
307 	/*
308 	 * release the sysctl lock
309 	 */
310 	sysctl_unlock(l);
311 
312 	/*
313 	 * set caller's oldlen to new value even in the face of an
314 	 * error (if this gets an error and they didn't have one, they
315 	 * get this one)
316 	 */
317 	if (oldlenp) {
318 		nerror = copyout(&oldlen, oldlenp, sizeof(oldlen));
319 		if (error == 0)
320 			error = nerror;
321 	}
322 
323 	/*
324 	 * if the only problem is that we weren't given enough space,
325 	 * that's an ENOMEM error
326 	 */
327 	if (error == 0 && SCARG(uap, old) != NULL && savelen < oldlen)
328 		error = ENOMEM;
329 
330 	return (error);
331 }
332 
333 /*
334  * ********************************************************************
335  * Section 1: How the tree is used
336  * ********************************************************************
337  * Implementations of sysctl for emulations should typically need only
338  * these three functions in this order: lock the tree, dispatch
339  * request into it, unlock the tree.
340  * ********************************************************************
341  */
342 int
343 sysctl_lock(struct lwp *l, void *oldp, size_t savelen)
344 {
345 	int error = 0;
346 
347 	error = lockmgr(&sysctl_treelock, LK_EXCLUSIVE, NULL);
348 	if (error)
349 		return (error);
350 
351 	if (l != NULL && oldp != NULL && savelen) {
352 		error = uvm_vslock(l->l_proc, oldp, savelen, VM_PROT_WRITE);
353 		if (error) {
354 			(void) lockmgr(&sysctl_treelock, LK_RELEASE, NULL);
355 			return (error);
356 		}
357 		sysctl_memaddr = oldp;
358 		sysctl_memsize = savelen;
359 	}
360 
361 	return (0);
362 }
363 
364 /*
365  * ********************************************************************
366  * the main sysctl dispatch routine.  scans the given tree and picks a
367  * function to call based on what it finds.
368  * ********************************************************************
369  */
370 int
371 sysctl_dispatch(SYSCTLFN_ARGS)
372 {
373 	int error;
374 	sysctlfn fn;
375 	int ni;
376 
377 	if (rnode && SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
378 		printf("sysctl_dispatch: rnode %p wrong version\n", rnode);
379 		return (EINVAL);
380 	}
381 
382 	fn = NULL;
383 	error = sysctl_locate(l, name, namelen, &rnode, &ni);
384 
385 	if (rnode->sysctl_func != NULL) {
386 		/*
387 		 * the node we ended up at has a function, so call it.  it can
388 		 * hand off to query or create if it wants to.
389 		 */
390 		fn = rnode->sysctl_func;
391 	} else if (error == 0) {
392 		/*
393 		 * we found the node they were looking for, so do a lookup.
394 		 */
395 		fn = (sysctlfn)sysctl_lookup; /* XXX may write to rnode */
396 	} else if (error == ENOENT && (ni + 1) == namelen && name[ni] < 0) {
397 		/*
398 		 * prospective parent node found, but the terminal node was
399 		 * not.  generic operations associate with the parent.
400 		 */
401 		switch (name[ni]) {
402 		case CTL_QUERY:
403 			fn = sysctl_query;
404 			break;
405 		case CTL_CREATE:
406 #if NKSYMS > 0
407 		case CTL_CREATESYM:
408 #endif /* NKSYMS > 0 */
409 			fn = (sysctlfn)sysctl_create; /* we own the rnode */
410 			break;
411 		case CTL_DESTROY:
412 			fn = (sysctlfn)sysctl_destroy; /* we own the rnode */
413 			break;
414 		case CTL_MMAP:
415 			fn = (sysctlfn)sysctl_mmap; /* we own the rnode */
416 			break;
417 		case CTL_DESCRIBE:
418 			fn = sysctl_describe;
419 			break;
420 		default:
421 			error = EOPNOTSUPP;
422 			break;
423 		}
424 	}
425 
426 	/*
427 	 * after all of that, maybe we found someone who knows how to
428 	 * get us what we want?
429 	 */
430 	if (fn != NULL)
431 		error = (*fn)(name + ni, namelen - ni, oldp, oldlenp,
432 			      newp, newlen, name, l, rnode);
433 	else if (error == 0)
434 		error = EOPNOTSUPP;
435 
436 	return (error);
437 }
438 
439 /*
440  * ********************************************************************
441  * Releases the tree lock.  Note that if uvm_vslock() was called when
442  * the lock was taken, we release that memory now.  By keeping track
443  * of where and how much by ourselves, the lock can be released much
444  * more easily from anywhere.
445  * ********************************************************************
446  */
447 void
448 sysctl_unlock(struct lwp *l)
449 {
450 
451 	if (l != NULL && sysctl_memsize != 0) {
452 		uvm_vsunlock(l->l_proc, sysctl_memaddr, sysctl_memsize);
453 		sysctl_memsize = 0;
454 	}
455 
456 	(void) lockmgr(&sysctl_treelock, LK_RELEASE, NULL);
457 }
458 
459 /*
460  * ********************************************************************
461  * Section 2: The main tree interfaces
462  * ********************************************************************
463  * This is how sysctl_dispatch() does its work, and you can too, by
464  * calling these routines from helpers (though typically only
465  * sysctl_lookup() will be used).  The tree MUST BE LOCKED when these
466  * are called.
467  * ********************************************************************
468  */
469 
470 /*
471  * sysctl_locate -- Finds the node matching the given mib under the
472  * given tree (via rv).  If no tree is given, we fall back to the
473  * native tree.  The current process (via l) is used for access
474  * control on the tree (some nodes may be traversable only by root) and
475  * on return, nip will show how many numbers in the mib were consumed.
476  */
477 int
478 sysctl_locate(struct lwp *l, const int *name, u_int namelen,
479 	      const struct sysctlnode **rnode, int *nip)
480 {
481 	const struct sysctlnode *node, *pnode;
482 	int tn, si, ni, error, alias;
483 
484 	/*
485 	 * basic checks and setup
486 	 */
487 	if (*rnode == NULL)
488 		*rnode = &sysctl_root;
489 	if (nip)
490 		*nip = 0;
491 	if (namelen < 0)
492 		return (EINVAL);
493 	if (namelen == 0)
494 		return (0);
495 
496 	/*
497 	 * search starts from "root"
498 	 */
499 	pnode = *rnode;
500 	if (SYSCTL_VERS(pnode->sysctl_flags) != SYSCTL_VERSION) {
501 		printf("sysctl_locate: pnode %p wrong version\n", pnode);
502 		return (EINVAL);
503 	}
504 	node = pnode->sysctl_child;
505 	error = 0;
506 
507 	/*
508 	 * scan for node to which new node should be attached
509 	 */
510 	for (ni = 0; ni < namelen; ni++) {
511 		/*
512 		 * walked off bottom of tree
513 		 */
514 		if (node == NULL) {
515 			if (SYSCTL_TYPE(pnode->sysctl_flags) == CTLTYPE_NODE)
516 				error = ENOENT;
517 			else
518 				error = ENOTDIR;
519 			break;
520 		}
521 		/*
522 		 * can anyone traverse this node or only root?
523 		 */
524 		if (l != NULL && (pnode->sysctl_flags & CTLFLAG_PRIVATE) &&
525 		    (error = suser(l->l_proc->p_ucred, &l->l_proc->p_acflag))
526 		    != 0)
527 			return (error);
528 		/*
529 		 * find a child node with the right number
530 		 */
531 		tn = name[ni];
532 		alias = 0;
533 
534 		si = 0;
535 		/*
536 		 * Note: ANYNUMBER only matches positive integers.
537 		 * Since ANYNUMBER is only permitted on single-node
538 		 * sub-trees (eg proc), check before the loop and skip
539 		 * it if we can.
540 		 */
541 		if ((node[si].sysctl_flags & CTLFLAG_ANYNUMBER) && (tn >= 0))
542 			goto foundit;
543 		for (; si < pnode->sysctl_clen; si++) {
544 			if (node[si].sysctl_num == tn) {
545 				if (node[si].sysctl_flags & CTLFLAG_ALIAS) {
546 					if (alias++ == 4)
547 						break;
548 					else {
549 						tn = node[si].sysctl_alias;
550 						si = -1;
551 					}
552 				} else
553 					goto foundit;
554 			}
555 		}
556 		/*
557 		 * if we ran off the end, it obviously doesn't exist
558 		 */
559 		error = ENOENT;
560 		break;
561 
562 		/*
563 		 * so far so good, move on down the line
564 		 */
565 	  foundit:
566 		pnode = &node[si];
567 		if (SYSCTL_TYPE(pnode->sysctl_flags) == CTLTYPE_NODE)
568 			node = node[si].sysctl_child;
569 		else
570 			node = NULL;
571 	}
572 
573 	*rnode = pnode;
574 	if (nip)
575 		*nip = ni;
576 
577 	return (error);
578 }
579 
580 /*
581  * sysctl_query -- The auto-discovery engine.  Copies out the structs
582  * describing nodes under the given node and handles overlay trees.
583  */
584 int
585 sysctl_query(SYSCTLFN_ARGS)
586 {
587 	int error, ni, elim, v;
588 	size_t out, left, t;
589 	const struct sysctlnode *enode, *onode;
590 	struct sysctlnode qnode;
591 
592 	if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
593 		printf("sysctl_query: rnode %p wrong version\n", rnode);
594 		return (EINVAL);
595 	}
596 
597 	if (SYSCTL_TYPE(rnode->sysctl_flags) != CTLTYPE_NODE)
598 		return (ENOTDIR);
599 	if (namelen != 1 || name[0] != CTL_QUERY)
600 		return (EINVAL);
601 
602 	error = 0;
603 	out = 0;
604 	left = *oldlenp;
605 	elim = 0;
606 	enode = NULL;
607 
608 	/*
609 	 * translate the given request to a current node
610 	 */
611 	error = sysctl_cvt_in(l, &v, newp, newlen, &qnode);
612 	if (error)
613 		return (error);
614 
615 	/*
616 	 * if the request specifies a version, check it
617 	 */
618 	if (qnode.sysctl_ver != 0) {
619 		enode = rnode;
620 		if (qnode.sysctl_ver != enode->sysctl_ver &&
621 		    qnode.sysctl_ver != sysctl_rootof(enode)->sysctl_ver)
622 			return (EINVAL);
623 	}
624 
625 	/*
626 	 * process has overlay tree
627 	 */
628 	if (l && l->l_proc->p_emul->e_sysctlovly) {
629 		enode = l->l_proc->p_emul->e_sysctlovly;
630 		elim = (name - oname);
631 		error = sysctl_locate(l, oname, elim, &enode, NULL);
632 		if (error == 0) {
633 			/* ah, found parent in overlay */
634 			elim = enode->sysctl_clen;
635 			enode = enode->sysctl_child;
636 		} else {
637 			error = 0;
638 			elim = 0;
639 			enode = NULL;
640 		}
641 	}
642 
643 	for (ni = 0; ni < rnode->sysctl_clen; ni++) {
644 		onode = &rnode->sysctl_child[ni];
645 		if (enode && enode->sysctl_num == onode->sysctl_num) {
646 			if (SYSCTL_TYPE(enode->sysctl_flags) != CTLTYPE_NODE)
647 				onode = enode;
648 			if (--elim > 0)
649 				enode++;
650 			else
651 				enode = NULL;
652 		}
653 		error = sysctl_cvt_out(l, v, onode, oldp, left, &t);
654 		if (error)
655 			return (error);
656 		if (oldp != NULL)
657 			oldp = (char*)oldp + t;
658 		out += t;
659 		left -= MIN(left, t);
660 	}
661 
662 	/*
663 	 * overlay trees *MUST* be entirely consumed
664 	 */
665 	KASSERT(enode == NULL);
666 
667 	*oldlenp = out;
668 
669 	return (error);
670 }
671 
672 #ifdef SYSCTL_DEBUG_CREATE
673 #undef sysctl_create
674 #endif /* SYSCTL_DEBUG_CREATE */
675 
676 /*
677  * sysctl_create -- Adds a node (the description of which is taken
678  * from newp) to the tree, returning a copy of it in the space pointed
679  * to by oldp.  In the event that the requested slot is already taken
680  * (either by name or by number), the offending node is returned
681  * instead.  Yes, this is complex, but we want to make sure everything
682  * is proper.
683  */
684 int
685 sysctl_create(SYSCTLFN_ARGS)
686 {
687 	struct sysctlnode nnode, *node, *pnode;
688 	int error, ni, at, nm, type, sz, flags, anum, v;
689 	void *own;
690 
691 	error = 0;
692 	own = NULL;
693 	anum = -1;
694 
695 	if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
696 		printf("sysctl_create: rnode %p wrong version\n", rnode);
697 		return (EINVAL);
698 	}
699 
700 	if (namelen != 1 || (name[namelen - 1] != CTL_CREATE
701 #if NKSYMS > 0
702 			     && name[namelen - 1] != CTL_CREATESYM
703 #endif /* NKSYMS > 0 */
704 			     ))
705 		return (EINVAL);
706 
707 	/*
708 	 * processes can only add nodes at securelevel 0, must be
709 	 * root, and can't add nodes to a parent that's not writeable
710 	 */
711 	if (l != NULL) {
712 #ifndef SYSCTL_DISALLOW_CREATE
713 		if (securelevel > 0)
714 			return (EPERM);
715 		error = suser(l->l_proc->p_ucred, &l->l_proc->p_acflag);
716 		if (error)
717 			return (error);
718 		if (!(rnode->sysctl_flags & CTLFLAG_READWRITE))
719 #endif /* SYSCTL_DISALLOW_CREATE */
720 			return (EPERM);
721 	}
722 
723 	/*
724 	 * nothing can add a node if:
725 	 * we've finished initial set up and
726 	 * the tree itself is not writeable or
727 	 * the entire sysctl system is not writeable
728 	 */
729 	if ((sysctl_root.sysctl_flags & CTLFLAG_PERMANENT) &&
730 	    (!(sysctl_rootof(rnode)->sysctl_flags & CTLFLAG_READWRITE) ||
731 	     !(sysctl_root.sysctl_flags & CTLFLAG_READWRITE)))
732 		return (EPERM);
733 
734 	/*
735 	 * it must be a "node", not a "int" or something
736 	 */
737 	if (SYSCTL_TYPE(rnode->sysctl_flags) != CTLTYPE_NODE)
738 		return (ENOTDIR);
739 	if (rnode->sysctl_flags & CTLFLAG_ALIAS) {
740 		printf("sysctl_create: attempt to add node to aliased "
741 		       "node %p\n", rnode);
742 		return (EINVAL);
743 	}
744 	pnode = __UNCONST(rnode); /* we are adding children to this node */
745 
746 	if (newp == NULL)
747 		return (EINVAL);
748 	error = sysctl_cvt_in(l, &v, newp, newlen, &nnode);
749 	if (error)
750 		return (error);
751 
752 	/*
753 	 * nodes passed in don't *have* parents
754 	 */
755 	if (nnode.sysctl_parent != NULL)
756 		return (EINVAL);
757 
758 	/*
759 	 * if we are indeed adding it, it should be a "good" name and
760 	 * number
761 	 */
762 	nm = nnode.sysctl_num;
763 #if NKSYMS > 0
764 	if (nm == CTL_CREATESYM)
765 		nm = CTL_CREATE;
766 #endif /* NKSYMS > 0 */
767 	if (nm < 0 && nm != CTL_CREATE)
768 		return (EINVAL);
769 	sz = 0;
770 
771 	/*
772 	 * the name can't start with a digit
773 	 */
774 	if (nnode.sysctl_name[sz] >= '0' &&
775 	    nnode.sysctl_name[sz] <= '9')
776 		return (EINVAL);
777 
778 	/*
779 	 * the name must be only alphanumerics or - or _, longer than
780 	 * 0 bytes and less than SYSCTL_NAMELEN
781 	 */
782 	while (sz < SYSCTL_NAMELEN && nnode.sysctl_name[sz] != '\0') {
783 		if ((nnode.sysctl_name[sz] >= '0' &&
784 		     nnode.sysctl_name[sz] <= '9') ||
785 		    (nnode.sysctl_name[sz] >= 'A' &&
786 		     nnode.sysctl_name[sz] <= 'Z') ||
787 		    (nnode.sysctl_name[sz] >= 'a' &&
788 		     nnode.sysctl_name[sz] <= 'z') ||
789 		    nnode.sysctl_name[sz] == '-' ||
790 		    nnode.sysctl_name[sz] == '_')
791 			sz++;
792 		else
793 			return (EINVAL);
794 	}
795 	if (sz == 0 || sz == SYSCTL_NAMELEN)
796 		return (EINVAL);
797 
798 	/*
799 	 * various checks revolve around size vs type, etc
800 	 */
801 	type = SYSCTL_TYPE(nnode.sysctl_flags);
802 	flags = SYSCTL_FLAGS(nnode.sysctl_flags);
803 	sz = nnode.sysctl_size;
804 
805 	/*
806 	 * find out if there's a collision, and if so, let the caller
807 	 * know what they collided with
808 	 */
809 	node = pnode->sysctl_child;
810 	if (((flags & CTLFLAG_ANYNUMBER) && node) ||
811 	    (node && node->sysctl_flags & CTLFLAG_ANYNUMBER))
812 		return (EINVAL);
813 	for (ni = at = 0; ni < pnode->sysctl_clen; ni++) {
814 		if (nm == node[ni].sysctl_num ||
815 		    strcmp(nnode.sysctl_name, node[ni].sysctl_name) == 0) {
816 			/*
817 			 * ignore error here, since we
818 			 * are already fixed on EEXIST
819 			 */
820 			(void)sysctl_cvt_out(l, v, &node[ni], oldp,
821 					     *oldlenp, oldlenp);
822 			return (EEXIST);
823 		}
824 		if (nm > node[ni].sysctl_num)
825 			at++;
826 	}
827 
828 	/*
829 	 * use sysctl_ver to add to the tree iff it hasn't changed
830 	 */
831 	if (nnode.sysctl_ver != 0) {
832 		/*
833 		 * a specified value must match either the parent
834 		 * node's version or the root node's version
835 		 */
836 		if (nnode.sysctl_ver != sysctl_rootof(rnode)->sysctl_ver &&
837 		    nnode.sysctl_ver != rnode->sysctl_ver) {
838 			return (EINVAL);
839 		}
840 	}
841 
842 	/*
843 	 * only the kernel can assign functions to entries
844 	 */
845 	if (l != NULL && nnode.sysctl_func != NULL)
846 		return (EPERM);
847 
848 	/*
849 	 * only the kernel can create permanent entries, and only then
850 	 * before the kernel is finished setting itself up
851 	 */
852 	if (l != NULL && (flags & ~SYSCTL_USERFLAGS))
853 		return (EPERM);
854 	if ((flags & CTLFLAG_PERMANENT) &
855 	    (sysctl_root.sysctl_flags & CTLFLAG_PERMANENT))
856 		return (EPERM);
857 	if ((flags & (CTLFLAG_OWNDATA | CTLFLAG_IMMEDIATE)) ==
858 	    (CTLFLAG_OWNDATA | CTLFLAG_IMMEDIATE))
859 		return (EINVAL);
860 	if ((flags & CTLFLAG_IMMEDIATE) &&
861 	    type != CTLTYPE_INT && type != CTLTYPE_QUAD)
862 		return (EINVAL);
863 
864 	/*
865 	 * check size, or set it if unset and we can figure it out.
866 	 * kernel created nodes are allowed to have a function instead
867 	 * of a size (or a data pointer).
868 	 */
869 	switch (type) {
870 	case CTLTYPE_NODE:
871 		/*
872 		 * only *i* can assert the size of a node
873 		 */
874 		if (flags & CTLFLAG_ALIAS) {
875 			anum = nnode.sysctl_alias;
876 			if (anum < 0)
877 				return (EINVAL);
878 			nnode.sysctl_alias = 0;
879 		}
880 		if (sz != 0 || nnode.sysctl_data != NULL)
881 			return (EINVAL);
882 		if (nnode.sysctl_csize != 0 ||
883 		    nnode.sysctl_clen != 0 ||
884 		    nnode.sysctl_child != 0)
885 			return (EINVAL);
886 		if (flags & CTLFLAG_OWNDATA)
887 			return (EINVAL);
888 		sz = sizeof(struct sysctlnode);
889 		break;
890 	case CTLTYPE_INT:
891 		/*
892 		 * since an int is an int, if the size is not given or
893 		 * is wrong, we can "int-uit" it.
894 		 */
895 		if (sz != 0 && sz != sizeof(int))
896 			return (EINVAL);
897 		sz = sizeof(int);
898 		break;
899 	case CTLTYPE_STRING:
900 		/*
901 		 * strings are a little more tricky
902 		 */
903 		if (sz == 0) {
904 			if (l == NULL) {
905 				if (nnode.sysctl_func == NULL) {
906 					if (nnode.sysctl_data == NULL)
907 						return (EINVAL);
908 					else
909 						sz = strlen(nnode.sysctl_data) +
910 						    1;
911 				}
912 			} else if (nnode.sysctl_data == NULL &&
913 				 flags & CTLFLAG_OWNDATA) {
914 				return (EINVAL);
915 			} else {
916 				char *vp, *e;
917 				size_t s;
918 
919 				/*
920 				 * we want a rough idea of what the
921 				 * size is now
922 				 */
923 				vp = malloc(PAGE_SIZE, M_SYSCTLDATA,
924 					     M_WAITOK|M_CANFAIL);
925 				if (vp == NULL)
926 					return (ENOMEM);
927 				e = nnode.sysctl_data;
928 				do {
929 					error = copyinstr(e, vp, PAGE_SIZE, &s);
930 					if (error) {
931 						if (error != ENAMETOOLONG) {
932 							free(vp, M_SYSCTLDATA);
933 							return (error);
934 						}
935 						e += PAGE_SIZE;
936 						if ((e - 32 * PAGE_SIZE) >
937 						    (char*)nnode.sysctl_data) {
938 							free(vp, M_SYSCTLDATA);
939 							return (ERANGE);
940 						}
941 					}
942 				} while (error != 0);
943 				sz = s + (e - (char*)nnode.sysctl_data);
944 				free(vp, M_SYSCTLDATA);
945 			}
946 		}
947 		break;
948 	case CTLTYPE_QUAD:
949 		if (sz != 0 && sz != sizeof(u_quad_t))
950 			return (EINVAL);
951 		sz = sizeof(u_quad_t);
952 		break;
953 	case CTLTYPE_STRUCT:
954 		if (sz == 0) {
955 			if (l != NULL || nnode.sysctl_func == NULL)
956 				return (EINVAL);
957 			if (flags & CTLFLAG_OWNDATA)
958 				return (EINVAL);
959 		}
960 		break;
961 	default:
962 		return (EINVAL);
963 	}
964 
965 	/*
966 	 * at this point, if sz is zero, we *must* have a
967 	 * function to go with it and we can't own it.
968 	 */
969 
970 	/*
971 	 *  l  ptr own
972 	 *  0   0   0  -> EINVAL (if no func)
973 	 *  0   0   1  -> own
974 	 *  0   1   0  -> kptr
975 	 *  0   1   1  -> kptr
976 	 *  1   0   0  -> EINVAL
977 	 *  1   0   1  -> own
978 	 *  1   1   0  -> kptr, no own (fault on lookup)
979 	 *  1   1   1  -> uptr, own
980 	 */
981 	if (type != CTLTYPE_NODE) {
982 		if (sz != 0) {
983 			if (flags & CTLFLAG_OWNDATA) {
984 				own = malloc(sz, M_SYSCTLDATA,
985 					     M_WAITOK|M_CANFAIL);
986 				if (nnode.sysctl_data == NULL)
987 					memset(own, 0, sz);
988 				else {
989 					error = sysctl_copyin(l,
990 					    nnode.sysctl_data, own, sz);
991 					if (error != 0) {
992 						free(own, M_SYSCTLDATA);
993 						return (error);
994 					}
995 				}
996 			} else if ((nnode.sysctl_data != NULL) &&
997 				 !(flags & CTLFLAG_IMMEDIATE)) {
998 #if NKSYMS > 0
999 				if (name[namelen - 1] == CTL_CREATESYM) {
1000 					char symname[128]; /* XXX enough? */
1001 					u_long symaddr;
1002 					size_t symlen;
1003 
1004 					error = sysctl_copyinstr(l,
1005 					    nnode.sysctl_data, symname,
1006 					    sizeof(symname), &symlen);
1007 					if (error)
1008 						return (error);
1009 					error = ksyms_getval(NULL, symname,
1010 					    &symaddr, KSYMS_EXTERN);
1011 					if (error)
1012 						return (error); /* EINVAL? */
1013 					nnode.sysctl_data = (void*)symaddr;
1014 				}
1015 #endif /* NKSYMS > 0 */
1016 				/*
1017 				 * Ideally, we'd like to verify here
1018 				 * that this address is acceptable,
1019 				 * but...
1020 				 *
1021 				 * - it might be valid now, only to
1022 				 *   become invalid later
1023 				 *
1024 				 * - it might be invalid only for the
1025 				 *   moment and valid later
1026 				 *
1027 				 * - or something else.
1028 				 *
1029 				 * Since we can't get a good answer,
1030 				 * we'll just accept the address as
1031 				 * given, and fault on individual
1032 				 * lookups.
1033 				 */
1034 			}
1035 		} else if (nnode.sysctl_func == NULL)
1036 			return (EINVAL);
1037 	}
1038 
1039 	/*
1040 	 * a process can't assign a function to a node, and the kernel
1041 	 * can't create a node that has no function or data.
1042 	 * (XXX somewhat redundant check)
1043 	 */
1044 	if (l != NULL || nnode.sysctl_func == NULL) {
1045 		if (type != CTLTYPE_NODE &&
1046 		    nnode.sysctl_data == NULL &&
1047 		    !(flags & CTLFLAG_IMMEDIATE) &&
1048 		    own == NULL)
1049 			return (EINVAL);
1050 	}
1051 
1052 #ifdef SYSCTL_DISALLOW_KWRITE
1053 	/*
1054 	 * a process can't create a writable node unless it refers to
1055 	 * new data.
1056 	 */
1057 	if (l != NULL && own == NULL && type != CTLTYPE_NODE &&
1058 	    (flags & CTLFLAG_READWRITE) != CTLFLAG_READONLY &&
1059 	    !(flags & CTLFLAG_IMMEDIATE))
1060 		return (EPERM);
1061 #endif /* SYSCTL_DISALLOW_KWRITE */
1062 
1063 	/*
1064 	 * make sure there's somewhere to put the new stuff.
1065 	 */
1066 	if (pnode->sysctl_child == NULL) {
1067 		if (flags & CTLFLAG_ANYNUMBER)
1068 			error = sysctl_alloc(pnode, 1);
1069 		else
1070 			error = sysctl_alloc(pnode, 0);
1071 		if (error)
1072 			return (error);
1073 	}
1074 	node = pnode->sysctl_child;
1075 
1076 	/*
1077 	 * no collisions, so pick a good dynamic number if we need to.
1078 	 */
1079 	if (nm == CTL_CREATE) {
1080 		nm = ++sysctl_root.sysctl_num;
1081 		for (ni = 0; ni < pnode->sysctl_clen; ni++) {
1082 			if (nm == node[ni].sysctl_num) {
1083 				nm++;
1084 				ni = -1;
1085 			} else if (nm > node[ni].sysctl_num)
1086 				at = ni + 1;
1087 		}
1088 	}
1089 
1090 	/*
1091 	 * oops...ran out of space
1092 	 */
1093 	if (pnode->sysctl_clen == pnode->sysctl_csize) {
1094 		error = sysctl_realloc(pnode);
1095 		if (error)
1096 			return (error);
1097 		node = pnode->sysctl_child;
1098 	}
1099 
1100 	/*
1101 	 * insert new node data
1102 	 */
1103 	if (at < pnode->sysctl_clen) {
1104 		int t;
1105 
1106 		/*
1107 		 * move the nodes that should come after the new one
1108 		 */
1109 		memmove(&node[at + 1], &node[at],
1110 			(pnode->sysctl_clen - at) * sizeof(struct sysctlnode));
1111 		memset(&node[at], 0, sizeof(struct sysctlnode));
1112 		node[at].sysctl_parent = pnode;
1113 		/*
1114 		 * and...reparent any children of any moved nodes
1115 		 */
1116 		for (ni = at; ni <= pnode->sysctl_clen; ni++)
1117 			if (SYSCTL_TYPE(node[ni].sysctl_flags) == CTLTYPE_NODE)
1118 				for (t = 0; t < node[ni].sysctl_clen; t++)
1119 					node[ni].sysctl_child[t].sysctl_parent =
1120 						&node[ni];
1121 	}
1122 	node = &node[at];
1123 	pnode->sysctl_clen++;
1124 
1125 	strlcpy(node->sysctl_name, nnode.sysctl_name,
1126 		sizeof(node->sysctl_name));
1127 	node->sysctl_num = nm;
1128 	node->sysctl_size = sz;
1129 	node->sysctl_flags = SYSCTL_VERSION|type|flags; /* XXX other trees */
1130 	node->sysctl_csize = 0;
1131 	node->sysctl_clen = 0;
1132 	if (own) {
1133 		node->sysctl_data = own;
1134 		node->sysctl_flags |= CTLFLAG_OWNDATA;
1135 	} else if (flags & CTLFLAG_ALIAS) {
1136 		node->sysctl_alias = anum;
1137 	} else if (flags & CTLFLAG_IMMEDIATE) {
1138 		switch (type) {
1139 		case CTLTYPE_INT:
1140 			node->sysctl_idata = nnode.sysctl_idata;
1141 			break;
1142 		case CTLTYPE_QUAD:
1143 			node->sysctl_qdata = nnode.sysctl_qdata;
1144 			break;
1145 		}
1146 	} else {
1147 		node->sysctl_data = nnode.sysctl_data;
1148 		node->sysctl_flags &= ~CTLFLAG_OWNDATA;
1149 	}
1150         node->sysctl_func = nnode.sysctl_func;
1151         node->sysctl_child = NULL;
1152 	/* node->sysctl_parent should already be done */
1153 
1154 	/*
1155 	 * update "version" on path to "root"
1156 	 */
1157 	for (; rnode->sysctl_parent != NULL; rnode = rnode->sysctl_parent)
1158 		;
1159 	pnode = node;
1160 	for (nm = rnode->sysctl_ver + 1; pnode != NULL;
1161 	     pnode = pnode->sysctl_parent)
1162 		pnode->sysctl_ver = nm;
1163 
1164 	error = sysctl_cvt_out(l, v, node, oldp, *oldlenp, oldlenp);
1165 
1166 	return (error);
1167 }
1168 
/*
 * ********************************************************************
 * Debugging shim around sysctl_create(): prints the request that is
 * about to be made, forwards it unchanged, and then prints the result.
 * Only built when SYSCTL_DEBUG_CREATE is defined; the #define at the
 * bottom redirects every later use of sysctl_create() in this file
 * through the shim.
 * ********************************************************************
 */
#ifdef SYSCTL_DEBUG_CREATE
int _sysctl_create(SYSCTLFN_PROTO);
int
_sysctl_create(SYSCTLFN_ARGS)
{
	const struct sysctlnode *req = newp;
	int depth, idx, reqnum, result;

	/* length of the whole name, measured from the start of oname */
	depth = namelen + (name - oname);

	/* remember the requested number so it can be reported afterwards */
	reqnum = req->sysctl_num;

	printf("namelen %d (", depth);
	idx = 0;
	while (idx < depth - 1) {
		printf(" %d", oname[idx]);
		idx++;
	}
	printf(" %d )\t[%s]\tflags %08x (%08x %d %zu)\n",
	       reqnum,
	       req->sysctl_name,
	       req->sysctl_flags,
	       SYSCTL_FLAGS(req->sysctl_flags),
	       SYSCTL_TYPE(req->sysctl_flags),
	       req->sysctl_size);

	/* the macro below has not been defined yet, so this is the real one */
	result = sysctl_create(SYSCTLFN_CALL(rnode));

	printf("sysctl_create(");
	idx = 0;
	while (idx < depth - 1) {
		printf(" %d", oname[idx]);
		idx++;
	}
	printf(" %d ) returned %d\n", reqnum, result);

	return (result);
}
#define sysctl_create _sysctl_create
#endif /* SYSCTL_DEBUG_CREATE */
1208 
1209 /*
1210  * sysctl_destroy -- Removes a node (as described by newp) from the
1211  * given tree, returning (if successful) a copy of the dead node in
1212  * oldp.  Since we're removing stuff, there's not much to check.
1213  */
1214 int
1215 sysctl_destroy(SYSCTLFN_ARGS)
1216 {
1217 	struct sysctlnode *node, *pnode, onode, nnode;
1218 	int ni, error, v;
1219 
1220 	if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
1221 		printf("sysctl_destroy: rnode %p wrong version\n", rnode);
1222 		return (EINVAL);
1223 	}
1224 
1225 	error = 0;
1226 
1227 	if (namelen != 1 || name[namelen - 1] != CTL_DESTROY)
1228 		return (EINVAL);
1229 
1230 	/*
1231 	 * processes can only destroy nodes at securelevel 0, must be
1232 	 * root, and can't remove nodes from a parent that's not
1233 	 * writeable
1234 	 */
1235 	if (l != NULL) {
1236 #ifndef SYSCTL_DISALLOW_CREATE
1237 		if (securelevel > 0)
1238 			return (EPERM);
1239 		error = suser(l->l_proc->p_ucred, &l->l_proc->p_acflag);
1240 		if (error)
1241 			return (error);
1242 		if (!(rnode->sysctl_flags & CTLFLAG_READWRITE))
1243 #endif /* SYSCTL_DISALLOW_CREATE */
1244 			return (EPERM);
1245 	}
1246 
1247 	/*
1248 	 * nothing can remove a node if:
1249 	 * the node is permanent (checked later) or
1250 	 * the tree itself is not writeable or
1251 	 * the entire sysctl system is not writeable
1252 	 *
1253 	 * note that we ignore whether setup is complete or not,
1254 	 * because these rules always apply.
1255 	 */
1256 	if (!(sysctl_rootof(rnode)->sysctl_flags & CTLFLAG_READWRITE) ||
1257 	    !(sysctl_root.sysctl_flags & CTLFLAG_READWRITE))
1258 		return (EPERM);
1259 
1260 	if (newp == NULL)
1261 		return (EINVAL);
1262 	error = sysctl_cvt_in(l, &v, newp, newlen, &nnode);
1263 	if (error)
1264 		return (error);
1265 	memset(&onode, 0, sizeof(struct sysctlnode));
1266 
1267 	node = rnode->sysctl_child;
1268 	for (ni = 0; ni < rnode->sysctl_clen; ni++) {
1269 		if (nnode.sysctl_num == node[ni].sysctl_num) {
1270 			/*
1271 			 * if name specified, must match
1272 			 */
1273 			if (nnode.sysctl_name[0] != '\0' &&
1274 			    strcmp(nnode.sysctl_name, node[ni].sysctl_name))
1275 				continue;
1276 			/*
1277 			 * if version specified, must match
1278 			 */
1279 			if (nnode.sysctl_ver != 0 &&
1280 			    nnode.sysctl_ver != node[ni].sysctl_ver)
1281 				continue;
1282 			/*
1283 			 * this must be the one
1284 			 */
1285 			break;
1286 		}
1287 	}
1288 	if (ni == rnode->sysctl_clen)
1289 		return (ENOENT);
1290 	node = &node[ni];
1291 	pnode = node->sysctl_parent;
1292 
1293 	/*
1294 	 * if the kernel says permanent, it is, so there.  nyah.
1295 	 */
1296 	if (SYSCTL_FLAGS(node->sysctl_flags) & CTLFLAG_PERMANENT)
1297 		return (EPERM);
1298 
1299 	/*
1300 	 * can't delete non-empty nodes
1301 	 */
1302 	if (SYSCTL_TYPE(node->sysctl_flags) == CTLTYPE_NODE &&
1303 	    node->sysctl_clen != 0)
1304 		return (ENOTEMPTY);
1305 
1306 	/*
1307 	 * if the node "owns" data, release it now
1308 	 */
1309 	if (node->sysctl_flags & CTLFLAG_OWNDATA) {
1310 		if (node->sysctl_data != NULL)
1311 			free(node->sysctl_data, M_SYSCTLDATA);
1312 		node->sysctl_data = NULL;
1313 	}
1314 	if (node->sysctl_flags & CTLFLAG_OWNDESC) {
1315 		if (node->sysctl_desc != NULL)
1316 			/*XXXUNCONST*/
1317 			free(__UNCONST(node->sysctl_desc), M_SYSCTLDATA);
1318 		node->sysctl_desc = NULL;
1319 	}
1320 
1321 	/*
1322 	 * if the node to be removed is not the last one on the list,
1323 	 * move the remaining nodes up, and reparent any grandchildren
1324 	 */
1325 	onode = *node;
1326 	if (ni < pnode->sysctl_clen - 1) {
1327 		int t;
1328 
1329 		memmove(&pnode->sysctl_child[ni], &pnode->sysctl_child[ni + 1],
1330 			(pnode->sysctl_clen - ni - 1) *
1331 			sizeof(struct sysctlnode));
1332 		for (; ni < pnode->sysctl_clen - 1; ni++)
1333 			if (SYSCTL_TYPE(pnode->sysctl_child[ni].sysctl_flags) ==
1334 			    CTLTYPE_NODE)
1335 				for (t = 0;
1336 				     t < pnode->sysctl_child[ni].sysctl_clen;
1337 				     t++)
1338 					pnode->sysctl_child[ni].sysctl_child[t].
1339 						sysctl_parent =
1340 						&pnode->sysctl_child[ni];
1341 		ni = pnode->sysctl_clen - 1;
1342 		node = &pnode->sysctl_child[ni];
1343 	}
1344 
1345 	/*
1346 	 * reset the space we just vacated
1347 	 */
1348 	memset(node, 0, sizeof(struct sysctlnode));
1349 	node->sysctl_parent = pnode;
1350 	pnode->sysctl_clen--;
1351 
1352 	/*
1353 	 * if this parent just lost its last child, nuke the creche
1354 	 */
1355 	if (pnode->sysctl_clen == 0) {
1356 		free(pnode->sysctl_child, M_SYSCTLNODE);
1357 		pnode->sysctl_csize = 0;
1358 		pnode->sysctl_child = NULL;
1359 	}
1360 
1361 	/*
1362 	 * update "version" on path to "root"
1363 	 */
1364         for (; rnode->sysctl_parent != NULL; rnode = rnode->sysctl_parent)
1365                 ;
1366 	for (ni = rnode->sysctl_ver + 1; pnode != NULL;
1367 	     pnode = pnode->sysctl_parent)
1368 		pnode->sysctl_ver = ni;
1369 
1370 	error = sysctl_cvt_out(l, v, &onode, oldp, *oldlenp, oldlenp);
1371 
1372 	return (error);
1373 }
1374 
1375 /*
1376  * sysctl_lookup -- Handles copyin/copyout of new and old values.
1377  * Partial reads are globally allowed.  Only root can write to things
1378  * unless the node says otherwise.
1379  */
1380 int
1381 sysctl_lookup(SYSCTLFN_ARGS)
1382 {
1383 	int error, rw;
1384 	size_t sz, len;
1385 	void *d;
1386 
1387 	if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
1388 		printf("sysctl_lookup: rnode %p wrong version\n", rnode);
1389 		return (EINVAL);
1390 	}
1391 
1392 	error = 0;
1393 
1394 	/*
1395 	 * you can't "look up" a node.  you can "query" it, but you
1396 	 * can't "look it up".
1397 	 */
1398 	if (SYSCTL_TYPE(rnode->sysctl_flags) == CTLTYPE_NODE || namelen != 0)
1399 		return (EINVAL);
1400 
1401 	/*
1402 	 * some nodes are private, so only root can look into them.
1403 	 */
1404 	if (l != NULL && (rnode->sysctl_flags & CTLFLAG_PRIVATE) &&
1405 	    (error = suser(l->l_proc->p_ucred, &l->l_proc->p_acflag)) != 0)
1406 		return (error);
1407 
1408 	/*
1409 	 * if a node wants to be writable according to different rules
1410 	 * other than "only root can write to stuff unless a flag is
1411 	 * set", then it needs its own function which should have been
1412 	 * called and not us.
1413 	 */
1414 	if (l != NULL && newp != NULL &&
1415 	    !(rnode->sysctl_flags & CTLFLAG_ANYWRITE) &&
1416 	    (error = suser(l->l_proc->p_ucred, &l->l_proc->p_acflag)) != 0)
1417 		return (error);
1418 
1419 	/*
1420 	 * is this node supposedly writable?
1421 	 */
1422 	rw = 0;
1423 	switch (rnode->sysctl_flags & CTLFLAG_READWRITE) {
1424 	    case CTLFLAG_READONLY1:
1425 		rw = (securelevel < 1) ? 1 : 0;
1426 		break;
1427 	    case CTLFLAG_READONLY2:
1428 		rw = (securelevel < 2) ? 1 : 0;
1429 		break;
1430 	    case CTLFLAG_READWRITE:
1431 		rw = 1;
1432 		break;
1433 	}
1434 
1435 	/*
1436 	 * it appears not to be writable at this time, so if someone
1437 	 * tried to write to it, we must tell them to go away
1438 	 */
1439 	if (!rw && newp != NULL)
1440 		return (EPERM);
1441 
1442 	/*
1443 	 * step one, copy out the stuff we have presently
1444 	 */
1445 	if (rnode->sysctl_flags & CTLFLAG_IMMEDIATE) {
1446 		/*
1447 		 * note that we discard const here because we are
1448 		 * modifying the contents of the node (which is okay
1449 		 * because it's ours)
1450 		 */
1451 		switch (SYSCTL_TYPE(rnode->sysctl_flags)) {
1452 		case CTLTYPE_INT:
1453 			d = __UNCONST(&rnode->sysctl_idata);
1454 			break;
1455 		case CTLTYPE_QUAD:
1456 			d = __UNCONST(&rnode->sysctl_qdata);
1457 			break;
1458 		default:
1459 			return (EINVAL);
1460 		}
1461 	} else
1462 		d = rnode->sysctl_data;
1463 	if (SYSCTL_TYPE(rnode->sysctl_flags) == CTLTYPE_STRING)
1464 		sz = strlen(d) + 1; /* XXX@@@ possible fault here */
1465 	else
1466 		sz = rnode->sysctl_size;
1467 	if (oldp != NULL)
1468 		error = sysctl_copyout(l, d, oldp, MIN(sz, *oldlenp));
1469 	if (error)
1470 		return (error);
1471 	*oldlenp = sz;
1472 
1473 	/*
1474 	 * are we done?
1475 	 */
1476 	if (newp == NULL || newlen == 0)
1477 		return (0);
1478 
1479 	/*
1480 	 * hmm...not done.  must now "copy in" new value.  re-adjust
1481 	 * sz to maximum value (strings are "weird").
1482 	 */
1483 	sz = rnode->sysctl_size;
1484 	switch (SYSCTL_TYPE(rnode->sysctl_flags)) {
1485 	case CTLTYPE_INT:
1486 	case CTLTYPE_QUAD:
1487 	case CTLTYPE_STRUCT:
1488 		/*
1489 		 * these data must be *exactly* the same size coming
1490 		 * in.
1491 		 */
1492 		if (newlen != sz)
1493 			return (EINVAL);
1494 		error = sysctl_copyin(l, newp, d, sz);
1495 		break;
1496 	case CTLTYPE_STRING: {
1497 		/*
1498 		 * strings, on the other hand, can be shorter, and we
1499 		 * let userland be sloppy about the trailing nul.
1500 		 */
1501 		char *newbuf;
1502 
1503 		/*
1504 		 * too much new string?
1505 		 */
1506 		if (newlen > sz)
1507 			return (EINVAL);
1508 
1509 		/*
1510 		 * temporary copy of new inbound string
1511 		 */
1512 		len = MIN(sz, newlen);
1513 		newbuf = malloc(len, M_SYSCTLDATA, M_WAITOK|M_CANFAIL);
1514 		if (newbuf == NULL)
1515 			return (ENOMEM);
1516 		error = sysctl_copyin(l, newp, newbuf, len);
1517 		if (error) {
1518 			free(newbuf, M_SYSCTLDATA);
1519 			return (error);
1520 		}
1521 
1522 		/*
1523 		 * did they null terminate it, or do we have space
1524 		 * left to do it ourselves?
1525 		 */
1526 		if (newbuf[len - 1] != '\0' && len == sz) {
1527 			free(newbuf, M_SYSCTLDATA);
1528 			return (EINVAL);
1529 		}
1530 
1531 		/*
1532 		 * looks good, so pop it into place and zero the rest.
1533 		 */
1534 		if (len > 0)
1535 			memcpy(rnode->sysctl_data, newbuf, len);
1536 		if (sz != len)
1537 			memset((char*)rnode->sysctl_data + len, 0, sz - len);
1538 		free(newbuf, M_SYSCTLDATA);
1539 		break;
1540 	}
1541 	default:
1542 		return (EINVAL);
1543 	}
1544 
1545 	return (error);
1546 }
1547 
1548 /*
1549  * sysctl_mmap -- Dispatches sysctl mmap requests to those nodes that
1550  * purport to handle it.  This interface isn't fully fleshed out yet,
1551  * unfortunately.
1552  */
1553 static int
1554 sysctl_mmap(SYSCTLFN_ARGS)
1555 {
1556 	const struct sysctlnode *node;
1557 	struct sysctlnode nnode;
1558 	int error;
1559 
1560 	if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
1561 		printf("sysctl_mmap: rnode %p wrong version\n", rnode);
1562 		return (EINVAL);
1563 	}
1564 
1565 	/*
1566 	 * let's just pretend that didn't happen, m'kay?
1567 	 */
1568 	if (l == NULL)
1569 		return (EPERM);
1570 
1571 	/*
1572 	 * is this a sysctlnode description of an mmap request?
1573 	 */
1574 	if (newp == NULL || newlen != sizeof(struct sysctlnode))
1575 		return (EINVAL);
1576 	error = sysctl_copyin(l, newp, &nnode, sizeof(nnode));
1577 	if (error)
1578 		return (error);
1579 
1580 	/*
1581 	 * does the node they asked for exist?
1582 	 */
1583 	if (namelen != 1)
1584 		return (EOPNOTSUPP);
1585 	node = rnode;
1586         error = sysctl_locate(l, &nnode.sysctl_num, 1, &node, NULL);
1587 	if (error)
1588 		return (error);
1589 
1590 	/*
1591 	 * does this node that we have found purport to handle mmap?
1592 	 */
1593 	if (node->sysctl_func == NULL ||
1594 	    !(node->sysctl_flags & CTLFLAG_MMAP))
1595 		return (EOPNOTSUPP);
1596 
1597 	/*
1598 	 * well...okay, they asked for it.
1599 	 */
1600 	return ((*node->sysctl_func)(SYSCTLFN_CALL(node)));
1601 }
1602 
1603 int
1604 sysctl_describe(SYSCTLFN_ARGS)
1605 {
1606 	struct sysctldesc *d;
1607 	char bf[1024];
1608 	size_t sz, left, tot;
1609 	int i, error, v = -1;
1610 	struct sysctlnode *node;
1611 	struct sysctlnode dnode;
1612 
1613 	if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
1614 		printf("sysctl_query: rnode %p wrong version\n", rnode);
1615 		return (EINVAL);
1616 	}
1617 
1618 	if (SYSCTL_TYPE(rnode->sysctl_flags) != CTLTYPE_NODE)
1619 		return (ENOTDIR);
1620 	if (namelen != 1 || name[0] != CTL_DESCRIBE)
1621 		return (EINVAL);
1622 
1623 	/*
1624 	 * get ready...
1625 	 */
1626 	error = 0;
1627 	d = (void*)bf;
1628 	tot = 0;
1629 	node = rnode->sysctl_child;
1630 	left = *oldlenp;
1631 
1632 	/*
1633 	 * no request -> all descriptions at this level
1634 	 * request with desc unset -> just this node
1635 	 * request with desc set -> set descr for this node
1636 	 */
1637 	if (newp != NULL) {
1638 		error = sysctl_cvt_in(l, &v, newp, newlen, &dnode);
1639 		if (error)
1640 			return (error);
1641 		if (dnode.sysctl_desc != NULL) {
1642 			/*
1643 			 * processes cannot set descriptions above
1644 			 * securelevel 0.  and must be root.  blah
1645 			 * blah blah.  a couple more checks are made
1646 			 * once we find the node we want.
1647 			 */
1648 			if (l != NULL) {
1649 #ifndef SYSCTL_DISALLOW_CREATE
1650 				if (securelevel > 0)
1651 					return (EPERM);
1652 				error = suser(l->l_proc->p_ucred,
1653 					      &l->l_proc->p_acflag);
1654 				if (error)
1655 					return (error);
1656 #else /* SYSCTL_DISALLOW_CREATE */
1657 				return (EPERM);
1658 #endif /* SYSCTL_DISALLOW_CREATE */
1659 			}
1660 
1661 			/*
1662 			 * find node and try to set the description on it
1663 			 */
1664 			for (i = 0; i < rnode->sysctl_clen; i++)
1665 				if (node[i].sysctl_num == dnode.sysctl_num)
1666 					break;
1667 			if (i == rnode->sysctl_clen)
1668 				return (ENOENT);
1669 			node = &node[i];
1670 
1671 			/*
1672 			 * did the caller specify a node version?
1673 			 */
1674 			if (dnode.sysctl_ver != 0 &&
1675 			    dnode.sysctl_ver != node->sysctl_ver)
1676 				return (EINVAL);
1677 
1678 			/*
1679 			 * okay...some rules:
1680 			 * (1) if setup is done and the tree is
1681 			 *     read-only or the whole system is
1682 			 *     read-only
1683 			 * (2) no one can set a description on a
1684 			 *     permanent node (it must be set when
1685 			 *     using createv)
1686 			 * (3) processes cannot *change* a description
1687 			 * (4) processes *can*, however, set a
1688 			 *     description on a read-only node so that
1689 			 *     one can be created and then described
1690 			 *     in two steps
1691 			 * anything else come to mind?
1692 			 */
1693 			if ((sysctl_root.sysctl_flags & CTLFLAG_PERMANENT) &&
1694 			    (!(sysctl_rootof(node)->sysctl_flags &
1695 			       CTLFLAG_READWRITE) ||
1696 			     !(sysctl_root.sysctl_flags & CTLFLAG_READWRITE)))
1697 				return (EPERM);
1698 			if (node->sysctl_flags & CTLFLAG_PERMANENT)
1699 				return (EPERM);
1700 			if (l != NULL && node->sysctl_desc != NULL)
1701 				return (EPERM);
1702 
1703 			/*
1704 			 * right, let's go ahead.  the first step is
1705 			 * making the description into something the
1706 			 * node can "own", if need be.
1707 			 */
1708 			if (l != NULL ||
1709 			    dnode.sysctl_flags & CTLFLAG_OWNDESC) {
1710 				char *nd, k[1024];
1711 
1712 				error = sysctl_copyinstr(l, dnode.sysctl_desc,
1713 							 &k[0], sizeof(k), &sz);
1714 				if (error)
1715 					return (error);
1716 				nd = malloc(sz, M_SYSCTLDATA,
1717 					    M_WAITOK|M_CANFAIL);
1718 				if (nd == NULL)
1719 					return (ENOMEM);
1720 				memcpy(nd, k, sz);
1721 				dnode.sysctl_flags |= CTLFLAG_OWNDESC;
1722 				dnode.sysctl_desc = nd;
1723 			}
1724 
1725 			/*
1726 			 * now "release" the old description and
1727 			 * attach the new one.  ta-da.
1728 			 */
1729 			if ((node->sysctl_flags & CTLFLAG_OWNDESC) &&
1730 			    node->sysctl_desc != NULL)
1731 				/*XXXUNCONST*/
1732 				free(__UNCONST(node->sysctl_desc), M_SYSCTLDATA);
1733 			node->sysctl_desc = dnode.sysctl_desc;
1734 			node->sysctl_flags |=
1735 				(dnode.sysctl_flags & CTLFLAG_OWNDESC);
1736 
1737 			/*
1738 			 * now we "fall out" and into the loop which
1739 			 * will copy the new description back out for
1740 			 * those interested parties
1741 			 */
1742 		}
1743 	}
1744 
1745 	/*
1746 	 * scan for one description or just retrieve all descriptions
1747 	 */
1748 	for (i = 0; i < rnode->sysctl_clen; i++) {
1749 		/*
1750 		 * did they ask for the description of only one node?
1751 		 */
1752 		if (v != -1 && node[i].sysctl_num != dnode.sysctl_num)
1753 			continue;
1754 
1755 		/*
1756 		 * don't describe "private" nodes to non-suser users
1757 		 */
1758 		if ((node[i].sysctl_flags & CTLFLAG_PRIVATE) && (l != NULL) &&
1759 		    !(suser(l->l_proc->p_ucred, &l->l_proc->p_acflag)))
1760 			continue;
1761 
1762 		/*
1763 		 * is this description "valid"?
1764 		 */
1765 		memset(bf, 0, sizeof(bf));
1766 		if (node[i].sysctl_desc == NULL)
1767 			sz = 1;
1768 		else if (copystr(node[i].sysctl_desc, &d->descr_str[0],
1769 				 sizeof(bf) - sizeof(*d), &sz) != 0) {
1770 			/*
1771 			 * erase possible partial description
1772 			 */
1773 			memset(bf, 0, sizeof(bf));
1774 			sz = 1;
1775 		}
1776 
1777 		/*
1778 		 * we've got it, stuff it into the caller's buffer
1779 		 */
1780 		d->descr_num = node[i].sysctl_num;
1781 		d->descr_ver = node[i].sysctl_ver;
1782 		d->descr_len = sz; /* includes trailing nul */
1783 		sz = (caddr_t)NEXT_DESCR(d) - (caddr_t)d;
1784 		if (oldp != NULL && left >= sz) {
1785 			error = sysctl_copyout(l, d, oldp, sz);
1786 			if (error)
1787 				return (error);
1788 			left -= sz;
1789 			oldp = (void *)__sysc_desc_adv(oldp, d->descr_len);
1790 		}
1791 		tot += sz;
1792 
1793 		/*
1794 		 * if we get this far with v not "unset", they asked
1795 		 * for a specific node and we found it
1796 		 */
1797 		if (v != -1)
1798 			break;
1799 	}
1800 
1801 	/*
1802 	 * did we find it after all?
1803 	 */
1804 	if (v != -1 && tot == 0)
1805 		error = ENOENT;
1806 	else
1807 		*oldlenp = tot;
1808 
1809 	return (error);
1810 }
1811 
1812 /*
1813  * ********************************************************************
1814  * Section 3: Create and destroy from inside the kernel
1815  * ********************************************************************
1816  * sysctl_createv() and sysctl_destroyv() are simpler-to-use
1817  * interfaces for the kernel to fling new entries into the mib and rip
1818  * them out later.  In the case of sysctl_createv(), the returned copy
1819  * of the node (see sysctl_create()) will be translated back into a
1820  * pointer to the actual node.
1821  *
1822  * Note that sysctl_createv() will return 0 if the create request
1823  * matches an existing node (ala mkdir -p), and that sysctl_destroyv()
1824  * will return 0 if the node to be destroyed already does not exist
1825  * (aka rm -f) or if it is a parent of other nodes.
1826  *
1827  * This allows two (or more) different subsystems to assert sub-tree
1828  * existence before populating their own nodes, and to remove their
1829  * own nodes without orphaning the others when they are done.
1830  * ********************************************************************
1831  */
1832 int
1833 sysctl_createv(struct sysctllog **log, int cflags,
1834 	       const struct sysctlnode **rnode, const struct sysctlnode **cnode,
1835 	       int flags, int type, const char *namep, const char *descr,
1836 	       sysctlfn func, u_quad_t qv, void *newp, size_t newlen,
1837 	       ...)
1838 {
1839 	va_list ap;
1840 	int error, ni, namelen, name[CTL_MAXNAME];
1841 	const struct sysctlnode *root, *pnode;
1842 	struct sysctlnode nnode, onode, *dnode;
1843 	size_t sz;
1844 
1845 	/*
1846 	 * where are we putting this?
1847 	 */
1848 	if (rnode != NULL && *rnode == NULL) {
1849 		printf("sysctl_createv: rnode NULL\n");
1850 		return (EINVAL);
1851 	}
1852 	root = rnode ? *rnode : NULL;
1853 	if (cnode != NULL)
1854 		*cnode = NULL;
1855 	if (cflags != 0)
1856 		return (EINVAL);
1857 
1858 	/*
1859 	 * what is it?
1860 	 */
1861 	flags = SYSCTL_VERSION|SYSCTL_TYPE(type)|SYSCTL_FLAGS(flags);
1862 	if (log != NULL)
1863 		flags &= ~CTLFLAG_PERMANENT;
1864 
1865 	/*
1866 	 * where do we put it?
1867 	 */
1868 	va_start(ap, newlen);
1869 	namelen = 0;
1870 	ni = -1;
1871 	do {
1872 		if (++ni == CTL_MAXNAME)
1873 			return (ENAMETOOLONG);
1874 		name[ni] = va_arg(ap, int);
1875 		/*
1876 		 * sorry, this is not supported from here
1877 		 */
1878 		if (name[ni] == CTL_CREATESYM)
1879 			return (EINVAL);
1880 	} while (name[ni] != CTL_EOL && name[ni] != CTL_CREATE);
1881 	namelen = ni + (name[ni] == CTL_CREATE ? 1 : 0);
1882 	va_end(ap);
1883 
1884 	/*
1885 	 * what's it called
1886 	 */
1887 	if (strlcpy(nnode.sysctl_name, namep, sizeof(nnode.sysctl_name)) >=
1888 	    sizeof(nnode.sysctl_name))
1889 		return (ENAMETOOLONG);
1890 
1891 	/*
1892 	 * cons up the description of the new node
1893 	 */
1894 	nnode.sysctl_num = name[namelen - 1];
1895 	name[namelen - 1] = CTL_CREATE;
1896 	nnode.sysctl_size = newlen;
1897 	nnode.sysctl_flags = flags;
1898 	if (type == CTLTYPE_NODE) {
1899 		nnode.sysctl_csize = 0;
1900 		nnode.sysctl_clen = 0;
1901 		nnode.sysctl_child = NULL;
1902 		if (flags & CTLFLAG_ALIAS)
1903 			nnode.sysctl_alias = qv;
1904 	} else if (flags & CTLFLAG_IMMEDIATE) {
1905 		switch (type) {
1906 		case CTLTYPE_INT:
1907 			nnode.sysctl_idata = qv;
1908 			break;
1909 		case CTLTYPE_QUAD:
1910 			nnode.sysctl_qdata = qv;
1911 			break;
1912 		default:
1913 			return (EINVAL);
1914 		}
1915 	} else {
1916 		nnode.sysctl_data = newp;
1917 	}
1918 	nnode.sysctl_func = func;
1919 	nnode.sysctl_parent = NULL;
1920 	nnode.sysctl_ver = 0;
1921 
1922 	/*
1923 	 * initialize lock state -- we need locks if the main tree has
1924 	 * been marked as complete, but since we could be called from
1925 	 * either there, or from a device driver (say, at device
1926 	 * insertion), or from an lkm (at lkm load time, say), we
1927 	 * don't really want to "wait"...
1928 	 */
1929 	error = sysctl_lock(NULL, NULL, 0);
1930 	if (error)
1931 		return (error);
1932 
1933 	/*
1934 	 * locate the prospective parent of the new node, and if we
1935 	 * find it, add the new node.
1936 	 */
1937 	sz = sizeof(onode);
1938 	pnode = root;
1939 	error = sysctl_locate(NULL, &name[0], namelen - 1, &pnode, &ni);
1940 	if (error) {
1941 		printf("sysctl_createv: sysctl_locate(%s) returned %d\n",
1942 		       nnode.sysctl_name, error);
1943 		sysctl_unlock(NULL);
1944 		return (error);
1945 	}
1946 	error = sysctl_create(&name[ni], namelen - ni, &onode, &sz,
1947 			      &nnode, sizeof(nnode), &name[0], NULL,
1948 			      pnode);
1949 
1950 	/*
1951 	 * unfortunately the node we wanted to create is already
1952 	 * there.  if the node that's already there is a reasonable
1953 	 * facsimile of the node we wanted to create, just pretend
1954 	 * (for the caller's benefit) that we managed to create the
1955 	 * node they wanted.
1956 	 */
1957 	if (error == EEXIST) {
1958 		/* name is the same as requested... */
1959 		if (strcmp(nnode.sysctl_name, onode.sysctl_name) == 0 &&
1960 		    /* they want the same function... */
1961 		    nnode.sysctl_func == onode.sysctl_func &&
1962 		    /* number is the same as requested, or... */
1963 		    (nnode.sysctl_num == onode.sysctl_num ||
1964 		     /* they didn't pick a number... */
1965 		     nnode.sysctl_num == CTL_CREATE)) {
1966 			/*
1967 			 * collision here from trying to create
1968 			 * something that already existed; let's give
1969 			 * our customers a hand and tell them they got
1970 			 * what they wanted.
1971 			 */
1972 #ifdef SYSCTL_DEBUG_CREATE
1973 			printf("cleared\n");
1974 #endif /* SYSCTL_DEBUG_CREATE */
1975 			error = 0;
1976 		}
1977 	}
1978 
1979 	if (error == 0 &&
1980 	    (cnode != NULL || log != NULL || descr != NULL)) {
1981 		/*
1982 		 * sysctl_create() gave us back a copy of the node,
1983 		 * but we need to know where it actually is...
1984 		 */
1985 		pnode = root;
1986 		error = sysctl_locate(NULL, &name[0], namelen - 1, &pnode, &ni);
1987 
1988 		/*
1989 		 * manual scan of last layer so that aliased nodes
1990 		 * aren't followed.
1991 		 */
1992 		if (error == 0) {
1993 			for (ni = 0; ni < pnode->sysctl_clen; ni++)
1994 				if (pnode->sysctl_child[ni].sysctl_num ==
1995 				    onode.sysctl_num)
1996 					break;
1997 			if (ni < pnode->sysctl_clen)
1998 				pnode = &pnode->sysctl_child[ni];
1999 			else
2000 				error = ENOENT;
2001 		}
2002 
2003 		/*
2004 		 * not expecting an error here, but...
2005 		 */
2006 		if (error == 0) {
2007 			if (log != NULL)
2008 				sysctl_log_add(log, pnode);
2009 			if (cnode != NULL)
2010 				*cnode = pnode;
2011 			if (descr != NULL) {
2012 				/*
2013 				 * allow first caller to *set* a
2014 				 * description actually to set it
2015 				 *
2016 				 * discard const here so we can attach
2017 				 * the description
2018 				 */
2019 				dnode = __UNCONST(pnode);
2020 				if (pnode->sysctl_desc != NULL)
2021 					/* skip it...we've got one */;
2022 				else if (flags & CTLFLAG_OWNDESC) {
2023 					size_t l = strlen(descr) + 1;
2024 					char *d = malloc(l, M_SYSCTLDATA,
2025 							 M_WAITOK|M_CANFAIL);
2026 					if (d != NULL) {
2027 						memcpy(d, descr, l);
2028 						dnode->sysctl_desc = d;
2029 						dnode->sysctl_flags |=
2030 						    CTLFLAG_OWNDESC;
2031 					}
2032 				} else
2033 					dnode->sysctl_desc = descr;
2034 			}
2035 		} else {
2036 			printf("sysctl_create succeeded but node not found?!\n");
2037 			/*
2038 			 *  confusing, but the create said it
2039 			 * succeeded, so...
2040 			 */
2041 			error = 0;
2042 		}
2043 	}
2044 
2045 	/*
2046 	 * now it should be safe to release the lock state.  note that
2047 	 * the pointer to the newly created node being passed back may
2048 	 * not be "good" for very long.
2049 	 */
2050 	sysctl_unlock(NULL);
2051 
2052 	if (error != 0) {
2053 		printf("sysctl_createv: sysctl_create(%s) returned %d\n",
2054 		       nnode.sysctl_name, error);
2055 #if 0
2056 		if (error != ENOENT)
2057 			sysctl_dump(&onode);
2058 #endif
2059 	}
2060 
2061 	return (error);
2062 }
2063 
2064 int
2065 sysctl_destroyv(struct sysctlnode *rnode, ...)
2066 {
2067 	va_list ap;
2068 	int error, name[CTL_MAXNAME], namelen, ni;
2069 	const struct sysctlnode *pnode, *node;
2070 	struct sysctlnode dnode, *onode;
2071 	size_t sz;
2072 
2073 	va_start(ap, rnode);
2074 	namelen = 0;
2075 	ni = 0;
2076 	do {
2077 		if (ni == CTL_MAXNAME)
2078 			return (ENAMETOOLONG);
2079 		name[ni] = va_arg(ap, int);
2080 	} while (name[ni++] != CTL_EOL);
2081 	namelen = ni - 1;
2082 	va_end(ap);
2083 
2084 	/*
2085 	 * i can't imagine why we'd be destroying a node when the tree
2086 	 * wasn't complete, but who knows?
2087 	 */
2088 	error = sysctl_lock(NULL, NULL, 0);
2089 	if (error)
2090 		return (error);
2091 
2092 	/*
2093 	 * where is it?
2094 	 */
2095 	node = rnode;
2096 	error = sysctl_locate(NULL, &name[0], namelen - 1, &node, &ni);
2097 	if (error) {
2098 		/* they want it gone and it's not there, so... */
2099 		sysctl_unlock(NULL);
2100 		return (error == ENOENT ? 0 : error);
2101 	}
2102 
2103 	/*
2104 	 * set up the deletion
2105 	 */
2106 	pnode = node;
2107 	node = &dnode;
2108 	memset(&dnode, 0, sizeof(dnode));
2109 	dnode.sysctl_flags = SYSCTL_VERSION;
2110 	dnode.sysctl_num = name[namelen - 1];
2111 
2112 	/*
2113 	 * we found it, now let's nuke it
2114 	 */
2115 	name[namelen - 1] = CTL_DESTROY;
2116 	sz = 0;
2117 	error = sysctl_destroy(&name[namelen - 1], 1, NULL, &sz,
2118 			       node, sizeof(*node), &name[0], NULL,
2119 			       pnode);
2120 	if (error == ENOTEMPTY) {
2121 		/*
2122 		 * think of trying to delete "foo" when "foo.bar"
2123 		 * (which someone else put there) is still in
2124 		 * existence
2125 		 */
2126 		error = 0;
2127 
2128 		/*
2129 		 * dunno who put the description there, but if this
2130 		 * node can ever be removed, we need to make sure the
2131 		 * string doesn't go out of context.  that means we
2132 		 * need to find the node that's still there (don't use
2133 		 * sysctl_locate() because that follows aliasing).
2134 		 */
2135 		node = pnode->sysctl_child;
2136 		for (ni = 0; ni < pnode->sysctl_clen; ni++)
2137 			if (node[ni].sysctl_num == dnode.sysctl_num)
2138 				break;
2139 		node = (ni < pnode->sysctl_clen) ? &node[ni] : NULL;
2140 
2141 		/*
2142 		 * if we found it, and this node has a description,
2143 		 * and this node can be released, and it doesn't
2144 		 * already own its own description...sigh.  :)
2145 		 */
2146 		if (node != NULL && node->sysctl_desc != NULL &&
2147 		    !(node->sysctl_flags & CTLFLAG_PERMANENT) &&
2148 		    !(node->sysctl_flags & CTLFLAG_OWNDESC)) {
2149 			char *d;
2150 
2151 			sz = strlen(node->sysctl_desc) + 1;
2152 			d = malloc(sz, M_SYSCTLDATA, M_WAITOK|M_CANFAIL);
2153 			if (d != NULL) {
2154 				/*
2155 				 * discard const so that we can
2156 				 * re-attach the description
2157 				 */
2158 				memcpy(d, node->sysctl_desc, sz);
2159 				onode = __UNCONST(node);
2160 				onode->sysctl_desc = d;
2161 				onode->sysctl_flags |= CTLFLAG_OWNDESC;
2162 			} else {
2163 				/*
2164 				 * XXX drop the description?  be
2165 				 * afraid?  don't care?
2166 				 */
2167 			}
2168 		}
2169 	}
2170 
2171         sysctl_unlock(NULL);
2172 
2173 	return (error);
2174 }
2175 
2176 #if 0
2177 /*
2178  * ********************************************************************
2179  * the dump routine.  i haven't yet decided how (if at all) i'll call
2180  * this from userland when it's in the kernel.
2181  * ********************************************************************
2182  */
2183 static const char *
2184 sf(int f)
2185 {
2186 	static char s[256];
2187 	char *c;
2188 
2189 	s[0] = '\0';
2190 	c = "";
2191 
2192 #define print_flag(_f, _s, _c, _q, _x) \
2193 	if (((_f) & (__CONCAT(CTLFLAG_,_x))) == (__CONCAT(CTLFLAG_,_q))) { \
2194 		strlcat((_s), (_c), sizeof(_s)); \
2195 		strlcat((_s), __STRING(_q), sizeof(_s)); \
2196 		(_c) = ","; \
2197 		(_f) &= ~__CONCAT(CTLFLAG_,_x); \
2198 	}
2199 
2200 	print_flag(f, s, c, READONLY,  READWRITE);
2201 	print_flag(f, s, c, READONLY1, READWRITE);
2202 	print_flag(f, s, c, READONLY2, READWRITE);
2203 	print_flag(f, s, c, READWRITE, READWRITE);
2204 	print_flag(f, s, c, ANYWRITE,  ANYWRITE);
2205 	print_flag(f, s, c, PRIVATE,   PRIVATE);
2206 	print_flag(f, s, c, PERMANENT, PERMANENT);
2207 	print_flag(f, s, c, OWNDATA,   OWNDATA);
2208 	print_flag(f, s, c, IMMEDIATE, IMMEDIATE);
2209 	print_flag(f, s, c, HEX,       HEX);
2210 	print_flag(f, s, c, ROOT,      ROOT);
2211 	print_flag(f, s, c, ANYNUMBER, ANYNUMBER);
2212 	print_flag(f, s, c, HIDDEN,    HIDDEN);
2213 	print_flag(f, s, c, ALIAS,     ALIAS);
2214 #undef print_flag
2215 
2216 	if (f) {
2217 		char foo[9];
2218 		snprintf(foo, sizeof(foo), "%x", f);
2219 		strlcat(s, c, sizeof(s));
2220 		strlcat(s, foo, sizeof(s));
2221 	}
2222 
2223 	return (s);
2224 }
2225 
2226 static const char *
2227 st(int t)
2228 {
2229 
2230 	switch (t) {
2231 	case CTLTYPE_NODE:
2232 		return "NODE";
2233 	case CTLTYPE_INT:
2234 		return "INT";
2235 	case CTLTYPE_STRING:
2236 		return "STRING";
2237 	case CTLTYPE_QUAD:
2238 		return "QUAD";
2239 	case CTLTYPE_STRUCT:
2240 		return "STRUCT";
2241 	}
2242 
2243 	return "???";
2244 }
2245 
2246 void
2247 sysctl_dump(const struct sysctlnode *d)
2248 {
2249 	static char nmib[64], smib[256];
2250 	static int indent;
2251 	struct sysctlnode *n;
2252 	char *np, *sp, tmp[20];
2253 	int i;
2254 
2255 	if (d == NULL)
2256 		return;
2257 
2258 	np = &nmib[strlen(nmib)];
2259 	sp = &smib[strlen(smib)];
2260 
2261 	if (!(d->sysctl_flags & CTLFLAG_ROOT)) {
2262 		snprintf(tmp, sizeof(tmp), "%d", d->sysctl_num);
2263 		strcat(nmib, ".");
2264 		strcat(smib, ".");
2265 		strcat(nmib, tmp);
2266 		strcat(smib, d->sysctl_name);
2267 		printf("%s -> %s (%d)\n", &nmib[1], &smib[1],
2268 		       SYSCTL_TYPE(d->sysctl_flags));
2269 	}
2270 
2271 	if (1) {
2272 		printf("%*s%p:\tsysctl_name  [%s]\n", indent, "",
2273 		       d, d->sysctl_name);
2274 		printf("%*s\t\tsysctl_num    %d\n",   indent, "",
2275 		       d->sysctl_num);
2276 		printf("%*s\t\tsysctl_flags  %x (flags=%x<%s> type=%d<%s> "
2277 		       "size=%zu)\n",
2278 		       indent, "", d->sysctl_flags,
2279 		       SYSCTL_FLAGS(d->sysctl_flags),
2280 		       sf(SYSCTL_FLAGS(d->sysctl_flags)),
2281 		       SYSCTL_TYPE(d->sysctl_flags),
2282 		       st(SYSCTL_TYPE(d->sysctl_flags)),
2283 		       d->sysctl_size);
2284 		if (SYSCTL_TYPE(d->sysctl_flags) == CTLTYPE_NODE) {
2285 			printf("%*s\t\tsysctl_csize  %d\n",   indent, "",
2286 			       d->sysctl_csize);
2287 			printf("%*s\t\tsysctl_clen   %d\n",   indent, "",
2288 			       d->sysctl_clen);
2289 			printf("%*s\t\tsysctl_child  %p\n",   indent, "",
2290 			       d->sysctl_child);
2291 		} else
2292 			printf("%*s\t\tsysctl_data   %p\n",   indent, "",
2293 			       d->sysctl_data);
2294 		printf("%*s\t\tsysctl_func   %p\n",   indent, "",
2295 		       d->sysctl_func);
2296 		printf("%*s\t\tsysctl_parent %p\n",   indent, "",
2297 		       d->sysctl_parent);
2298 		printf("%*s\t\tsysctl_ver    %d\n",   indent, "",
2299 		       d->sysctl_ver);
2300 	}
2301 
2302 	if (SYSCTL_TYPE(d->sysctl_flags) == CTLTYPE_NODE) {
2303 		indent += 8;
2304 		n = d->sysctl_child;
2305 		for (i = 0; i < d->sysctl_clen; i++) {
2306 			sysctl_dump(&n[i]);
2307 		}
2308 		indent -= 8;
2309 	}
2310 
2311 	np[0] = '\0';
2312 	sp[0] = '\0';
2313 }
2314 #endif /* 0 */
2315 
2316 /*
2317  * ********************************************************************
2318  * Deletes an entire n-ary tree.  Not recommended unless you know why
2319  * you're doing it.  Personally, I don't know why you'd even think
2320  * about it.
2321  * ********************************************************************
2322  */
2323 void
2324 sysctl_free(struct sysctlnode *rnode)
2325 {
2326 	struct sysctlnode *node, *pnode;
2327 
2328 	if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
2329 		printf("sysctl_free: rnode %p wrong version\n", rnode);
2330 		return;
2331 	}
2332 
2333 	if (rnode == NULL)
2334 		rnode = &sysctl_root;
2335 	pnode = rnode;
2336 
2337 	node = pnode->sysctl_child;
2338 	do {
2339 		while (node != NULL && pnode->sysctl_csize > 0) {
2340 			while (node <
2341 			       &pnode->sysctl_child[pnode->sysctl_clen] &&
2342 			       (SYSCTL_TYPE(node->sysctl_flags) !=
2343 				CTLTYPE_NODE ||
2344 				node->sysctl_csize == 0)) {
2345 				if (SYSCTL_FLAGS(node->sysctl_flags) &
2346 				    CTLFLAG_OWNDATA) {
2347 					if (node->sysctl_data != NULL) {
2348 						free(node->sysctl_data,
2349 						     M_SYSCTLDATA);
2350 						node->sysctl_data = NULL;
2351 					}
2352 				}
2353 				if (SYSCTL_FLAGS(node->sysctl_flags) &
2354 				    CTLFLAG_OWNDESC) {
2355 					if (node->sysctl_desc != NULL) {
2356 						/*XXXUNCONST*/
2357 						free(__UNCONST(node->sysctl_desc),
2358 						     M_SYSCTLDATA);
2359 						node->sysctl_desc = NULL;
2360 					}
2361 				}
2362 				node++;
2363 			}
2364 			if (node < &pnode->sysctl_child[pnode->sysctl_clen]) {
2365 				pnode = node;
2366 				node = node->sysctl_child;
2367 			} else
2368 				break;
2369 		}
2370 		if (pnode->sysctl_child != NULL)
2371 			free(pnode->sysctl_child, M_SYSCTLNODE);
2372 		pnode->sysctl_clen = 0;
2373 		pnode->sysctl_csize = 0;
2374 		pnode->sysctl_child = NULL;
2375 		node = pnode;
2376 		pnode = node->sysctl_parent;
2377 	} while (pnode != NULL && node != rnode);
2378 }
2379 
2380 int
2381 sysctl_log_add(struct sysctllog **logp, const struct sysctlnode *node)
2382 {
2383 	int name[CTL_MAXNAME], namelen, i;
2384 	const struct sysctlnode *pnode;
2385 	struct sysctllog *log;
2386 
2387 	if (node->sysctl_flags & CTLFLAG_PERMANENT)
2388 		return (0);
2389 
2390 	if (logp == NULL)
2391 		return (0);
2392 
2393 	if (*logp == NULL) {
2394 		log = malloc(sizeof(struct sysctllog),
2395 		       M_SYSCTLDATA, M_WAITOK|M_CANFAIL);
2396 		if (log == NULL) {
2397 			/* XXX print error message? */
2398 			return (-1);
2399 		}
2400 		log->log_num = malloc(16 * sizeof(int),
2401 		       M_SYSCTLDATA, M_WAITOK|M_CANFAIL);
2402 		if (log->log_num == NULL) {
2403 			/* XXX print error message? */
2404 			free(log, M_SYSCTLDATA);
2405 			return (-1);
2406 		}
2407 		memset(log->log_num, 0, 16 * sizeof(int));
2408 		log->log_root = NULL;
2409 		log->log_size = 16;
2410 		log->log_left = 16;
2411 		*logp = log;
2412 	} else
2413 		log = *logp;
2414 
2415 	/*
2416 	 * check that the root is proper.  it's okay to record the
2417 	 * address of the root of a tree.  it's the only thing that's
2418 	 * guaranteed not to shift around as nodes come and go.
2419 	 */
2420 	if (log->log_root == NULL)
2421 		log->log_root = sysctl_rootof(node);
2422 	else if (log->log_root != sysctl_rootof(node)) {
2423 		printf("sysctl: log %p root mismatch (%p)\n",
2424 		       log->log_root, sysctl_rootof(node));
2425 		return (-1);
2426 	}
2427 
2428 	/*
2429 	 * we will copy out name in reverse order
2430 	 */
2431 	for (pnode = node, namelen = 0;
2432 	     pnode != NULL && !(pnode->sysctl_flags & CTLFLAG_ROOT);
2433 	     pnode = pnode->sysctl_parent)
2434 		name[namelen++] = pnode->sysctl_num;
2435 
2436 	/*
2437 	 * do we have space?
2438 	 */
2439 	if (log->log_left < (namelen + 3))
2440 		sysctl_log_realloc(log);
2441 	if (log->log_left < (namelen + 3))
2442 		return (-1);
2443 
2444 	/*
2445 	 * stuff name in, then namelen, then node type, and finally,
2446 	 * the version for non-node nodes.
2447 	 */
2448 	for (i = 0; i < namelen; i++)
2449 		log->log_num[--log->log_left] = name[i];
2450 	log->log_num[--log->log_left] = namelen;
2451 	log->log_num[--log->log_left] = SYSCTL_TYPE(node->sysctl_flags);
2452 	if (log->log_num[log->log_left] != CTLTYPE_NODE)
2453 		log->log_num[--log->log_left] = node->sysctl_ver;
2454 	else
2455 		log->log_num[--log->log_left] = 0;
2456 
2457 	return (0);
2458 }
2459 
2460 void
2461 sysctl_teardown(struct sysctllog **logp)
2462 {
2463 	const struct sysctlnode *rnode;
2464 	struct sysctlnode node;
2465 	struct sysctllog *log;
2466 	uint namelen;
2467 	int *name, t, v, error, ni;
2468 	size_t sz;
2469 
2470 	if (logp == NULL || *logp == NULL)
2471 		return;
2472 	log = *logp;
2473 
2474 	error = sysctl_lock(NULL, NULL, 0);
2475 	if (error)
2476 		return;
2477 
2478 	memset(&node, 0, sizeof(node));
2479 
2480 	while (log->log_left < log->log_size) {
2481 		KASSERT((log->log_left + 3 < log->log_size) &&
2482 			(log->log_left + log->log_num[log->log_left + 2] <=
2483 			 log->log_size));
2484 		v = log->log_num[log->log_left++];
2485 		t = log->log_num[log->log_left++];
2486 		namelen = log->log_num[log->log_left++];
2487 		name = &log->log_num[log->log_left];
2488 
2489 		node.sysctl_num = name[namelen - 1];
2490 		node.sysctl_flags = SYSCTL_VERSION|t;
2491 		node.sysctl_ver = v;
2492 
2493 		rnode = log->log_root;
2494 		error = sysctl_locate(NULL, &name[0], namelen, &rnode, &ni);
2495 		if (error == 0) {
2496 			name[namelen - 1] = CTL_DESTROY;
2497 			rnode = rnode->sysctl_parent;
2498 			sz = 0;
2499 			(void)sysctl_destroy(&name[namelen - 1], 1, NULL,
2500 					     &sz, &node, sizeof(node),
2501 					     &name[0], NULL, rnode);
2502 		}
2503 
2504 		log->log_left += namelen;
2505 	}
2506 
2507 	KASSERT(log->log_size == log->log_left);
2508 	free(log->log_num, M_SYSCTLDATA);
2509 	free(log, M_SYSCTLDATA);
2510 	*logp = NULL;
2511 
2512 	sysctl_unlock(NULL);
2513 }
2514 
2515 /*
2516  * ********************************************************************
2517  * old_sysctl -- A routine to bridge old-style internal calls to the
2518  * new infrastructure.
2519  * ********************************************************************
2520  */
2521 int
2522 old_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2523 	   void *newp, size_t newlen, struct lwp *l)
2524 {
2525 	int error;
2526 	size_t oldlen = 0;
2527 	size_t savelen;
2528 
2529 	if (oldlenp) {
2530 		oldlen = *oldlenp;
2531 	}
2532 	savelen = oldlen;
2533 
2534 	error = sysctl_lock(l, oldp, savelen);
2535 	if (error)
2536 		return (error);
2537 	error = sysctl_dispatch(name, namelen, oldp, &oldlen,
2538 				newp, newlen, name, l, NULL);
2539 	sysctl_unlock(l);
2540 	if (error == 0 && oldp != NULL && savelen < oldlen)
2541 		error = ENOMEM;
2542 
2543 	if (oldlenp) {
2544 		*oldlenp = oldlen;
2545 	}
2546 
2547 	return (error);
2548 }
2549 
2550 /*
2551  * ********************************************************************
2552  * Section 4: Generic helper routines
2553  * ********************************************************************
2554  * "helper" routines that can do more finely grained access control,
2555  * construct structures from disparate information, create the
2556  * appearance of more nodes and sub-trees, etc.  for example, if
2557  * CTL_PROC wanted a helper function, it could respond to a CTL_QUERY
2558  * with a dynamically created list of nodes that represented the
2559  * currently running processes at that instant.
2560  * ********************************************************************
2561  */
2562 
2563 /*
2564  * first, a few generic helpers that provide:
2565  *
2566  * sysctl_needfunc()		a readonly interface that emits a warning
2567  * sysctl_notavail()		returns EOPNOTSUPP (generic error)
2568  * sysctl_null()		an empty return buffer with no error
2569  */
2570 int
2571 sysctl_needfunc(SYSCTLFN_ARGS)
2572 {
2573 	int error;
2574 
2575 	printf("!!SYSCTL_NEEDFUNC!!\n");
2576 
2577 	if (newp != NULL || namelen != 0)
2578 		return (EOPNOTSUPP);
2579 
2580 	error = 0;
2581 	if (oldp != NULL)
2582 		error = sysctl_copyout(l, rnode->sysctl_data, oldp,
2583 				       MIN(rnode->sysctl_size, *oldlenp));
2584 	*oldlenp = rnode->sysctl_size;
2585 
2586 	return (error);
2587 }
2588 
2589 int
2590 sysctl_notavail(SYSCTLFN_ARGS)
2591 {
2592 
2593 	if (namelen == 1 && name[0] == CTL_QUERY)
2594 		return (sysctl_query(SYSCTLFN_CALL(rnode)));
2595 
2596 	return (EOPNOTSUPP);
2597 }
2598 
2599 int
2600 sysctl_null(SYSCTLFN_ARGS)
2601 {
2602 
2603 	*oldlenp = 0;
2604 
2605 	return (0);
2606 }
2607 
2608 /*
2609  * ********************************************************************
2610  * Section 5: The machinery that makes it all go
2611  * ********************************************************************
2612  * Memory "manglement" routines.  Not much to this, eh?
2613  * ********************************************************************
2614  */
2615 static int
2616 sysctl_alloc(struct sysctlnode *p, int x)
2617 {
2618 	int i;
2619 	struct sysctlnode *n;
2620 
2621 	assert(p->sysctl_child == NULL);
2622 
2623 	if (x == 1)
2624 		n = malloc(sizeof(struct sysctlnode),
2625 		       M_SYSCTLNODE, M_WAITOK|M_CANFAIL);
2626 	else
2627 		n = malloc(SYSCTL_DEFSIZE * sizeof(struct sysctlnode),
2628 		       M_SYSCTLNODE, M_WAITOK|M_CANFAIL);
2629 	if (n == NULL)
2630 		return (ENOMEM);
2631 
2632 	if (x == 1) {
2633 		memset(n, 0, sizeof(struct sysctlnode));
2634 		p->sysctl_csize = 1;
2635 	} else {
2636 		memset(n, 0, SYSCTL_DEFSIZE * sizeof(struct sysctlnode));
2637 		p->sysctl_csize = SYSCTL_DEFSIZE;
2638 	}
2639 	p->sysctl_clen = 0;
2640 
2641 	for (i = 0; i < p->sysctl_csize; i++)
2642 		n[i].sysctl_parent = p;
2643 
2644 	p->sysctl_child = n;
2645 	return (0);
2646 }
2647 
2648 static int
2649 sysctl_realloc(struct sysctlnode *p)
2650 {
2651 	int i, j;
2652 	struct sysctlnode *n;
2653 
2654 	assert(p->sysctl_csize == p->sysctl_clen);
2655 
2656 	/*
2657 	 * how many do we have...how many should we make?
2658 	 */
2659 	i = p->sysctl_clen;
2660 	n = malloc(2 * i * sizeof(struct sysctlnode), M_SYSCTLNODE,
2661 		   M_WAITOK|M_CANFAIL);
2662 	if (n == NULL)
2663 		return (ENOMEM);
2664 
2665 	/*
2666 	 * move old children over...initialize new children
2667 	 */
2668 	memcpy(n, p->sysctl_child, i * sizeof(struct sysctlnode));
2669 	memset(&n[i], 0, i * sizeof(struct sysctlnode));
2670 	p->sysctl_csize = 2 * i;
2671 
2672 	/*
2673 	 * reattach moved (and new) children to parent; if a moved
2674 	 * child node has children, reattach the parent pointers of
2675 	 * grandchildren
2676 	 */
2677         for (i = 0; i < p->sysctl_csize; i++) {
2678                 n[i].sysctl_parent = p;
2679 		if (n[i].sysctl_child != NULL) {
2680 			for (j = 0; j < n[i].sysctl_csize; j++)
2681 				n[i].sysctl_child[j].sysctl_parent = &n[i];
2682 		}
2683 	}
2684 
2685 	/*
2686 	 * get out with the old and in with the new
2687 	 */
2688 	free(p->sysctl_child, M_SYSCTLNODE);
2689 	p->sysctl_child = n;
2690 
2691 	return (0);
2692 }
2693 
2694 static int
2695 sysctl_log_realloc(struct sysctllog *log)
2696 {
2697 	int *n, s, d;
2698 
2699 	s = log->log_size * 2;
2700 	d = log->log_size;
2701 
2702 	n = malloc(s * sizeof(int), M_SYSCTLDATA, M_WAITOK|M_CANFAIL);
2703 	if (n == NULL)
2704 		return (-1);
2705 
2706 	memset(n, 0, s * sizeof(int));
2707 	memcpy(&n[d], log->log_num, d * sizeof(int));
2708 	free(log->log_num, M_SYSCTLDATA);
2709 	log->log_num = n;
2710 	if (d)
2711 		log->log_left += d;
2712 	else
2713 		log->log_left = s;
2714 	log->log_size = s;
2715 
2716 	return (0);
2717 }
2718 
2719 /*
2720  * ********************************************************************
2721  * Section 6: Conversion between API versions wrt the sysctlnode
2722  * ********************************************************************
2723  */
2724 static int
2725 sysctl_cvt_in(struct lwp *l, int *vp, const void *i, size_t sz,
2726 	      struct sysctlnode *node)
2727 {
2728 	int error, flags;
2729 
2730 	if (i == NULL || sz < sizeof(flags))
2731 		return (EINVAL);
2732 
2733 	error = sysctl_copyin(l, i, &flags, sizeof(flags));
2734 	if (error)
2735 		return (error);
2736 
2737 #if (SYSCTL_VERSION != SYSCTL_VERS_1)
2738 #error sysctl_cvt_in: no support for SYSCTL_VERSION
2739 #endif /*  (SYSCTL_VERSION != SYSCTL_VERS_1) */
2740 
2741 	if (sz == sizeof(*node) &&
2742 	    SYSCTL_VERS(flags) == SYSCTL_VERSION) {
2743 		error = sysctl_copyin(l, i, node, sizeof(*node));
2744 		if (error)
2745 			return (error);
2746 		*vp = SYSCTL_VERSION;
2747 		return (0);
2748 	}
2749 
2750 	return (EINVAL);
2751 }
2752 
2753 static int
2754 sysctl_cvt_out(struct lwp *l, int v, const struct sysctlnode *i,
2755 	       void *ovp, size_t left, size_t *szp)
2756 {
2757 	size_t sz = sizeof(*i);
2758 	const void *src = i;
2759 	int error;
2760 
2761 	switch (v) {
2762 	case SYSCTL_VERS_0:
2763 		return (EINVAL);
2764 
2765 #if (SYSCTL_VERSION != SYSCTL_VERS_1)
2766 #error sysctl_cvt_out: no support for SYSCTL_VERSION
2767 #endif /*  (SYSCTL_VERSION != SYSCTL_VERS_1) */
2768 
2769 	case SYSCTL_VERSION:
2770 		/* nothing more to do here */
2771 		break;
2772 	}
2773 
2774 	if (ovp != NULL && left >= sz) {
2775 		error = sysctl_copyout(l, src, ovp, sz);
2776 		if (error)
2777 			return (error);
2778 	}
2779 
2780 	if (szp != NULL)
2781 		*szp = sz;
2782 
2783 	return (0);
2784 }
2785