1.\" $NetBSD: sysctl.9,v 1.24 2022/09/07 01:18:32 pgoyette Exp $ 2.\" 3.\" Copyright (c) 2004 The NetBSD Foundation, Inc. 4.\" All rights reserved. 5.\" 6.\" This code is derived from software contributed to The NetBSD Foundation 7.\" by Andrew Brown. 8.\" 9.\" Redistribution and use in source and binary forms, with or without 10.\" modification, are permitted provided that the following conditions 11.\" are met: 12.\" 1. Redistributions of source code must retain the above copyright 13.\" notice, this list of conditions and the following disclaimer. 14.\" 2. Redistributions in binary form must reproduce the above copyright 15.\" notice, this list of conditions and the following disclaimer in the 16.\" documentation and/or other materials provided with the distribution. 17.\" 18.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28.\" POSSIBILITY OF SUCH DAMAGE. 
29.\" 30.Dd September 6, 2022 31.Dt SYSCTL 9 32.Os 33.Sh NAME 34.Nm sysctl 35.Nd system variable control interfaces 36.Sh SYNOPSIS 37.In sys/param.h 38.In sys/sysctl.h 39.Pp 40Primary external interfaces: 41.Ft void 42.Fn sysctl_init void 43.Ft int 44.Fn sysctl_lock "struct lwp *l" "void *oldp" "size_t savelen" 45.Ft int 46.Fn sysctl_dispatch "const int *name" "u_int namelen" "void *oldp" \ 47"size_t *oldlenp" "const void *newp" "size_t newlen" "const int *oname" \ 48"struct lwp *l" "const struct sysctlnode *rnode" 49.Ft void 50.Fn sysctl_unlock "struct lwp *l" 51.Ft int 52.Fn sysctl_createv "struct sysctllog **log" "int cflags" \ 53"const struct sysctlnode **rnode" "const struct sysctlnode **cnode" \ 54"int flags" "int type" "const char *namep" "const char *desc" \ 55"sysctlfn func" "u_quad_t qv" "void *newp" "size_t newlen" ... 56.Ft int 57.Fn sysctl_destroyv "struct sysctlnode *rnode" ... 58.Ft void 59.Fn sysctl_free "struct sysctlnode *rnode" 60.Ft void 61.Fn sysctl_teardown "struct sysctllog **" 62.Ft int 63.Fn old_sysctl "int *name" "u_int namelen" "void *oldp" \ 64"size_t *oldlenp" "void *newp" "size_t newlen" "struct lwp *l" 65.Pp 66Core internal functions: 67.Ft int 68.Fn sysctl_locate "struct lwp *l" "const int *name" "u_int namelen" \ 69"const struct sysctlnode **rnode" "int *nip" 70.Ft int 71.Fn sysctl_lookup "const int *name" "u_int namelen" "void *oldp" \ 72"size_t *oldlenp" "const void *newp" "size_t newlen" "const int *oname" \ 73"struct lwp *l" "const struct sysctlnode *rnode" 74.Ft int 75.Fn sysctl_create "const int *name" "u_int namelen" "void *oldp" \ 76"size_t *oldlenp" "const void *newp" "size_t newlen" "const int *oname" \ 77"struct lwp *l" "const struct sysctlnode *rnode" 78.Ft int 79.Fn sysctl_destroy "const int *name" "u_int namelen" "void *oldp" \ 80"size_t *oldlenp" "const void *newp" "size_t newlen" "const int *oname" \ 81"struct lwp *l" "const struct sysctlnode *rnode" 82.Ft int 83.Fn sysctl_query "const int *name" "u_int namelen" 
"void *oldp" \ 84"size_t *oldlenp" "const void *newp" "size_t newlen" "const int *oname" \ 85"struct lwp *l" "const struct sysctlnode *rnode" 86.Pp 87Simple 88.Dq helper 89functions: 90.Ft int 91.Fn sysctl_needfunc "const int *name" "u_int namelen" "void *oldp" \ 92"size_t *oldlenp" "const void *newp" "size_t newlen" "const int *oname" \ 93"struct lwp *l" "const struct sysctlnode *rnode" 94.Ft int 95.Fn sysctl_notavail "const int *name" "u_int namelen" "void *oldp" \ 96"size_t *oldlenp" "const void *newp" "size_t newlen" "const int *oname" \ 97"struct lwp *l" "const struct sysctlnode *rnode" 98.Ft int 99.Fn sysctl_null "const int *name" "u_int namelen" "void *oldp" \ 100"size_t *oldlenp" "const void *newp" "size_t newlen" "const int *oname" \ 101"struct lwp *l" "const struct sysctlnode *rnode" 102.Sh DESCRIPTION 103The SYSCTL subsystem instruments a number of kernel tunables and other 104data structures via a simple MIB-like interface, primarily for 105consumption by userland programs, but also for use internally by the 106kernel. 107.Sh LOCKING 108All operations on the SYSCTL tree must be protected by acquiring the 109main SYSCTL lock. 110The only functions that can be called when the lock is not held are 111.Fn sysctl_lock , 112.Fn sysctl_createv , 113.Fn sysctl_destroyv , 114and 115.Fn old_sysctl . 116All other functions require the tree to be locked. 117This is to prevent other users of the tree from moving nodes around 118during an add operation, or from destroying nodes or subtrees that are 119actively being used. 120The lock is acquired by calling 121.Fn sysctl_lock 122with a pointer to the process's lwp 123.Fa l 124.Dv ( NULL 125may be passed to all functions as the lwp pointer if no lwp is 126appropriate, though any changes made via 127.Fn sysctl_create , 128.Fn sysctl_destroy , 129.Fn sysctl_lookup , 130or by any helper function will be done with effective superuser 131privileges). 
132.Pp 133The 134.Fa oldp 135and 136.Fa savelen 137arguments are a pointer to and the size of the memory region the 138caller will be using to collect data from SYSCTL. 139These may also be 140.Dv NULL 141and 0, respectively. 142.Pp 143The memory region will be locked via 144.Fn uvm_vslock 145if it is a region in userspace. 146The address and size of the region are recorded so that when the 147SYSCTL lock is to be released via 148.Fn sysctl_unlock , 149only the lwp pointer 150.Fa l 151is required. 152.Sh LOOKUPS 153Once the lock has been acquired, it is typical to call 154.Fn sysctl_dispatch 155to handle the request. 156.Fn sysctl_dispatch 157will examine the contents of 158.Fa name , 159an array of integers at least 160.Fa namelen 161long, which is to be located in kernel space, in order to determine 162which function to call to handle the specific request. 163.Pp 164The following algorithm is used by 165.Fn sysctl_dispatch 166to determine the function to call: 167.Bl -bullet -offset indent 168.It 169Scan the tree using 170.Fn sysctl_locate . 171.It 172If the node returned has a 173.Dq helper 174function, call it. 175.It 176If the requested node was found but has no function, call 177.Fn sysctl_lookup . 178.It 179If the node was not found and 180.Fa name 181specifies one of 182.Fn sysctl_query , 183.Fn sysctl_create , 184or 185.Fn sysctl_destroy , 186call the appropriate function. 187.It 188If none of these options applies and no other error was yet recorded, 189return 190.Er EOPNOTSUPP . 191.El 192The 193.Fa oldp 194and 195.Fa oldlenp 196arguments to 197.Fn sysctl_dispatch , 198as with all the other core functions, describe an area into which the 199current or requested value may be copied. 200.Fa oldp 201may or may not be a pointer into userspace (as dictated by whether 202.Fa l 203is 204.Dv NULL 205or not). 206.Fa oldlenp 207is a 208.No non- Ns Dv NULL 209pointer to a size_t. 
210.Fa newp 211and 212.Fa newlen 213describe an area where the new value for the request may be found; 214.Fa newp 215may also be a pointer into userspace. 216The 217.Fa oname 218argument is a 219.No non- Ns Dv NULL 220pointer to the base of the request currently 221being processed. 222By simple arithmetic on 223.Fa name , 224.Fa namelen , 225and 226.Fa oname , 227one can easily determine the entire original request and 228.Fa namelen 229values, if needed. 230The 231.Fa rnode 232value, as passed to 233.Fn sysctl_dispatch , 234represents the root of the tree into which the current request is to 235be dispatched. 236If 237.Dv NULL , 238the main tree will be used. 239.Pp 240The 241.Fn sysctl_locate 242function scans a tree for the node most specific to a request. 243If the pointer referenced by 244.Fa rnode 245is not 246.Dv NULL , 247the tree indicated is searched, otherwise the main tree 248will be used. 249The address of the most relevant node will be returned via 250.Fa rnode 251and the number of MIB entries consumed will be returned via 252.Fa nip , 253if it is not 254.Dv NULL . 255.Pp 256The 257.Fn sysctl_lookup 258function takes the same arguments as 259.Fn sysctl_dispatch 260with the caveat that the value for 261.Fa namelen 262must be zero in order to indicate that the node referenced by the 263.Fa rnode 264argument is the one to which the lookup is being applied. 265.Sh CREATION AND DESTRUCTION OF NODES 266New nodes are created and destroyed by the 267.Fn sysctl_create 268and 269.Fn sysctl_destroy 270functions. 271These functions take the same arguments as 272.Fn sysctl_dispatch 273with the additional requirement that the 274.Fa namelen 275argument must be 1 and the 276.Fa name 277argument must point to an integer valued either 278.Dv CTL_CREATE 279or 280.Dv CTL_CREATESYM 281when creating a new node, or 282.Dv CTL_DESTROY 283when destroying 284a node. 
285.Pp 286The 287.Fa newp 288and 289.Fa newlen 290arguments should point to a copy of the node to be created or 291destroyed. 292If the create or destroy operation was successful, a copy of the node 293created or destroyed will be placed in the space indicated by 294.Fa oldp 295and 296.Fa oldlenp . 297If the create operation fails because of a conflict with an existing 298node, a copy of that node will be returned instead. 299.Pp 300In order to facilitate the creation and destruction of nodes from a 301given tree by kernel subsystems, the functions 302.Fn sysctl_createv 303and 304.Fn sysctl_destroyv 305are provided. 306These functions take care of the overhead of filling in the contents 307of the create or destroy request, dealing with locking, locating the 308appropriate parent node, etc. 309.Pp 310The arguments to 311.Fn sysctl_createv 312are used to construct the new node. 313If the 314.Fa log 315argument is not 316.Dv NULL , 317a 318.Em sysctllog 319structure will be allocated and the pointer referenced 320will be changed to address it. 321The same log may be used for any number of nodes, provided they are 322all inserted into the same tree. 323This allows for a series of nodes to be created and later removed from 324the tree in a single transaction (via 325.Fn sysctl_teardown ) 326without the need for any record 327keeping on the caller's part. 328.Pp 329The 330.Fa cflags 331argument is currently unused and must be zero. 332The 333.Fa rnode 334argument must either be 335.Dv NULL 336or a valid pointer to a reference to the root of the tree into which 337the new node must be placed. 338If it is 339.Dv NULL , 340the main tree will be used. 341It is illegal for 342.Fa rnode 343to refer to a 344.Dv NULL 345pointer. 346If the 347.Fa cnode 348argument is not 349.Dv NULL , 350on return it will be adjusted to point to the address of the new node. 
351.Pp 352The 353.Fa flags 354and 355.Fa type 356arguments are combined into the 357.Fa sysctl_flags 358field, and the current value for 359.Dv SYSCTL_VERSION 360is added in. 361The following types are defined: 362.Bl -tag -width ".Dv CTLTYPE_STRING " -offset indent 363.It Dv CTLTYPE_NODE 364A node intended to be a parent for other nodes. 365.It Dv CTLTYPE_INT 366A signed integer. 367.It Dv CTLTYPE_STRING 368A NUL-terminated string. 369.It Dv CTLTYPE_QUAD 370An unsigned 64-bit integer. 371.It Dv CTLTYPE_STRUCT 372A structure. 373.It Dv CTLTYPE_BOOL 374A boolean. 375.El 376.Pp 377The 378.Fa namep 379argument is copied into the 380.Fa sysctl_name 381field and must be less than 382.Dv SYSCTL_NAMELEN 383characters in length. 384The string indicated by 385.Fa desc 386will be copied if the 387.Dv CTLFLAG_OWNDESC 388flag is set, and will be used as the node's description. 389.Pp 390Two additional remarks: 391.Bl -enum -offset indent 392.It 393The 394.Dv CTLFLAG_PERMANENT 395flag can only be set from SYSCTL setup routines (see 396.Sx SETUP FUNCTIONS ) 397as called by 398.Fn sysctl_init . 399.It 400If 401.Fn sysctl_destroyv 402attempts to delete a node that does not own its own description (and 403is not marked as permanent), but the deletion fails, the description 404will be copied and 405.Fn sysctl_destroyv 406will set the 407.Dv CTLFLAG_OWNDESC 408flag. 409.El 410.Pp 411The 412.Fa func 413argument is the name of a 414.Dq helper 415function (see 416.Sx HELPER FUNCTIONS AND MACROS ) . 417If the 418.Dv CTLFLAG_IMMEDIATE 419flag is set, the 420.Fa qv 421argument will be interpreted as the initial value for the new 422.Dq bool , 423.Dq int 424or 425.Dq quad 426node. 427This flag does not apply to any other type of node. 428The 429.Fa newp 430and 431.Fa newlen 432arguments describe the data external to SYSCTL that is to be 433instrumented. 
434One of 435.Fa func , 436.Fa qv 437and the 438.Dv CTLFLAG_IMMEDIATE 439flag, or 440.Fa newp 441and 442.Fa newlen 443must be given for nodes that instrument data, otherwise an error is 444returned. 445.Pp 446The remaining arguments are a list of integers specifying the path 447through the MIB to the node being created. 448The list must be terminated by the 449.Dv CTL_EOL 450value. 451The penultimate value in the list may be 452.Dv CTL_CREATE 453if a dynamic MIB entry is to be made for this node. 454.Fn sysctl_createv 455specifically does not support 456.Dv CTL_CREATESYM , 457since setup routines are 458expected to be able to use the in-kernel 459.Xr ksyms 4 460interface to discover the location of the data to be instrumented. 461If the node to be created matches a node that already exists, a return 462code of 0 is given, indicating success. 463.Pp 464When using 465.Fn sysctl_destroyv 466to destroy a given node, the 467.Fa rnode 468argument, if not 469.Dv NULL , 470is taken to be the root of the tree from which 471the node is to be destroyed, otherwise the main tree is used. 472The rest of the arguments are a list of integers specifying the path 473through the MIB to the node being destroyed. 474If the node being destroyed does not exist, a successful return code 475is given. 476Nodes marked with the 477.Dv CTLFLAG_PERMANENT 478flag cannot be destroyed. 479.Sh HELPER FUNCTIONS AND MACROS 480Helper functions are invoked with the same common argument set as 481.Fn sysctl_dispatch 482except that the 483.Fa rnode 484argument will never be 485.Dv NULL . 486It will be set to point to the node that corresponds most closely to 487the current request. 488Helpers are forbidden from modifying the node they are passed; they 489should instead copy the structure if changes are required in order to 490effect access control or other checks. 
491The prototype and definition of a 492.Dq helper 493function that needs to ensure that a newly assigned 494value is within a certain range (presuming external data) would look 495like the following: 496.Pp 497.Bd -literal -offset indent -compact 498static int sysctl_helper(SYSCTLFN_PROTO); 499 500static int 501sysctl_helper(SYSCTLFN_ARGS) 502{ 503 struct sysctlnode node; 504 int t, error; 505 506 t = *(int *)rnode->sysctl_data; 507 508 node = *rnode; 509 node.sysctl_data = &t; 510 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 511 if (error || newp == NULL) 512 return (error); 513 514 if (t < 0 || t > 20) 515 return (EINVAL); 516 517 *(int *)rnode->sysctl_data = t; 518 return (0); 519} 520.Ed 521.Pp 522The use of the 523.Dv SYSCTLFN_PROTO , 524.Dv SYSCTLFN_ARGS , No and 525.Dv SYSCTLFN_CALL 526macros ensures that all arguments are passed properly. 527The single argument to the 528.Dv SYSCTLFN_CALL 529macro is the pointer to the node being examined. 530.Pp 531Three basic helper functions are available for use. 532.Fn sysctl_needfunc 533will emit a warning to the system console whenever it is invoked and 534provides a simplistic read-only interface to the given node. 535.Fn sysctl_notavail 536will forward 537.Dq queries 538to 539.Fn sysctl_query 540so that subtrees can be discovered, but will return 541.Er EOPNOTSUPP 542for any other condition. 543.Fn sysctl_null 544specifically ignores any arguments given, sets the value indicated by 545.Fa oldlenp 546to zero, and returns success. 547.Sh SETUP FUNCTIONS 548Although nodes can be added to the SYSCTL tree at any time, in order to 549add nodes during the kernel bootstrap phase (and during loadable module 550initialization), a proper 551.Dq setup 552function must be used. 553Setup functions are declared using the 554.Dv SYSCTL_SETUP 555macro, which takes the name of the function and a short string 556description of the function as arguments. 557.Po 558See the 559.Dv SYSCTL_DEBUG_SETUP 560kernel configuration in 561.Xr options 4 . 
562.Pc 563.Pp 564The address of the function is added to a list of functions that 565.Fn sysctl_init 566traverses during initialization. 567For loadable kernel modules (see 568.Xr module 9 ) , 569the list of functions is called from the module loader before the module's 570initialization routine. 571Any sysctl nodes created for the loadable module are removed using 572.Fn sysctl_teardown 573after calling the module's termination code. 574.Pp 575Setup functions do not have to add nodes to the main tree, but can set 576up their own trees for emulation or other purposes. 577Emulations that require use of a main tree but with some nodes changed 578to suit their own purposes can arrange to overlay a sparse private 579tree onto their main tree by making the 580.Fa e_sysctlovly 581member of their struct emul definition point to the overlaid tree. 582.Pp 583Setup functions should take care to create all nodes from the root 584down to the subtree they are creating, since the order in which setup 585functions are called is arbitrary (the order in which setup functions 586are called is only determined by the ordering of the object files as 587passed to the linker when the kernel is built). 588.Sh MISCELLANEOUS FUNCTIONS 589.Fn sysctl_init 590is called early in the kernel bootstrap process. 591It initializes the SYSCTL lock, calls all the registered setup 592functions, and marks the tree as permanent. 593.Pp 594.Fn sysctl_free 595will unconditionally delete any and all nodes below the given node. 596Its intended use is for the deletion of entire trees, not subtrees. 597If a subtree is to be removed, 598.Fn sysctl_destroy 599or 600.Fn sysctl_destroyv 601should be used to ensure that nodes not owned by the sub-system being 602deactivated are not mistakenly destroyed. 603The SYSCTL lock must be held when calling this function. 604.Pp 605.Fn sysctl_teardown 606unwinds a 607.Em sysctllog 608and deletes the nodes in the opposite order in 609which they were created. 
610.Pp 611.Fn old_sysctl 612provides an interface similar to the old SYSCTL implementation, with 613the exception that access checks on a per-node basis are performed if 614the 615.Fa l 616argument is 617.No non- Ns Dv NULL . 618If called with a 619.Dv NULL 620argument, the values for 621.Fa newp 622and 623.Fa oldp 624are interpreted as kernel addresses, and access is performed as for 625the superuser. 626.Sh NOTES 627It is expected that nodes will be added to (or removed from) the tree 628during the following stages of a machine's lifetime: 629.Pp 630.Bl -bullet -compact 631.It 632initialization \(em when the kernel is booting 633.It 634autoconfiguration \(em when devices are being probed at boot time 635.It 636.Dq plug and play 637device attachment \(em when a PC-Card, USB, or other device is plugged 638in or attached 639.It 640module initialization \(em when a module is being loaded 641.It 642.Dq run-time 643\(em when a process creates a node via the 644.Xr sysctl 3 645interface 646.El 647.Pp 648Nodes marked with 649.Dv CTLFLAG_PERMANENT 650can only be added to a tree during the first or initialization phase, 651and can never be removed. 652The initialization phase terminates when the main tree's root is 653marked with the 654.Dv CTLFLAG_PERMANENT 655flag. 656Once the main tree is marked in this manner, no nodes can be added to 657any tree that is marked with 658.Dv CTLFLAG_READONLY 659at its root, and no nodes can be added at all if the main tree's root 660is so marked. 661.Pp 662Nodes added by device drivers, modules, and at device insertion time can 663be added to (and removed from) 664.Dq read-only 665parent nodes. 666.Pp 667Nodes created by processes can only be added to 668.Dq writable 669parent nodes. 670See 671.Xr sysctl 3 672for a description of the flags that are allowed to be used 673when creating nodes. 674.Sh SEE ALSO 675.Xr sysctl 3 676.Sh HISTORY 677The dynamic SYSCTL implementation first appeared in 678.Nx 2.0 . 
679.Sh AUTHORS 680.An Andrew Brown 681.Aq atatat@NetBSD.org 682designed and implemented the dynamic SYSCTL implementation. 683