1 /* MIB service - main.c - request abstraction and first-level tree */
2 /*
3 * This is the Management Information Base (MIB) service. Its one and only
4 * task is to implement the sysctl(2) system call, which plays a fairly
5 * important role in parts of *BSD userland.
6 *
7 * The sysctl(2) interface is used to access a variety of information. In
8 * order to obtain that information, and possibly modify it, the MIB service
9 * calls into many other services. The MIB service must therefore not be
10 * called directly from other services, with the exception of ProcFS. In fact,
11 * ProcFS is currently the only service that is modeled as logically higher in
12 * the MINIX3 service stack than MIB, something that itself is possible only
13 * due to the nonblocking nature of VFS. MIB may issue blocking calls to VFS.
14 *
15 * The MIB service is in the boot image because even init(8) makes use of
16 * sysctl(2) during its own startup, so launching the MIB service at any later
17 * time would make a proper implementation of sysctl(2) impossible. Also, the
18 * service needs superuser privileges because it may need to issue privileged
19 * calls and obtain privileged information from other services.
20 *
21 * While most of the sysctl tree is maintained locally, the MIB service also
22 * allows other services to register "remote" subtrees which are then handled
23 * entirely by those services. This feature, which works much like file system
24 * mounting, allows 1) sysctl handling code to stay local to its corresponding
25 * service, and 2) parts of the sysctl tree to adapt and expand dynamically as
26 * optional services are started and stopped. Compared to the MIB service's
27 * local handling, remotely handled subtrees are subject to several additional
 * practical restrictions, however. In the current implementation, the MIB
29 * service makes blocking calls to remote services as needed; in the future,
30 * these interactions could be made (more) asynchronous.
31 *
32 * The MIB service was created by David van Moolenbroek <david@minix3.org>.
33 */
34
35 #include "mib.h"
36
37 /*
38 * Most of these initially empty nodes are filled in by their corresponding
39 * modules' _init calls; see mib_init below. However, some subtrees are not
40 * populated by the MIB service itself. CTL_NET is expected to be populated
41 * through registration of remote subtrees. The libc sysctl(3) wrapper code
42 * takes care of the CTL_USER subtree. It must have an entry here though, or
43 * sysctl(8) will not list it. CTL_VENDOR is also empty, but writable, so that
44 * it may be used by third parties.
45 */
46 static struct mib_node mib_table[] = {
47 /* 1*/ [CTL_KERN] = MIB_ENODE(_P | _RO, "kern", "High kernel"),
48 /* 2*/ [CTL_VM] = MIB_ENODE(_P | _RO, "vm", "Virtual memory"),
49 /* 4*/ [CTL_NET] = MIB_ENODE(_P | _RO, "net", "Networking"),
50 /* 6*/ [CTL_HW] = MIB_ENODE(_P | _RO, "hw", "Generic CPU, I/O"),
51 /* 8*/ [CTL_USER] = MIB_ENODE(_P | _RO, "user", "User-level"),
52 /*11*/ [CTL_VENDOR] = MIB_ENODE(_P | _RW, "vendor", "Vendor specific"),
53 /*32*/ [CTL_MINIX] = MIB_ENODE(_P | _RO, "minix", "MINIX3 specific"),
54 };
55
56 /*
57 * The root node of the tree. The root node is used internally only--it is
58 * impossible to access the root node itself from userland in any way. The
59 * node is writable by default, so that programs such as init(8) may create
60 * their own top-level entries.
61 */
62 struct mib_node mib_root = MIB_NODE(_RW, mib_table, "", "");
63
64 /*
65 * Structures describing old and new data as provided by userland. The primary
66 * advantage of these opaque structures is that we could in principle use them
67 * to implement storage of small data results in the sysctl reply message, so
68 * as to avoid the kernel copy, without changing any of the handler code.
69 */
70 struct mib_oldp {
71 endpoint_t oldp_endpt;
72 vir_bytes oldp_addr;
73 size_t oldp_len;
74 };
75 /*
76 * Same structure, different type: prevent accidental mixups, and avoid the
77 * need to use __restrict everywhere.
78 */
79 struct mib_newp {
80 endpoint_t newp_endpt;
81 vir_bytes newp_addr;
82 size_t newp_len;
83 };
84
85 /*
86 * Return TRUE or FALSE indicating whether the given offset is within the range
87 * of data that is to be copied out. This call can be used to test whether
88 * certain bits of data need to be prepared for copying at all.
89 */
90 int
mib_inrange(struct mib_oldp * oldp,size_t off)91 mib_inrange(struct mib_oldp * oldp, size_t off)
92 {
93
94 if (oldp == NULL)
95 return FALSE;
96
97 return (off < oldp->oldp_len);
98 }
99
100 /*
101 * Return the total length of the requested data. This should not be used
102 * directly except in highly unusual cases, such as particular node requests
103 * where the request semantics blatantly violate overall sysctl(2) semantics.
104 */
105 size_t
mib_getoldlen(struct mib_oldp * oldp)106 mib_getoldlen(struct mib_oldp * oldp)
107 {
108
109 if (oldp == NULL)
110 return 0;
111
112 return oldp->oldp_len;
113 }
114
115 /*
116 * Copy out (partial) data to the user. The copy is automatically limited to
117 * the range of data requested by the user. Return the requested length on
118 * success (for the caller's convenience) or an error code on failure.
119 */
120 ssize_t
mib_copyout(struct mib_oldp * __restrict oldp,size_t off,const void * __restrict buf,size_t size)121 mib_copyout(struct mib_oldp * __restrict oldp, size_t off,
122 const void * __restrict buf, size_t size)
123 {
124 size_t len;
125 int r;
126
127 len = size;
128 assert(len <= SSIZE_MAX);
129
130 if (oldp == NULL || off >= oldp->oldp_len)
131 return size; /* nothing to do */
132
133 if (len > oldp->oldp_len - off)
134 len = oldp->oldp_len - off;
135
136 if ((r = sys_datacopy(SELF, (vir_bytes)buf, oldp->oldp_endpt,
137 oldp->oldp_addr + off, len)) != OK)
138 return r;
139
140 return size;
141 }
142
143 /*
144 * Override the oldlen value returned from the call, in situations where an
145 * error is thrown as well.
146 */
147 void
mib_setoldlen(struct mib_call * call,size_t oldlen)148 mib_setoldlen(struct mib_call * call, size_t oldlen)
149 {
150
151 call->call_reslen = oldlen;
152 }
153
154 /*
155 * Return the new data length as provided by the user, or 0 if the user did not
156 * supply new data.
157 */
158 size_t
mib_getnewlen(struct mib_newp * newp)159 mib_getnewlen(struct mib_newp * newp)
160 {
161
162 if (newp == NULL)
163 return 0;
164
165 return newp->newp_len;
166 }
167
168 /*
169 * Copy in data from the user. The given length must match exactly the length
170 * given by the user. Return OK or an error code.
171 */
172 int
mib_copyin(struct mib_newp * __restrict newp,void * __restrict buf,size_t len)173 mib_copyin(struct mib_newp * __restrict newp, void * __restrict buf,
174 size_t len)
175 {
176
177 if (newp == NULL || len != newp->newp_len)
178 return EINVAL;
179
180 if (len == 0)
181 return OK;
182
183 return sys_datacopy(newp->newp_endpt, newp->newp_addr, SELF,
184 (vir_bytes)buf, len);
185 }
186
187 /*
188 * Copy in auxiliary data from the user, based on a user pointer obtained from
189 * data copied in earlier through mib_copyin().
190 */
191 int
mib_copyin_aux(struct mib_newp * __restrict newp,vir_bytes addr,void * __restrict buf,size_t len)192 mib_copyin_aux(struct mib_newp * __restrict newp, vir_bytes addr,
193 void * __restrict buf, size_t len)
194 {
195
196 assert(newp != NULL);
197
198 if (len == 0)
199 return OK;
200
201 return sys_datacopy(newp->newp_endpt, addr, SELF, (vir_bytes)buf, len);
202 }
203
204 /*
205 * Create a grant for a call's old data region, if not NULL, for the given
206 * endpoint. On success, store the grant (or GRANT_INVALID) in grantp and the
207 * length in lenp, and return OK. On error, return an error code that must not
208 * be ENOMEM.
209 */
210 int
mib_relay_oldp(endpoint_t endpt,struct mib_oldp * __restrict oldp,cp_grant_id_t * grantp,size_t * __restrict lenp)211 mib_relay_oldp(endpoint_t endpt, struct mib_oldp * __restrict oldp,
212 cp_grant_id_t * grantp, size_t * __restrict lenp)
213 {
214
215 if (oldp != NULL) {
216 *grantp = cpf_grant_magic(endpt, oldp->oldp_endpt,
217 oldp->oldp_addr, oldp->oldp_len, CPF_WRITE);
218 if (!GRANT_VALID(*grantp))
219 return EINVAL;
220 *lenp = oldp->oldp_len;
221 } else {
222 *grantp = GRANT_INVALID;
223 *lenp = 0;
224 }
225
226 return OK;
227 }
228
229 /*
230 * Create a grant for a call's new data region, if not NULL, for the given
231 * endpoint. On success, store the grant (or GRANT_INVALID) in grantp and the
232 * length in lenp, and return OK. On error, return an error code that must not
233 * be ENOMEM.
234 */
235 int
mib_relay_newp(endpoint_t endpt,struct mib_newp * __restrict newp,cp_grant_id_t * grantp,size_t * __restrict lenp)236 mib_relay_newp(endpoint_t endpt, struct mib_newp * __restrict newp,
237 cp_grant_id_t * grantp, size_t * __restrict lenp)
238 {
239
240 if (newp != NULL) {
241 *grantp = cpf_grant_magic(endpt, newp->newp_endpt,
242 newp->newp_addr, newp->newp_len, CPF_READ);
243 if (!GRANT_VALID(*grantp))
244 return EINVAL;
245 *lenp = newp->newp_len;
246 } else {
247 *grantp = GRANT_INVALID;
248 *lenp = 0;
249 }
250
251 return OK;
252 }
253
254 /*
255 * Check whether the user is allowed to perform privileged operations. The
256 * function returns a nonzero value if this is the case, and zero otherwise.
257 * Authorization is performed only once per call.
258 */
259 int
mib_authed(struct mib_call * call)260 mib_authed(struct mib_call * call)
261 {
262
263 if ((call->call_flags & (MIB_FLAG_AUTH | MIB_FLAG_NOAUTH)) == 0) {
264 /* Ask PM if this endpoint has superuser privileges. */
265 if (getnuid(call->call_endpt) == SUPER_USER)
266 call->call_flags |= MIB_FLAG_AUTH;
267 else
268 call->call_flags |= MIB_FLAG_NOAUTH;
269 }
270
271 return (call->call_flags & MIB_FLAG_AUTH);
272 }
273
274 /*
275 * Implement the sysctl(2) system call.
276 */
277 static int
mib_sysctl(message * __restrict m_in,int ipc_status,message * __restrict m_out)278 mib_sysctl(message * __restrict m_in, int ipc_status,
279 message * __restrict m_out)
280 {
281 vir_bytes oldaddr, newaddr;
282 size_t oldlen, newlen;
283 unsigned int namelen;
284 int s, name[CTL_MAXNAME];
285 endpoint_t endpt;
286 struct mib_oldp oldp, *oldpp;
287 struct mib_newp newp, *newpp;
288 struct mib_call call;
289 ssize_t r;
290
291 /* Only handle blocking calls. Ignore everything else. */
292 if (IPC_STATUS_CALL(ipc_status) != SENDREC)
293 return EDONTREPLY;
294
295 endpt = m_in->m_source;
296 oldaddr = m_in->m_lc_mib_sysctl.oldp;
297 oldlen = m_in->m_lc_mib_sysctl.oldlen;
298 newaddr = m_in->m_lc_mib_sysctl.newp;
299 newlen = m_in->m_lc_mib_sysctl.newlen;
300 namelen = m_in->m_lc_mib_sysctl.namelen;
301
302 if (namelen == 0 || namelen > CTL_MAXNAME)
303 return EINVAL;
304
305 /*
306 * In most cases, the entire name fits in the request message, so we
307 * can avoid a kernel copy.
308 */
309 if (namelen > CTL_SHORTNAME) {
310 if ((s = sys_datacopy(endpt, m_in->m_lc_mib_sysctl.namep, SELF,
311 (vir_bytes)&name, sizeof(name[0]) * namelen)) != OK)
312 return s;
313 } else
314 memcpy(name, m_in->m_lc_mib_sysctl.name,
315 sizeof(name[0]) * namelen);
316
317 /*
318 * Set up a structure for the old data, if any. When no old address is
319 * given, be forgiving if oldlen is not zero, as the user may simply
320 * not have initialized the variable before passing a pointer to it.
321 */
322 if (oldaddr != 0) {
323 oldp.oldp_endpt = endpt;
324 oldp.oldp_addr = oldaddr;
325 oldp.oldp_len = oldlen;
326 oldpp = &oldp;
327 } else
328 oldpp = NULL;
329
330 /*
331 * Set up a structure for the new data, if any. If one of newaddr and
332 * newlen is zero but not the other, we (like NetBSD) disregard both.
333 */
334 if (newaddr != 0 && newlen != 0) {
335 newp.newp_endpt = endpt;
336 newp.newp_addr = newaddr;
337 newp.newp_len = newlen;
338 newpp = &newp;
339 } else
340 newpp = NULL;
341
342 /*
343 * Set up a structure for other call parameters. Most of these should
344 * be used rarely, and we may want to add more later, so do not pass
345 * all of them around as actual function parameters all the time.
346 */
347 call.call_endpt = endpt;
348 call.call_name = name;
349 call.call_namelen = namelen;
350 call.call_flags = 0;
351 call.call_reslen = 0;
352
353 r = mib_dispatch(&call, oldpp, newpp);
354
355 /*
356 * From NetBSD: we copy out as much as we can from the old data, while
357 * at the same time computing the full data length. Then, here at the
358 * end, if the entire result did not fit in the destination buffer, we
359 * return ENOMEM instead of success, thus also returning a partial
360 * result and the full data length.
361 *
362 * It is also possible that data are copied out along with a "real"
363 * error. In that case, we must report a nonzero resulting length
364 * along with that error code. This is currently the case when node
365 * creation resulted in a collision, in which case the error code is
366 * EEXIST while the existing node is copied out as well.
367 */
368 if (r >= 0) {
369 m_out->m_mib_lc_sysctl.oldlen = (size_t)r;
370
371 if (oldaddr != 0 && oldlen < (size_t)r)
372 r = ENOMEM;
373 else
374 r = OK;
375 } else
376 m_out->m_mib_lc_sysctl.oldlen = call.call_reslen;
377
378 return r;
379 }
380
381 /*
382 * Initialize the service.
383 */
384 static int
mib_init(int type __unused,sef_init_info_t * info __unused)385 mib_init(int type __unused, sef_init_info_t * info __unused)
386 {
387
388 /*
389 * Initialize pointers and sizes of subtrees in different modules.
390 * This is needed because we cannot use sizeof on external arrays.
391 * We do initialize the node entry (including any other fields)
392 * statically through MIB_ENODE because that forces the array to be
393 * large enough to store the entry.
394 */
395 mib_kern_init(&mib_table[CTL_KERN]);
396 mib_vm_init(&mib_table[CTL_VM]);
397 mib_hw_init(&mib_table[CTL_HW]);
398 mib_minix_init(&mib_table[CTL_MINIX]);
399
400 /*
401 * Now that the static tree is complete, go through the entire tree,
402 * initializing miscellaneous fields.
403 */
404 mib_tree_init();
405
406 /* Prepare for requests to mount remote subtrees. */
407 mib_remote_init();
408
409 return OK;
410 }
411
412 /*
413 * Perform SEF startup.
414 */
415 static void
mib_startup(void)416 mib_startup(void)
417 {
418
419 sef_setcb_init_fresh(mib_init);
420 /*
421 * If we restart we lose all dynamic state, which means we lose all
422 * nodes that have been created at run time. However, running with
423 * only the static node tree is still better than not running at all.
424 */
425 sef_setcb_init_restart(mib_init);
426
427 sef_startup();
428 }
429
430 /*
431 * The Management Information Base (MIB) service.
432 */
433 int
main(void)434 main(void)
435 {
436 message m_in, m_out;
437 int r, ipc_status;
438
439 /* Perform initialization. */
440 mib_startup();
441
442 /* The main message loop. */
443 for (;;) {
444 /* Receive a request. */
445 if ((r = sef_receive_status(ANY, &m_in, &ipc_status)) != OK)
446 panic("sef_receive failed: %d", r);
447
448 /* Process the request. */
449 if (is_ipc_notify(ipc_status)) {
450 /* We are not expecting any notifications. */
451 printf("MIB: notification from %d\n", m_in.m_source);
452
453 continue;
454 }
455
456 memset(&m_out, 0, sizeof(m_out));
457
458 switch (m_in.m_type) {
459 case MIB_SYSCTL:
460 r = mib_sysctl(&m_in, ipc_status, &m_out);
461
462 break;
463
464 case MIB_REGISTER:
465 r = mib_register(&m_in, ipc_status);
466
467 break;
468
469 case MIB_DEREGISTER:
470 r = mib_deregister(&m_in, ipc_status);
471
472 break;
473
474 default:
475 if (IPC_STATUS_CALL(ipc_status) == SENDREC)
476 r = ENOSYS;
477 else
478 r = EDONTREPLY;
479 }
480
481 /* Send a reply, if applicable. */
482 if (r != EDONTREPLY) {
483 m_out.m_type = r;
484
485 if ((r = ipc_sendnb(m_in.m_source, &m_out)) != OK)
486 printf("MIB: ipc_sendnb failed (%d)\n", r);
487 }
488 }
489
490 /* NOTREACHED */
491 return 0;
492 }
493