1 /* LWIP service - bpfdev.c - Berkeley Packet Filter (/dev/bpf) interface */
2 /*
3 * BPF is a cloning device: opening /dev/bpf returns a new BPF device which is
4 * independent from any other opened BPF devices. We assume that each BPF
5 * device is used by one single user process, and this implementation therefore
6 * does not support multiple concurrent device calls on the same BPF device.
7 *
8 * Packet buffering basically follows the BSD model: each BPF device that is
9 * configured (that is, it has been attached to an interface) has two buffers,
10 * each of the configured size: a store buffer, where new packets are stored,
11 * and a hold buffer, which is typically full and awaiting retrieval through a
12 * read call from userland. The buffers are swapped ("rotated") when the store
13 * buffer is filled up and the hold buffer is empty - if the hold buffer is not
14 * empty either, additional packets are dropped.
15 *
16 * These buffers are allocated when the BPF device is attached to an interface.
17 * The interface may later disappear, in which case the BPF device is detached
18 * from it, allowing any final packets to be read before read requests start
19 * returning I/O errors. The buffers are freed only when the device is closed.
20 */
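/*
 * Rough sketch of how a user process would typically drive this device; this
 * is hypothetical application code (interface name, open mode, and buffer
 * handling are just examples), not part of this service:
 *
 *	fd = open("/dev/bpf", O_RDWR);
 *	ioctl(fd, BIOCGBLEN, &buflen);		(obtain the buffer size)
 *	strlcpy(ifr.ifr_name, "lo0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);		(attach to an interface)
 *	len = read(fd, buf, buflen);		(read size must equal buflen)
 *
 * The data returned by read(2) consists of consecutive records, each being a
 * struct bpf_hdr followed by captured packet data; the next record starts
 * BPF_WORDALIGN(bh_hdrlen + bh_caplen) bytes after the current one.
 */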
21
22 #include "lwip.h"
23 #include "bpfdev.h"
24
25 #include <minix/chardriver.h>
26 #include <net/if.h>
27 #include <net/bpfdesc.h>
28 #include <minix/bpf.h>
29 #include <sys/mman.h>
30
31 /*
32 * Make sure that our implementation matches the BPF version in the NetBSD
33 * headers. If they change the version number, we may have to make changes
34 * here accordingly.
35 */
36 #if BPF_MAJOR_VERSION != 1 || BPF_MINOR_VERSION != 1
37 #error "NetBSD BPF version has changed"
38 #endif
39
40 /* The number of BPF devices. */
41 #define NR_BPFDEV 16
42
43 /* BPF receive buffer size: allowed range and default. */
44 #define BPF_BUF_MIN BPF_WORDALIGN(sizeof(struct bpf_hdr))
45 #define BPF_BUF_DEF 32768
46 #define BPF_BUF_MAX 262144
47
48 /*
49 * By opening /dev/bpf, one will obtain a cloned device with a different minor
50 * number, which maps to one of the BPF devices.
51 */
52 #define BPFDEV_MINOR 0 /* minor number of /dev/bpf */
53 #define BPFDEV_BASE_MINOR 1 /* base minor number for BPF devices */
54
55 static struct bpfdev {
56 struct bpfdev_link bpf_link; /* structure link, MUST be first */
57 TAILQ_ENTRY(bpfdev) bpf_next; /* next on free or interface list */
58 struct ifdev *bpf_ifdev; /* associated interface, or NULL */
59 unsigned int bpf_flags; /* flags (BPFF_) */
60 size_t bpf_size; /* size of packet buffers */
61 char *bpf_sbuf; /* store buffer (mmap'd, or NULL) */
62 char *bpf_hbuf; /* hold buffer (mmap'd, or NULL) */
63 size_t bpf_slen; /* used part of store buffer */
64 size_t bpf_hlen; /* used part of hold buffer */
65 struct bpf_insn *bpf_filter; /* verified BPF filter, or NULL */
66 size_t bpf_filterlen; /* length of filter, for munmap */
67 pid_t bpf_pid; /* process ID of last using process */
68 clock_t bpf_timeout; /* timeout for read calls (0 = none) */
69 struct { /* state for pending read request */
70 endpoint_t br_endpt; /* reading endpoint, or NONE */
71 cp_grant_id_t br_grant; /* grant for reader's buffer */
72 cdev_id_t br_id; /* read request identifier */
73 minix_timer_t br_timer; /* timer for read timeout */
74 } bpf_read;
75 struct { /* state for pending select request */
76 endpoint_t bs_endpt; /* selecting endpoint, or NONE */
77 unsigned int bs_selops; /* pending select operations */
78 } bpf_select;
79 struct { /* packet capture statistics */
80 uint64_t bs_recv; /* # of packets run through filter */
81 uint64_t bs_drop; /* # of packets dropped: buffer full */
82 uint64_t bs_capt; /* # of packets accepted by filter */
83 } bpf_stat;
84 } bpf_array[NR_BPFDEV];
85
86 #define BPFF_IN_USE 0x01 /* this BPF device object is in use */
87 #define BPFF_PROMISC 0x02 /* promiscuous mode enabled */
88 #define BPFF_IMMEDIATE 0x04 /* immediate mode is enabled */
89 #define BPFF_SEESENT 0x08 /* also process host-sent packets */
90 #define BPFF_HDRCMPLT 0x10 /* do not fill in link-layer source */
91 #define BPFF_FEEDBACK 0x20 /* feed back written packet as input */
92
93 static TAILQ_HEAD(, bpfdev_link) bpfl_freelist; /* list of free BPF devices */
94
95 static struct bpf_stat bpf_stat;
96
97 static ssize_t bpfdev_peers(struct rmib_call *, struct rmib_node *,
98 struct rmib_oldp *, struct rmib_newp *);
99
100 /* The CTL_NET NET_BPF subtree. All nodes are dynamically numbered. */
101 static struct rmib_node net_bpf_table[] = {
102 RMIB_INT(RMIB_RO, BPF_BUF_MAX, "maxbufsize",
103 "Maximum size for data capture buffer"), /* TODO: read-write */
104 RMIB_STRUCT(RMIB_RO, sizeof(bpf_stat), &bpf_stat, "stats",
105 "BPF stats"),
106 RMIB_FUNC(RMIB_RO | CTLTYPE_NODE, 0, bpfdev_peers, "peers",
107 "BPF peers"),
108 };
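/*
 * Once the subtree is registered in bpfdev_init() below, these nodes show up
 * to userland as the sysctl(7) variables net.bpf.maxbufsize, net.bpf.stats,
 * and net.bpf.peers; the last one is what netstat(1) uses to list open BPF
 * devices.
 */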
109
110 static struct rmib_node net_bpf_node =
111 RMIB_NODE(RMIB_RO, net_bpf_table, "bpf", "BPF options");
112
113 /*
114 * Initialize the BPF module.
115 */
116 void
117 bpfdev_init(void)
118 {
119 const int mib[] = { CTL_NET, NET_BPF };
120 unsigned int slot;
121 int r;
122
123 /* Initialize data structures. */
124 TAILQ_INIT(&bpfl_freelist);
125
126 for (slot = 0; slot < __arraycount(bpf_array); slot++) {
127 bpf_array[slot].bpf_flags = 0;
128
129 TAILQ_INSERT_TAIL(&bpfl_freelist, &bpf_array[slot].bpf_link,
130 bpfl_next);
131 }
132
133 memset(&bpf_stat, 0, sizeof(bpf_stat));
134
135 /* Register the "net.bpf" subtree with the MIB service. */
136 if ((r = rmib_register(mib, __arraycount(mib), &net_bpf_node)) != OK)
137 panic("unable to register net.bpf RMIB tree: %d", r);
138 }
139
140 /*
141 * Given a BPF device object, return the corresponding minor number.
142 */
143 static devminor_t
144 bpfdev_get_minor(struct bpfdev * bpfdev)
145 {
146
147 assert(bpfdev != NULL);
148
149 return BPFDEV_BASE_MINOR + (devminor_t)(bpfdev - bpf_array);
150 }
151
152 /*
153 * Given a minor number, return the corresponding BPF device object, or NULL if
154 * the minor number does not identify a BPF device.
155 */
156 static struct bpfdev *
157 bpfdev_get_by_minor(devminor_t minor)
158 {
159
160 if (minor < BPFDEV_BASE_MINOR ||
161 (unsigned int)minor >= BPFDEV_BASE_MINOR + __arraycount(bpf_array))
162 return NULL;
163
164 return &bpf_array[minor - BPFDEV_BASE_MINOR];
165 }
166
167 /*
168 * Open a BPF device, returning a cloned device instance.
169 */
170 static int
171 bpfdev_open(devminor_t minor, int access __unused, endpoint_t user_endpt)
172 {
173 struct bpfdev_link *bpfl;
174 struct bpfdev *bpf;
175
176 /* Disallow opening cloned devices through device nodes. */
177 if (minor != BPFDEV_MINOR)
178 return ENXIO;
179
180 if (TAILQ_EMPTY(&bpfl_freelist))
181 return ENOBUFS;
182
183 bpfl = TAILQ_FIRST(&bpfl_freelist);
184 TAILQ_REMOVE(&bpfl_freelist, bpfl, bpfl_next);
185
186 bpf = (struct bpfdev *)bpfl;
187
188 memset(bpf, 0, sizeof(*bpf));
189
190 bpf->bpf_flags = BPFF_IN_USE | BPFF_SEESENT;
191 bpf->bpf_size = BPF_BUF_DEF;
192 bpf->bpf_pid = getnpid(user_endpt);
193 bpf->bpf_read.br_endpt = NONE;
194 bpf->bpf_select.bs_endpt = NONE;
195
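/*
 * Returning CDEV_CLONED along with the new minor number tells VFS to reroute
 * this open(2), and all further calls on the resulting file descriptor, to
 * the cloned BPF device rather than to /dev/bpf itself.
 */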
196 return CDEV_CLONED | bpfdev_get_minor(bpf);
197 }
198
199 /*
200 * Close a BPF device.
201 */
202 static int
203 bpfdev_close(devminor_t minor)
204 {
205 struct bpfdev *bpf;
206
207 if ((bpf = bpfdev_get_by_minor(minor)) == NULL)
208 return EINVAL;
209
210 /*
211 * There cannot possibly be a pending read request, so we never need to
212 * cancel the read timer from here either.
213 */
214 assert(bpf->bpf_read.br_endpt == NONE);
215
216 if (bpf->bpf_sbuf != NULL) {
217 assert(bpf->bpf_hbuf != NULL);
218
219 if (munmap(bpf->bpf_sbuf, bpf->bpf_size) != 0)
220 panic("munmap failed: %d", -errno);
221 if (munmap(bpf->bpf_hbuf, bpf->bpf_size) != 0)
222 panic("munmap failed: %d", -errno);
223
224 bpf->bpf_sbuf = NULL;
225 bpf->bpf_hbuf = NULL;
226 } else
227 assert(bpf->bpf_hbuf == NULL);
228
229 if (bpf->bpf_filter != NULL) {
230 assert(bpf->bpf_filterlen > 0);
231
232 if (munmap(bpf->bpf_filter, bpf->bpf_filterlen) != 0)
233 panic("munmap failed: %d", -errno);
234
235 bpf->bpf_filter = NULL;
236 }
237
238 /*
239 * If the BPF device was attached to an interface, and that interface
240 * has not disappeared in the meantime, detach from it now.
241 */
242 if (bpf->bpf_ifdev != NULL) {
243 if (bpf->bpf_flags & BPFF_PROMISC)
244 ifdev_clear_promisc(bpf->bpf_ifdev);
245
246 ifdev_detach_bpf(bpf->bpf_ifdev, &bpf->bpf_link);
247
248 bpf->bpf_ifdev = NULL;
249 }
250
251 bpf->bpf_flags = 0; /* mark as no longer in use */
252
253 TAILQ_INSERT_HEAD(&bpfl_freelist, &bpf->bpf_link, bpfl_next);
254
255 return OK;
256 }
257
258 /*
259 * Rotate buffers for the BPF device, by swapping the store buffer and the hold
260 * buffer.
261 */
262 static void
263 bpfdev_rotate(struct bpfdev * bpf)
264 {
265 char *buf;
266 size_t len;
267
268 /*
269 * When rotating, the store buffer may or may not be empty, but the
270 * hold buffer must always be empty.
271 */
272 assert(bpf->bpf_hlen == 0);
273
274 buf = bpf->bpf_sbuf;
275 len = bpf->bpf_slen;
276 bpf->bpf_sbuf = bpf->bpf_hbuf;
277 bpf->bpf_slen = bpf->bpf_hlen;
278 bpf->bpf_hbuf = buf;
279 bpf->bpf_hlen = len;
280 }
281
282 /*
283 * Test whether any of the given select operations are ready on the BPF device,
284 * and return the set of ready operations.
285 */
286 static unsigned int
287 bpfdev_test_select(struct bpfdev * bpf, unsigned int ops)
288 {
289 unsigned int ready_ops;
290
291 ready_ops = 0;
292
293 /*
294 * The BPF device is ready for reading if the hold buffer is not empty
295 * (i.e.: the store buffer has been filled up completely and was
296 * therefore rotated) or if immediate mode is set and the store buffer
297 * is not empty (i.e.: any packet is available at all). In the latter
298 * case, the buffers will be rotated during the read. We do not
299 * support applying the read timeout to selects and maintaining state
300 * between the select and the following read, because even though
301 * libpcap claims that it is the right behavior, that is just insane.
302 */
303 if (ops & CDEV_OP_RD) {
304 if (bpf->bpf_ifdev == NULL)
305 ready_ops |= CDEV_OP_RD;
306 else if (bpf->bpf_hlen > 0)
307 ready_ops |= CDEV_OP_RD;
308 else if ((bpf->bpf_flags & BPFF_IMMEDIATE) &&
309 bpf->bpf_slen > 0)
310 ready_ops |= CDEV_OP_RD;
311 }
312
313 if (ops & CDEV_OP_WR)
314 ready_ops |= CDEV_OP_WR;
315
316 return ready_ops;
317 }
318
319 /*
320 * There has been a state change on the BPF device. If now possible, resume a
321 * pending select query, if any.
322 */
323 static void
324 bpfdev_resume_select(struct bpfdev * bpf)
325 {
326 unsigned int ops, ready_ops;
327 endpoint_t endpt;
328
329 /* First see if there is a pending select request at all. */
330 if ((endpt = bpf->bpf_select.bs_endpt) == NONE)
331 return;
332 ops = bpf->bpf_select.bs_selops;
333
334 assert(ops != 0);
335
336 /* Then see if any of the pending operations are now ready. */
337 if ((ready_ops = bpfdev_test_select(bpf, ops)) == 0)
338 return;
339
340 /* If so, notify VFS about the ready operations. */
341 chardriver_reply_select(bpf->bpf_select.bs_endpt,
342 bpfdev_get_minor(bpf), ready_ops);
343
344 /*
345 * Forget about the ready operations. If that leaves no pending
346 * operations, forget about the select request altogether.
347 */
348 if ((bpf->bpf_select.bs_selops &= ~ready_ops) == 0)
349 bpf->bpf_select.bs_endpt = NONE;
350 }
351
352 /*
353 * There has been a state change on the BPF device. If now possible, resume a
354 * pending read request, if any. If the call is a result of a timeout,
355 * 'is_timeout' is set. In that case, the read request must be resumed with an
356 * EAGAIN error if no packets are available, and the running timer must be
357 * canceled. Otherwise, the resumption is due to a full buffer or a
358 * disappeared interface, and 'is_timeout' is not set. In this case, the read
359 * request must be resumed with an I/O error if no packets are available.
360 */
361 static void
362 bpfdev_resume_read(struct bpfdev * bpf, int is_timeout)
363 {
364 ssize_t r;
365
366 assert(bpf->bpf_read.br_endpt != NONE);
367
368 /*
369 * If the hold buffer is still empty, see if the store buffer has
370 * any packets to copy out.
371 */
372 if (bpf->bpf_hlen == 0)
373 bpfdev_rotate(bpf);
374
375 /* Return any available packets, or otherwise an error. */
376 if (bpf->bpf_hlen > 0) {
377 assert(bpf->bpf_hlen <= bpf->bpf_size);
378
379 r = sys_safecopyto(bpf->bpf_read.br_endpt,
380 bpf->bpf_read.br_grant, 0, (vir_bytes)bpf->bpf_hbuf,
381 bpf->bpf_hlen);
382
383 if (r == OK) {
384 r = (ssize_t)bpf->bpf_hlen;
385
386 bpf->bpf_hlen = 0;
387
388 assert(bpf->bpf_slen != bpf->bpf_size);
389
390 /*
391 * Allow readers to get the last packets after the
392 * interface has disappeared, before getting errors.
393 */
394 if (bpf->bpf_ifdev == NULL)
395 bpfdev_rotate(bpf);
396 }
397 } else
398 r = (is_timeout) ? EAGAIN : EIO;
399
400 chardriver_reply_task(bpf->bpf_read.br_endpt, bpf->bpf_read.br_id, r);
401
402 bpf->bpf_read.br_endpt = NONE;
403
404 /* Was there still a timer running? Then cancel it now. */
405 if (bpf->bpf_timeout > 0 && !is_timeout)
406 cancel_timer(&bpf->bpf_read.br_timer);
407 }
408
409 /*
410 * A read timeout has triggered for the BPF device. Wake up the pending read
411 * request.
412 */
413 static void
414 bpfdev_timeout(int arg)
415 {
416 struct bpfdev *bpf;
417
418 assert(arg >= 0 && (unsigned int)arg < __arraycount(bpf_array));
419
420 bpf = &bpf_array[arg];
421
422 assert(bpf->bpf_read.br_endpt != NONE);
423
424 bpfdev_resume_read(bpf, TRUE /*is_timeout*/);
425 }
426
427 /*
428 * Read from a BPF device.
429 */
430 static ssize_t
431 bpfdev_read(devminor_t minor, uint64_t position, endpoint_t endpt,
432 cp_grant_id_t grant, size_t size, int flags, cdev_id_t id)
433 {
434 struct bpfdev *bpf;
435 ssize_t r;
436 int suspend;
437
438 if ((bpf = bpfdev_get_by_minor(minor)) == NULL)
439 return EINVAL;
440
441 /* Allow only one read call at a time. */
442 if (bpf->bpf_read.br_endpt != NONE)
443 return EIO;
444
445 /* Has this BPF device been configured at all yet? */
446 if (bpf->bpf_sbuf == NULL)
447 return EINVAL;
448
449 /*
450 * Does the read call size match the entire buffer size? This is a
451 * ridiculous requirement but it makes our job quite a bit easier..
452 */
453 if (size != bpf->bpf_size)
454 return EINVAL;
455
456 /*
457 * Following standard receive semantics, if the interface is gone,
458 * return all the packets that were pending before returning an error.
459 * This requires extra buffer rotations after read completion, too.
460 */
461 if (bpf->bpf_ifdev == NULL && bpf->bpf_hlen == 0)
462 return EIO;
463
464 /*
465 * If immediate mode is not enabled, we should always suspend the read
466 * call if the hold buffer is empty. If immediate mode is enabled, we
467 * should only suspend the read call if both buffers are empty, and
468 * return data from the hold buffer or otherwise the store buffer,
469 * whichever is not empty. A non-blocking call behaves as though
470 * immediate mode is enabled, except it will return EAGAIN instead of
471 * suspending the read call if both buffers are empty. Thus, we may
472 * have to rotate buffers for both immediate mode and non-blocking
473 * calls. The latter is necessary for libpcap to behave correctly.
474 */
475 if ((flags & CDEV_NONBLOCK) || (bpf->bpf_flags & BPFF_IMMEDIATE))
476 suspend = (bpf->bpf_hlen == 0 && bpf->bpf_slen == 0);
477 else
478 suspend = (bpf->bpf_hlen == 0);
479
480 if (suspend) {
481 if (flags & CDEV_NONBLOCK)
482 return EAGAIN;
483
484 /* Suspend the read call for later. */
485 bpf->bpf_read.br_endpt = endpt;
486 bpf->bpf_read.br_grant = grant;
487 bpf->bpf_read.br_id = id;
488
489 /* Set a timer if requested. */
490 if (bpf->bpf_timeout > 0)
491 set_timer(&bpf->bpf_read.br_timer, bpf->bpf_timeout,
492 bpfdev_timeout, (int)(bpf - bpf_array));
493
494 return EDONTREPLY;
495 }
496
497 /* If we get here, either buffer has data; rotate buffers if needed. */
498 if (bpf->bpf_hlen == 0)
499 bpfdev_rotate(bpf);
500 assert(bpf->bpf_hlen > 0);
501
502 if ((r = sys_safecopyto(endpt, grant, 0, (vir_bytes)bpf->bpf_hbuf,
503 bpf->bpf_hlen)) != OK)
504 return r;
505
506 r = (ssize_t)bpf->bpf_hlen;
507
508 bpf->bpf_hlen = 0;
509
510 /*
511 * If the store buffer is exactly full, rotate it now. Also, if the
512 * interface has disappeared, the store buffer will never fill up.
513 * Rotate it so that the application will get any remaining data before
514 * getting errors about the interface being gone.
515 */
516 if (bpf->bpf_slen == bpf->bpf_size || bpf->bpf_ifdev == NULL)
517 bpfdev_rotate(bpf);
518
519 return r;
520 }
521
522 /*
523 * Write to a BPF device.
524 */
525 static ssize_t
526 bpfdev_write(devminor_t minor, uint64_t position, endpoint_t endpt,
527 cp_grant_id_t grant, size_t size, int flags, cdev_id_t id)
528 {
529 struct bpfdev *bpf;
530 struct pbuf *pbuf, *pptr, *pcopy;
531 size_t off;
532 err_t err;
533 int r;
534
535 if ((bpf = bpfdev_get_by_minor(minor)) == NULL)
536 return EINVAL;
537
538 if (bpf->bpf_ifdev == NULL)
539 return EINVAL;
540
541 /* VFS skips zero-sized I/O calls right now, but that may change. */
542 if (size == 0)
543 return 0; /* nothing to do */
544
545 if (size > ifdev_get_hdrlen(bpf->bpf_ifdev) +
546 ifdev_get_mtu(bpf->bpf_ifdev))
547 return EMSGSIZE;
548
549 if ((pbuf = pchain_alloc(PBUF_LINK, size)) == NULL)
550 return ENOMEM;
551
552 /* TODO: turn this into a series of vector copies. */
553 off = 0;
554 for (pptr = pbuf; pptr != NULL; pptr = pptr->next) {
555 if ((r = sys_safecopyfrom(endpt, grant, off,
556 (vir_bytes)pptr->payload, pptr->len)) != OK) {
557 pbuf_free(pbuf);
558
559 return r;
560 }
561 off += pptr->len;
562 }
563 assert(off == size);
564
565 /*
566 * In feedback mode, we cannot use the same packet buffers for both
567 * output and input, so make a copy. We do this before calling the
568 * output function, which may change part of the buffers, because the
569 * BSDs take this approach as well.
570 */
571 if (bpf->bpf_flags & BPFF_FEEDBACK) {
572 if ((pcopy = pchain_alloc(PBUF_LINK, size)) == NULL) {
573 pbuf_free(pbuf);
574
575 return ENOMEM;
576 }
577
578 if (pbuf_copy(pcopy, pbuf) != ERR_OK)
579 panic("unexpected pbuf copy failure");
580 } else
581 pcopy = NULL;
582
583 /* Pass in the packet as output, and free it again. */
584 err = ifdev_output(bpf->bpf_ifdev, pbuf, NULL /*netif*/,
585 TRUE /*to_bpf*/, !!(bpf->bpf_flags & BPFF_HDRCMPLT));
586
587 pbuf_free(pbuf);
588
589 /* In feedback mode, pass in the copy as input, if output succeeded. */
590 if (err == ERR_OK && (bpf->bpf_flags & BPFF_FEEDBACK))
591 ifdev_input(bpf->bpf_ifdev, pcopy, NULL /*netif*/,
592 FALSE /*to_bpf*/);
593 else if (pcopy != NULL)
594 pbuf_free(pcopy);
595
596 return (err == ERR_OK) ? (ssize_t)size : util_convert_err(err);
597 }
598
599 /*
600 * Attach a BPF device to a network interface, using the interface name given
601 * in an ifreq structure. As side effect, allocate hold and store buffers for
602 * the device. These buffers will stay allocated until the device is closed,
603 * even though the interface may disappear before that. Return OK if the BPF
604 * device was successfully attached to the interface, or a negative error code
605 * otherwise.
606 */
607 static int
608 bpfdev_attach(struct bpfdev * bpf, struct ifreq * ifr)
609 {
610 struct ifdev *ifdev;
611 void *sbuf, *hbuf;
612
613 /* Find the interface with the given name. */
614 ifr->ifr_name[sizeof(ifr->ifr_name) - 1] = '\0';
615 if ((ifdev = ifdev_find_by_name(ifr->ifr_name)) == NULL)
616 return ENXIO;
617
618 /*
619 * Allocate a store buffer and a hold buffer. Preallocate the memory,
620 * or we might get killed later during low-memory conditions.
621 */
622 if ((sbuf = (char *)mmap(NULL, bpf->bpf_size, PROT_READ | PROT_WRITE,
623 MAP_ANON | MAP_PRIVATE | MAP_PREALLOC, -1, 0)) == MAP_FAILED)
624 return ENOMEM;
625
626 if ((hbuf = (char *)mmap(NULL, bpf->bpf_size, PROT_READ | PROT_WRITE,
627 MAP_ANON | MAP_PRIVATE | MAP_PREALLOC, -1, 0)) == MAP_FAILED) {
628 (void)munmap(sbuf, bpf->bpf_size);
629
630 return ENOMEM;
631 }
632
633 bpf->bpf_ifdev = ifdev;
634 bpf->bpf_sbuf = sbuf;
635 bpf->bpf_hbuf = hbuf;
636 assert(bpf->bpf_slen == 0);
637 assert(bpf->bpf_hlen == 0);
638
639 ifdev_attach_bpf(ifdev, &bpf->bpf_link);
640
641 return OK;
642 }
643
644 /*
645 * Detach the BPF device from its interface, which is about to disappear.
646 */
647 void
648 bpfdev_detach(struct bpfdev_link * bpfl)
649 {
650 struct bpfdev *bpf = (struct bpfdev *)bpfl;
651
652 assert(bpf->bpf_flags & BPFF_IN_USE);
653 assert(bpf->bpf_ifdev != NULL);
654
655 /*
656 * We deliberately leave the buffers allocated here, for two reasons:
657 *
658 * 1) it lets applications read any last packets in the buffers;
659 * 2) it prevents reattaching the BPF device to another interface.
660 */
661 bpf->bpf_ifdev = NULL;
662
663 /*
664 * Resume pending read and select requests, returning any data left,
665 * or an error if none.
666 */
667 if (bpf->bpf_hlen == 0)
668 bpfdev_rotate(bpf);
669
670 if (bpf->bpf_read.br_endpt != NONE)
671 bpfdev_resume_read(bpf, FALSE /*is_timeout*/);
672
673 bpfdev_resume_select(bpf);
674 }
675
676 /*
677 * Flush the given BPF device, resetting its buffer contents and statistics
678 * counters.
679 */
680 static void
681 bpfdev_flush(struct bpfdev * bpf)
682 {
683
684 bpf->bpf_slen = 0;
685 bpf->bpf_hlen = 0;
686
687 bpf->bpf_stat.bs_recv = 0;
688 bpf->bpf_stat.bs_drop = 0;
689 bpf->bpf_stat.bs_capt = 0;
690 }
691
692 /*
693 * Install a filter program on the BPF device. A new filter replaces any old
694 * one. A zero-sized filter simply clears a previous filter. On success,
695 * perform a flush and return OK. On failure, return a negative error code
696 * without making any modifications to the current filter.
697 */
698 static int
699 bpfdev_setfilter(struct bpfdev * bpf, endpoint_t endpt, cp_grant_id_t grant)
700 {
701 struct bpf_insn *filter;
702 unsigned int count;
703 size_t len;
704 int r;
705
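/*
 * The filter is passed in as a struct minix_bpf_program, which unlike the
 * standard struct bpf_program embeds the instruction array in the structure
 * itself rather than pointing to it, so that both the instruction count and
 * the instructions can be copied in through the single grant we are given.
 */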
706 if ((r = sys_safecopyfrom(endpt, grant,
707 offsetof(struct minix_bpf_program, mbf_len), (vir_bytes)&count,
708 sizeof(count))) != OK)
709 return r;
710
711 if (count > BPF_MAXINSNS)
712 return EINVAL;
713 len = count * sizeof(struct bpf_insn);
714
715 if (len > 0) {
716 if ((filter = (struct bpf_insn *)mmap(NULL, len,
717 PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0)) ==
718 MAP_FAILED)
719 return ENOMEM;
720
721 if ((r = sys_safecopyfrom(endpt, grant,
722 offsetof(struct minix_bpf_program, mbf_insns),
723 (vir_bytes)filter, len)) != OK) {
724 (void)munmap(filter, len);
725
726 return r;
727 }
728
729 if (!bpf_validate(filter, count)) {
730 (void)munmap(filter, len);
731
732 return EINVAL;
733 }
734 } else
735 filter = NULL;
736
737 if (bpf->bpf_filter != NULL)
738 (void)munmap(bpf->bpf_filter, bpf->bpf_filterlen);
739
740 bpf->bpf_filter = filter;
741 bpf->bpf_filterlen = len;
742
743 bpfdev_flush(bpf);
744
745 return OK;
746 }
747
748 /*
749 * Process an I/O control request on the BPF device.
750 */
751 static int
752 bpfdev_ioctl(devminor_t minor, unsigned long request, endpoint_t endpt,
753 cp_grant_id_t grant, int flags, endpoint_t user_endpt, cdev_id_t id)
754 {
755 struct bpfdev *bpf;
756 struct bpf_stat bs;
757 struct bpf_version bv;
758 struct bpf_dltlist bfl;
759 struct timeval tv;
760 struct ifreq ifr;
761 unsigned int uval;
762 int r, val;
763
764 if ((bpf = bpfdev_get_by_minor(minor)) == NULL)
765 return EINVAL;
766
767 /*
768 * We do not support multiple concurrent requests in this module. That
769 * not only means that we forbid a read(2) call on a BPF device object
770 * while another read(2) is already pending: we also disallow IOCTL
771 * calls while such a read(2) call is in progress. This
772 * restriction should never be a problem for user programs, and allows
773 * us to rely on the fact that no settings can change between the
774 * start and end of any read call. As a side note, pending select(2)
775 * queries may be similarly affected, and will also not be fully
776 * accurate if any options are changed while pending.
777 */
778 if (bpf->bpf_read.br_endpt != NONE)
779 return EIO;
780
781 bpf->bpf_pid = getnpid(user_endpt);
782
783 /* These are in order of the NetBSD BIOC.. IOCTL numbers. */
784 switch (request) {
785 case BIOCGBLEN:
786 uval = bpf->bpf_size;
787
788 return sys_safecopyto(endpt, grant, 0, (vir_bytes)&uval,
789 sizeof(uval));
790
791 case BIOCSBLEN:
792 if (bpf->bpf_sbuf != NULL)
793 return EINVAL;
794
795 if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes)&uval,
796 sizeof(uval))) != OK)
797 return r;
798
799 if (uval < BPF_BUF_MIN)
800 uval = BPF_BUF_MIN;
801 else if (uval > BPF_BUF_MAX)
802 uval = BPF_BUF_MAX;
803
804 /* Is this the right thing to do? It doesn't matter for us. */
805 uval = BPF_WORDALIGN(uval);
806
807 if ((r = sys_safecopyto(endpt, grant, 0, (vir_bytes)&uval,
808 sizeof(uval))) != OK)
809 return r;
810
811 bpf->bpf_size = uval;
812
813 return OK;
814
815 case MINIX_BIOCSETF:
816 return bpfdev_setfilter(bpf, endpt, grant);
817
818 case BIOCPROMISC:
819 if (bpf->bpf_ifdev == NULL)
820 return EINVAL;
821
822 if (!(bpf->bpf_flags & BPFF_PROMISC)) {
823 if (!ifdev_set_promisc(bpf->bpf_ifdev))
824 return EINVAL;
825
826 bpf->bpf_flags |= BPFF_PROMISC;
827 }
828
829 return OK;
830
831 case BIOCFLUSH:
832 bpfdev_flush(bpf);
833
834 return OK;
835
836 case BIOCGDLT:
837 if (bpf->bpf_ifdev == NULL)
838 return EINVAL;
839
840 /* TODO: support for type configuration per BPF device. */
841 uval = ifdev_get_dlt(bpf->bpf_ifdev);
842
843 return sys_safecopyto(endpt, grant, 0, (vir_bytes)&uval,
844 sizeof(uval));
845
846 case BIOCGETIF:
847 if (bpf->bpf_ifdev == NULL)
848 return EINVAL;
849
850 memset(&ifr, 0, sizeof(ifr));
851 strlcpy(ifr.ifr_name, ifdev_get_name(bpf->bpf_ifdev),
852 sizeof(ifr.ifr_name));
853
854 return sys_safecopyto(endpt, grant, 0, (vir_bytes)&ifr,
855 sizeof(ifr));
856
857 case BIOCSETIF:
858 /*
859 * Test on the presence of a buffer rather than on an interface
860 * since the latter may disappear and thus be reset to NULL, in
861 * which case we do not want to allow rebinding to another.
862 */
863 if (bpf->bpf_sbuf != NULL)
864 return EINVAL;
865
866 if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes)&ifr,
867 sizeof(ifr))) != OK)
868 return r;
869
870 return bpfdev_attach(bpf, &ifr);
871
872 case BIOCGSTATS:
873 /*
874 * Why do we not embed a bpf_stat structure directly in the
875 * BPF device structure? Well, bpf_stat has massive padding..
876 */
877 memset(&bs, 0, sizeof(bs));
878 bs.bs_recv = bpf->bpf_stat.bs_recv;
879 bs.bs_drop = bpf->bpf_stat.bs_drop;
880 bs.bs_capt = bpf->bpf_stat.bs_capt;
881
882 return sys_safecopyto(endpt, grant, 0, (vir_bytes)&bs,
883 sizeof(bs));
884
885 case BIOCIMMEDIATE:
886 if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes)&uval,
887 sizeof(uval))) != OK)
888 return r;
889
890 if (uval)
891 bpf->bpf_flags |= BPFF_IMMEDIATE;
892 else
893 bpf->bpf_flags &= ~BPFF_IMMEDIATE;
894
895 return OK;
896
897 case BIOCVERSION:
898 memset(&bv, 0, sizeof(bv));
899 bv.bv_major = BPF_MAJOR_VERSION;
900 bv.bv_minor = BPF_MINOR_VERSION;
901
902 return sys_safecopyto(endpt, grant, 0, (vir_bytes)&bv,
903 sizeof(bv));
904
905 case BIOCGHDRCMPLT:
906 uval = !!(bpf->bpf_flags & BPFF_HDRCMPLT);
907
908 return sys_safecopyto(endpt, grant, 0, (vir_bytes)&uval,
909 sizeof(uval));
910
911 case BIOCSHDRCMPLT:
912 if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes)&uval,
913 sizeof(uval))) != OK)
914 return r;
915
916 if (uval)
917 bpf->bpf_flags |= BPFF_HDRCMPLT;
918 else
919 bpf->bpf_flags &= ~BPFF_HDRCMPLT;
920
921 return OK;
922
923 case BIOCSDLT:
924 if (bpf->bpf_ifdev == NULL)
925 return EINVAL;
926
927 if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes)&uval,
928 sizeof(uval))) != OK)
929 return r;
930
931 /* TODO: support for type configuration per BPF device. */
932 if (uval != ifdev_get_dlt(bpf->bpf_ifdev))
933 return EINVAL;
934
935 return OK;
936
937 case MINIX_BIOCGDLTLIST:
938 if (bpf->bpf_ifdev == NULL)
939 return EINVAL;
940
941 if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes)&bfl,
942 sizeof(bfl))) != OK)
943 return r;
944
945 if (bfl.bfl_list != NULL) {
946 if (bfl.bfl_len < 1)
947 return ENOMEM;
948
949 /*
950 * Copy out the 'list', which consists of one entry.
951 * If we were to produce multiple entries, we would
952 * have to check against the MINIX_BPF_MAXDLT limit.
953 */
954 uval = ifdev_get_dlt(bpf->bpf_ifdev);
955
956 if ((r = sys_safecopyto(endpt, grant,
957 offsetof(struct minix_bpf_dltlist, mbfl_list),
958 (vir_bytes)&uval, sizeof(uval))) != OK)
959 return r;
960 }
961 bfl.bfl_len = 1;
962
963 return sys_safecopyto(endpt, grant, 0, (vir_bytes)&bfl,
964 sizeof(bfl));
965
966 case BIOCGSEESENT:
967 uval = !!(bpf->bpf_flags & BPFF_SEESENT);
968
969 return sys_safecopyto(endpt, grant, 0, (vir_bytes)&uval,
970 sizeof(uval));
971
972 case BIOCSSEESENT:
973 if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes)&uval,
974 sizeof(uval))) != OK)
975 return r;
976
977 if (uval)
978 bpf->bpf_flags |= BPFF_SEESENT;
979 else
980 bpf->bpf_flags &= ~BPFF_SEESENT;
981
982 return OK;
983
984 case BIOCSRTIMEOUT:
985 if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes)&tv,
986 sizeof(tv))) != OK)
987 return r;
988
989 if ((r = util_timeval_to_ticks(&tv, &bpf->bpf_timeout)) != OK)
990 return r;
991
992 return OK;
993
994 case BIOCGRTIMEOUT:
995 util_ticks_to_timeval(bpf->bpf_timeout, &tv);
996
997 return sys_safecopyto(endpt, grant, 0, (vir_bytes)&tv,
998 sizeof(tv));
999
1000 case BIOCGFEEDBACK:
1001 uval = !!(bpf->bpf_flags & BPFF_FEEDBACK);
1002
1003 return sys_safecopyto(endpt, grant, 0, (vir_bytes)&uval,
1004 sizeof(uval));
1005
1006 case BIOCSFEEDBACK:
1007 if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes)&uval,
1008 sizeof(uval))) != OK)
1009 return r;
1010
1011 if (uval)
1012 bpf->bpf_flags |= BPFF_FEEDBACK;
1013 else
1014 bpf->bpf_flags &= ~BPFF_FEEDBACK;
1015
1016 return OK;
1017
1018 case FIONREAD:
1019 val = 0;
1020 if (bpf->bpf_hlen > 0)
1021 val = bpf->bpf_hlen;
1022 else if ((bpf->bpf_flags & BPFF_IMMEDIATE) &&
1023 bpf->bpf_slen > 0)
1024 val = bpf->bpf_slen;
1027
1028 return sys_safecopyto(endpt, grant, 0, (vir_bytes)&val,
1029 sizeof(val));
1030
1031 default:
1032 return ENOTTY;
1033 }
1034 }
1035
1036 /*
1037 * Cancel a previously suspended request on a BPF device. Since only read
1038 * requests may be suspended (select is handled differently), the cancel
1039 * request must be for a read request. Note that character devices currently
1040 * (still) behave slightly differently from socket devices here: while socket
1041 * drivers are supposed to respond to the original request, character drivers
1042 * must respond to the original request from the cancel callback.
1043 */
1044 static int
1045 bpfdev_cancel(devminor_t minor, endpoint_t endpt, cdev_id_t id)
1046 {
1047 struct bpfdev *bpf;
1048
1049 if ((bpf = bpfdev_get_by_minor(minor)) == NULL)
1050 return EDONTREPLY;
1051
1052 /* Is this a cancel request for the currently pending read request? */
1053 if (bpf->bpf_read.br_endpt != endpt || bpf->bpf_read.br_id != id)
1054 return EDONTREPLY;
1055
1056 /* If so, cancel the read request. */
1057 if (bpf->bpf_timeout > 0)
1058 cancel_timer(&bpf->bpf_read.br_timer);
1059
1060 bpf->bpf_read.br_endpt = NONE;
1061
1062 return EINTR; /* the return value for the canceled read request */
1063 }
1064
1065 /*
1066 * Perform a select query on a BPF device.
1067 */
1068 static int
1069 bpfdev_select(devminor_t minor, unsigned int ops, endpoint_t endpt)
1070 {
1071 struct bpfdev *bpf;
1072 unsigned int r, notify;
1073
1074 if ((bpf = bpfdev_get_by_minor(minor)) == NULL)
1075 return EINVAL;
1076
1077 notify = (ops & CDEV_NOTIFY);
1078 ops &= (CDEV_OP_RD | CDEV_OP_WR | CDEV_OP_ERR);
1079
1080 r = bpfdev_test_select(bpf, ops);
1081
1082 /*
1083 * For the operations that were not immediately ready, if requested,
1084 * save the select request for later.
1085 */
1086 ops &= ~r;
1087
1088 if (ops != 0 && notify) {
1089 if (bpf->bpf_select.bs_endpt != NONE) {
1090 /* Merge in the operations with any earlier request. */
1091 if (bpf->bpf_select.bs_endpt != endpt)
1092 return EIO;
1093 bpf->bpf_select.bs_selops |= ops;
1094 } else {
1095 bpf->bpf_select.bs_endpt = endpt;
1096 bpf->bpf_select.bs_selops = ops;
1097 }
1098 }
1099
1100 return r;
1101 }
1102
1103 /*
1104 * Process an incoming packet on the interface to which the given BPF device is
1105 * attached. If the packet passes the filter (if any), store as much as
1106 * requested of it in the store buffer, rotating buffers if needed and resuming
1107 * suspended read and select requests as appropriate. This function is also
1108 * called through bpfdev_output() below.
1109 */
1110 void
1111 bpfdev_input(struct bpfdev_link * bpfl, const struct pbuf * pbuf)
1112 {
1113 struct bpfdev *bpf = (struct bpfdev *)bpfl;
1114 struct timespec ts;
1115 struct bpf_hdr bh;
1116 const struct pbuf *pptr;
1117 size_t caplen, hdrlen, totlen, off, chunk;
1118 int hfull;
1119
1120 /*
1121 * Apparently bs_recv is the counter of packets that were run through
1122 * the filter, not the number of packets that were or could be received
1123 * by the user (which is what I got from the manual page.. oh well).
1124 */
1125 bpf->bpf_stat.bs_recv++;
1126 bpf_stat.bs_recv++;
1127
1128 /*
1129 * Run the packet through the BPF device's filter to see whether the
1130 * packet should be stored and if so, how much of it. If no filter is
1131 * set, all packets will be stored in their entirety.
1132 */
1133 caplen = bpf_filter_ext(bpf->bpf_filter, pbuf, (u_char *)pbuf->payload,
1134 pbuf->tot_len, pbuf->len);
1135
1136 if (caplen == 0)
1137 return; /* no match; ignore packet */
1138
1139 if (caplen > pbuf->tot_len)
1140 caplen = pbuf->tot_len;
1141
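/*
 * Each record in the buffer consists of a word-aligned BPF header followed by
 * 'caplen' bytes of packet data, with the whole record padded ('totlen') so
 * that the next record's header is word-aligned again.
 */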
1142 /* Truncate packet entries to the full size of the buffers. */
1143 hdrlen = BPF_WORDALIGN(sizeof(bh));
1144 totlen = BPF_WORDALIGN(hdrlen + caplen);
1145
1146 if (totlen > bpf->bpf_size) {
1147 totlen = bpf->bpf_size;
1148 caplen = totlen - hdrlen;
1149 }
1150 assert(totlen >= hdrlen);
1151
1152 bpf->bpf_stat.bs_capt++;
1153 bpf_stat.bs_capt++;
1154
1155 assert(bpf->bpf_sbuf != NULL);
1156 if (totlen > bpf->bpf_size - bpf->bpf_slen) {
1157 /*
1158 * If the store buffer is full and the hold buffer is not
1159 * empty, we cannot swap the two buffers, and so we must drop
1160 * the current packet.
1161 */
1162 if (bpf->bpf_hlen > 0) {
1163 bpf->bpf_stat.bs_drop++;
1164 bpf_stat.bs_drop++;
1165
1166 return;
1167 }
1168
1169 /*
1170 * Rotate the buffers: the hold buffer will now be "full" and
1171 * ready to be read - it may not actually be entirely full, but
1172 * we could not fit this packet and we are not going to deliver
1173 * packets out of order..
1174 */
1175 bpfdev_rotate(bpf);
1176
1177 hfull = TRUE;
1178 } else
1179 hfull = FALSE;
1180
1181 /*
1182 * Retrieve the capture time for the packet. Ideally this would be
1183 * done only once per accepted packet, but we do not expect many BPF
1184 * devices to be receiving the same packets often enough to make that
1185 * worth it.
1186 */
1187 clock_time(&ts);
1188
1189 /*
1190 * Copy the packet into the store buffer, including a newly generated
1191 * header. Zero any padding areas, even if strictly not necessary.
1192 */
1193 memset(&bh, 0, sizeof(bh));
1194 bh.bh_tstamp.tv_sec = ts.tv_sec;
1195 bh.bh_tstamp.tv_usec = ts.tv_nsec / 1000;
1196 bh.bh_caplen = caplen;
1197 bh.bh_datalen = pbuf->tot_len;
1198 bh.bh_hdrlen = hdrlen;
1199
1200 assert(bpf->bpf_sbuf != NULL);
1201 off = bpf->bpf_slen;
1202
1203 memcpy(&bpf->bpf_sbuf[off], &bh, sizeof(bh));
1204 if (hdrlen > sizeof(bh))
1205 memset(&bpf->bpf_sbuf[off + sizeof(bh)], 0,
1206 hdrlen - sizeof(bh));
1207 off += hdrlen;
1208
1209 for (pptr = pbuf; pptr != NULL && caplen > 0; pptr = pptr->next) {
1210 chunk = pptr->len;
1211 if (chunk > caplen)
1212 chunk = caplen;
1213
1214 memcpy(&bpf->bpf_sbuf[off], pptr->payload, chunk);
1215
1216 off += chunk;
1217 caplen -= chunk;
1218 }
1219
1220 assert(off <= bpf->bpf_slen + totlen);
1221 if (bpf->bpf_slen + totlen > off)
1222 memset(&bpf->bpf_sbuf[off], 0, bpf->bpf_slen + totlen - off);
1223
1224 bpf->bpf_slen += totlen;
1225
1226 /*
1227 * Edge case: if the hold buffer is empty and the store buffer is now
1228 * exactly full, rotate buffers so that the packets can be read
1229 * immediately, without waiting for the next packet to cause rotation.
1230 */
1231 if (bpf->bpf_hlen == 0 && bpf->bpf_slen == bpf->bpf_size) {
1232 bpfdev_rotate(bpf);
1233
1234 hfull = TRUE;
1235 }
1236
1237 /*
1238 * If the hold buffer is now full, or if immediate mode is enabled,
1239 * then we now have data to deliver to userland. See if we can wake up
1240 * any read or select call (either but not both here).
1241 */
1242 if (hfull || (bpf->bpf_flags & BPFF_IMMEDIATE)) {
1243 if (bpf->bpf_read.br_endpt != NONE)
1244 bpfdev_resume_read(bpf, FALSE /*is_timeout*/);
1245 else
1246 bpfdev_resume_select(bpf);
1247 }
1248 }
1249
1250 /*
1251 * Process an outgoing packet on the interface to which the given BPF device is
1252 * attached. If the BPF device is configured to capture outgoing packets as
1253 * well, attempt to capture the packet as per bpfdev_input().
1254 */
1255 void
1256 bpfdev_output(struct bpfdev_link * bpfl, const struct pbuf * pbuf)
1257 {
1258 struct bpfdev *bpf = (struct bpfdev *)bpfl;
1259
1260 if (bpf->bpf_flags & BPFF_SEESENT)
1261 bpfdev_input(bpfl, pbuf);
1262 }
1263
1264 /*
1265 * Fill the given 'bde' structure with information about BPF device 'bpf'.
1266 */
1267 static void
1268 bpfdev_get_info(struct bpf_d_ext * bde, const struct bpfdev * bpf)
1269 {
1270
1271 bde->bde_bufsize = bpf->bpf_size;
1272 bde->bde_promisc = !!(bpf->bpf_flags & BPFF_PROMISC);
1273 bde->bde_state = BPF_IDLE;
1274 bde->bde_immediate = !!(bpf->bpf_flags & BPFF_IMMEDIATE);
1275 bde->bde_hdrcmplt = !!(bpf->bpf_flags & BPFF_HDRCMPLT);
1276 bde->bde_seesent = !!(bpf->bpf_flags & BPFF_SEESENT);
1277 /*
1278 * NetBSD updates the process ID upon device open, close, ioctl, and
1279 * poll. From those, only open and ioctl make sense for us. Sadly
1280 * there is no way to indicate "no known PID" to netstat(1), so we
1281 * cannot even save just the endpoint and look up the corresponding PID
1282 * later, since the user process may be gone by then.
1283 */
1284 bde->bde_pid = bpf->bpf_pid;
1285 bde->bde_rcount = bpf->bpf_stat.bs_recv;
1286 bde->bde_dcount = bpf->bpf_stat.bs_drop;
1287 bde->bde_ccount = bpf->bpf_stat.bs_capt;
1288 if (bpf->bpf_ifdev != NULL)
1289 strlcpy(bde->bde_ifname, ifdev_get_name(bpf->bpf_ifdev),
1290 sizeof(bde->bde_ifname));
1291 }
1292
1293 /*
1294 * Obtain statistics about open BPF devices ("peers"). This node may be
1295 * accessed by the superuser only. Used by netstat(1).
1296 */
1297 static ssize_t
1298 bpfdev_peers(struct rmib_call * call, struct rmib_node * node __unused,
1299 struct rmib_oldp * oldp, struct rmib_newp * newp __unused)
1300 {
1301 struct bpfdev *bpf;
1302 struct bpf_d_ext bde;
1303 unsigned int slot;
1304 ssize_t off;
1305 int r, size, max;
1306
1307 if (!(call->call_flags & RMIB_FLAG_AUTH))
1308 return EPERM;
1309
1310 if (call->call_namelen != 2)
1311 return EINVAL;
1312
1313 size = call->call_name[0];
1314 if (size < 0 || (size_t)size > sizeof(bde))
1315 return EINVAL;
1316 if (size == 0)
1317 size = sizeof(bde);
1318 max = call->call_name[1];
1319
1320 off = 0;
1321
1322 for (slot = 0; slot < __arraycount(bpf_array); slot++) {
1323 bpf = &bpf_array[slot];
1324
1325 if (!(bpf->bpf_flags & BPFF_IN_USE))
1326 continue;
1327
1328 if (rmib_inrange(oldp, off)) {
1329 memset(&bde, 0, sizeof(bde));
1330
1331 bpfdev_get_info(&bde, bpf);
1332
1333 if ((r = rmib_copyout(oldp, off, &bde, size)) < 0)
1334 return r;
1335 }
1336
1337 off += sizeof(bde);
1338 if (max > 0 && --max == 0)
1339 break;
1340 }
1341
1342 /* No slack needed: netstat(1) resizes its buffer as needed. */
1343 return off;
1344 }
1345
1346 static const struct chardriver bpfdev_tab = {
1347 .cdr_open = bpfdev_open,
1348 .cdr_close = bpfdev_close,
1349 .cdr_read = bpfdev_read,
1350 .cdr_write = bpfdev_write,
1351 .cdr_ioctl = bpfdev_ioctl,
1352 .cdr_cancel = bpfdev_cancel,
1353 .cdr_select = bpfdev_select
1354 };
1355
1356 /*
1357 * Process a character driver request. Since the LWIP service offers character
1358 * devices for BPF only, it must be a request for a BPF device.
1359 */
1360 void
1361 bpfdev_process(message * m_ptr, int ipc_status)
1362 {
1363
1364 chardriver_process(&bpfdev_tab, m_ptr, ipc_status);
1365 }
1366