xref: /minix3/minix/net/lwip/bpfdev.c (revision ef8d499e2d2af900e9b2ab297171d7b088652482)
1*ef8d499eSDavid van Moolenbroek /* LWIP service - bpfdev.c - Berkeley Packet Filter (/dev/bpf) interface */
2*ef8d499eSDavid van Moolenbroek /*
3*ef8d499eSDavid van Moolenbroek  * BPF is a cloning device: opening /dev/bpf returns a new BPF device which is
4*ef8d499eSDavid van Moolenbroek  * independent from any other opened BPF devices.  We assume that each BPF
5*ef8d499eSDavid van Moolenbroek  * device is used by one single user process, and this implementation therefore
6*ef8d499eSDavid van Moolenbroek  * does not support multiple concurrent device calls on the same BPF device.
7*ef8d499eSDavid van Moolenbroek  *
8*ef8d499eSDavid van Moolenbroek  * Packet buffering basically follows the BSD model: each BPF device that is
9*ef8d499eSDavid van Moolenbroek  * configured (that is, it has been attached to an interface) has two buffers,
10*ef8d499eSDavid van Moolenbroek  * each of the configured size: a store buffer, where new packets are stored,
11*ef8d499eSDavid van Moolenbroek  * and a hold buffer, which is typically full and awaiting retrieval through a
12*ef8d499eSDavid van Moolenbroek  * read call from userland.  The buffers are swapped ("rotated") when the store
13*ef8d499eSDavid van Moolenbroek  * buffer is filled up and the hold buffer is empty - if the hold buffer is not
14*ef8d499eSDavid van Moolenbroek  * empty is not empty either, additional packets are dropped.
15*ef8d499eSDavid van Moolenbroek  *
16*ef8d499eSDavid van Moolenbroek  * These buffers are allocated when the BPF device is attached to an interface.
17*ef8d499eSDavid van Moolenbroek  * The interface may later disappear, in which case the BPF device is detached
18*ef8d499eSDavid van Moolenbroek  * from it, allowing any final packets to be read before read requests start
19*ef8d499eSDavid van Moolenbroek  * returning I/O errors.  The buffers are freed only when the device is closed.
20*ef8d499eSDavid van Moolenbroek  */
21*ef8d499eSDavid van Moolenbroek 
22*ef8d499eSDavid van Moolenbroek #include "lwip.h"
23*ef8d499eSDavid van Moolenbroek #include "bpfdev.h"
24*ef8d499eSDavid van Moolenbroek 
25*ef8d499eSDavid van Moolenbroek #include <minix/chardriver.h>
26*ef8d499eSDavid van Moolenbroek #include <net/if.h>
27*ef8d499eSDavid van Moolenbroek #include <net/bpfdesc.h>
28*ef8d499eSDavid van Moolenbroek #include <minix/bpf.h>
29*ef8d499eSDavid van Moolenbroek #include <sys/mman.h>
30*ef8d499eSDavid van Moolenbroek 
31*ef8d499eSDavid van Moolenbroek /*
32*ef8d499eSDavid van Moolenbroek  * Make sure that our implementation matches the BPF version in the NetBSD
33*ef8d499eSDavid van Moolenbroek  * headers.  If they change the version number, we may have to make changes
34*ef8d499eSDavid van Moolenbroek  * here accordingly.
35*ef8d499eSDavid van Moolenbroek  */
36*ef8d499eSDavid van Moolenbroek #if BPF_MAJOR_VERSION != 1 || BPF_MINOR_VERSION != 1
37*ef8d499eSDavid van Moolenbroek #error "NetBSD BPF version has changed"
38*ef8d499eSDavid van Moolenbroek #endif
39*ef8d499eSDavid van Moolenbroek 
40*ef8d499eSDavid van Moolenbroek /* The number of BPF devices. */
41*ef8d499eSDavid van Moolenbroek #define NR_BPFDEV		16
42*ef8d499eSDavid van Moolenbroek 
43*ef8d499eSDavid van Moolenbroek /* BPF receive buffer size: allowed range and default. */
44*ef8d499eSDavid van Moolenbroek #define BPF_BUF_MIN		BPF_WORDALIGN(sizeof(struct bpf_hdr))
45*ef8d499eSDavid van Moolenbroek #define BPF_BUF_DEF		32768
46*ef8d499eSDavid van Moolenbroek #define BPF_BUF_MAX		262144
47*ef8d499eSDavid van Moolenbroek 
48*ef8d499eSDavid van Moolenbroek /*
49*ef8d499eSDavid van Moolenbroek  * By opening /dev/bpf, one will obtain a cloned device with a different minor
50*ef8d499eSDavid van Moolenbroek  * number, which maps to one of the BPF devices.
51*ef8d499eSDavid van Moolenbroek  */
52*ef8d499eSDavid van Moolenbroek #define BPFDEV_MINOR		0	/* minor number of /dev/bpf */
53*ef8d499eSDavid van Moolenbroek #define BPFDEV_BASE_MINOR	1	/* base minor number for BPF devices */
54*ef8d499eSDavid van Moolenbroek 
55*ef8d499eSDavid van Moolenbroek static struct bpfdev {
56*ef8d499eSDavid van Moolenbroek 	struct bpfdev_link bpf_link;	/* structure link, MUST be first */
57*ef8d499eSDavid van Moolenbroek 	TAILQ_ENTRY(bpfdev) bpf_next;	/* next on free or interface list */
58*ef8d499eSDavid van Moolenbroek 	struct ifdev *bpf_ifdev;	/* associated interface, or NULL */
59*ef8d499eSDavid van Moolenbroek 	unsigned int bpf_flags;		/* flags (BPFF_) */
60*ef8d499eSDavid van Moolenbroek 	size_t bpf_size;		/* size of packet buffers */
61*ef8d499eSDavid van Moolenbroek 	char *bpf_sbuf;			/* store buffer (mmap'd, or NULL) */
62*ef8d499eSDavid van Moolenbroek 	char *bpf_hbuf;			/* hold buffer (mmap'd, or NULL) */
63*ef8d499eSDavid van Moolenbroek 	size_t bpf_slen;		/* used part of store buffer */
64*ef8d499eSDavid van Moolenbroek 	size_t bpf_hlen;		/* used part of hold buffer */
65*ef8d499eSDavid van Moolenbroek 	struct bpf_insn *bpf_filter;	/* verified BPF filter, or NULL */
66*ef8d499eSDavid van Moolenbroek 	size_t bpf_filterlen;		/* length of filter, for munmap */
67*ef8d499eSDavid van Moolenbroek 	pid_t bpf_pid;			/* process ID of last using process */
68*ef8d499eSDavid van Moolenbroek 	clock_t bpf_timeout;		/* timeout for read calls (0 = none) */
69*ef8d499eSDavid van Moolenbroek 	struct {			/* state for pending read request */
70*ef8d499eSDavid van Moolenbroek 		endpoint_t br_endpt;	/* reading endpoint, or NONE */
71*ef8d499eSDavid van Moolenbroek 		cp_grant_id_t br_grant;	/* grant for reader's buffer */
72*ef8d499eSDavid van Moolenbroek 		cdev_id_t br_id;	/* read request identifier */
73*ef8d499eSDavid van Moolenbroek 		minix_timer_t br_timer;	/* timer for read timeout */
74*ef8d499eSDavid van Moolenbroek 	} bpf_read;
75*ef8d499eSDavid van Moolenbroek 	struct {			/* state for pending select request */
76*ef8d499eSDavid van Moolenbroek 		endpoint_t bs_endpt;	/* selecting endpoint, or NONE */
77*ef8d499eSDavid van Moolenbroek 		unsigned int bs_selops;	/* pending select operations */
78*ef8d499eSDavid van Moolenbroek 	} bpf_select;
79*ef8d499eSDavid van Moolenbroek 	struct {			/* packet capture statistics */
80*ef8d499eSDavid van Moolenbroek 		uint64_t bs_recv;	/* # of packets run through filter */
81*ef8d499eSDavid van Moolenbroek 		uint64_t bs_drop;	/* # of packets dropped: buffer full */
82*ef8d499eSDavid van Moolenbroek 		uint64_t bs_capt;	/* # of packets accepted by filter */
83*ef8d499eSDavid van Moolenbroek 	} bpf_stat;
84*ef8d499eSDavid van Moolenbroek } bpf_array[NR_BPFDEV];
85*ef8d499eSDavid van Moolenbroek 
86*ef8d499eSDavid van Moolenbroek #define BPFF_IN_USE	0x01		/* this BPF device object is in use */
87*ef8d499eSDavid van Moolenbroek #define BPFF_PROMISC	0x02		/* promiscuous mode enabled */
88*ef8d499eSDavid van Moolenbroek #define BPFF_IMMEDIATE	0x04		/* immediate mode is enabled */
89*ef8d499eSDavid van Moolenbroek #define BPFF_SEESENT	0x08		/* also process host-sent packets */
90*ef8d499eSDavid van Moolenbroek #define BPFF_HDRCMPLT	0x10		/* do not fill in link-layer source */
91*ef8d499eSDavid van Moolenbroek #define BPFF_FEEDBACK	0x20		/* feed back written packet as input */
92*ef8d499eSDavid van Moolenbroek 
93*ef8d499eSDavid van Moolenbroek static TAILQ_HEAD(, bpfdev_link) bpfl_freelist;	/* list of free BPF devices */
94*ef8d499eSDavid van Moolenbroek 
95*ef8d499eSDavid van Moolenbroek static struct bpf_stat bpf_stat;
96*ef8d499eSDavid van Moolenbroek 
97*ef8d499eSDavid van Moolenbroek static ssize_t bpfdev_peers(struct rmib_call *, struct rmib_node *,
98*ef8d499eSDavid van Moolenbroek 	struct rmib_oldp *, struct rmib_newp *);
99*ef8d499eSDavid van Moolenbroek 
100*ef8d499eSDavid van Moolenbroek /* The CTL_NET NET_BPF subtree.  All nodes are dynamically numbered. */
101*ef8d499eSDavid van Moolenbroek static struct rmib_node net_bpf_table[] = {
102*ef8d499eSDavid van Moolenbroek 	RMIB_INT(RMIB_RO, BPF_BUF_MAX, "maxbufsize",
103*ef8d499eSDavid van Moolenbroek 	    "Maximum size for data capture buffer"), /* TODO: read-write */
104*ef8d499eSDavid van Moolenbroek 	RMIB_STRUCT(RMIB_RO, sizeof(bpf_stat), &bpf_stat, "stats",
105*ef8d499eSDavid van Moolenbroek 	    "BPF stats"),
106*ef8d499eSDavid van Moolenbroek 	RMIB_FUNC(RMIB_RO | CTLTYPE_NODE, 0, bpfdev_peers, "peers",
107*ef8d499eSDavid van Moolenbroek 	    "BPF peers"),
108*ef8d499eSDavid van Moolenbroek };
109*ef8d499eSDavid van Moolenbroek 
110*ef8d499eSDavid van Moolenbroek static struct rmib_node net_bpf_node =
111*ef8d499eSDavid van Moolenbroek     RMIB_NODE(RMIB_RO, net_bpf_table, "bpf", "BPF options");
112*ef8d499eSDavid van Moolenbroek 
113*ef8d499eSDavid van Moolenbroek /*
114*ef8d499eSDavid van Moolenbroek  * Initialize the BPF module.
115*ef8d499eSDavid van Moolenbroek  */
116*ef8d499eSDavid van Moolenbroek void
bpfdev_init(void)117*ef8d499eSDavid van Moolenbroek bpfdev_init(void)
118*ef8d499eSDavid van Moolenbroek {
119*ef8d499eSDavid van Moolenbroek 	const int mib[] = { CTL_NET, NET_BPF };
120*ef8d499eSDavid van Moolenbroek 	unsigned int slot;
121*ef8d499eSDavid van Moolenbroek 	int r;
122*ef8d499eSDavid van Moolenbroek 
123*ef8d499eSDavid van Moolenbroek 	/* Initialize data structures. */
124*ef8d499eSDavid van Moolenbroek 	TAILQ_INIT(&bpfl_freelist);
125*ef8d499eSDavid van Moolenbroek 
126*ef8d499eSDavid van Moolenbroek 	for (slot = 0; slot < __arraycount(bpf_array); slot++) {
127*ef8d499eSDavid van Moolenbroek 		bpf_array[slot].bpf_flags = 0;
128*ef8d499eSDavid van Moolenbroek 
129*ef8d499eSDavid van Moolenbroek 		TAILQ_INSERT_TAIL(&bpfl_freelist, &bpf_array[slot].bpf_link,
130*ef8d499eSDavid van Moolenbroek 		    bpfl_next);
131*ef8d499eSDavid van Moolenbroek 	}
132*ef8d499eSDavid van Moolenbroek 
133*ef8d499eSDavid van Moolenbroek 	memset(&bpf_stat, 0, sizeof(bpf_stat));
134*ef8d499eSDavid van Moolenbroek 
135*ef8d499eSDavid van Moolenbroek 	/* Register the "net.bpf" subtree with the MIB service. */
136*ef8d499eSDavid van Moolenbroek 	if ((r = rmib_register(mib, __arraycount(mib), &net_bpf_node)) != OK)
137*ef8d499eSDavid van Moolenbroek 		panic("unable to register net.bpf RMIB tree: %d", r);
138*ef8d499eSDavid van Moolenbroek }
139*ef8d499eSDavid van Moolenbroek 
140*ef8d499eSDavid van Moolenbroek /*
141*ef8d499eSDavid van Moolenbroek  * Given a BPF device object, return the corresponding minor number.
142*ef8d499eSDavid van Moolenbroek  */
143*ef8d499eSDavid van Moolenbroek static devminor_t
bpfdev_get_minor(struct bpfdev * bpfdev)144*ef8d499eSDavid van Moolenbroek bpfdev_get_minor(struct bpfdev * bpfdev)
145*ef8d499eSDavid van Moolenbroek {
146*ef8d499eSDavid van Moolenbroek 
147*ef8d499eSDavid van Moolenbroek 	assert(bpfdev != NULL);
148*ef8d499eSDavid van Moolenbroek 
149*ef8d499eSDavid van Moolenbroek 	return BPFDEV_BASE_MINOR + (devminor_t)(bpfdev - bpf_array);
150*ef8d499eSDavid van Moolenbroek }
151*ef8d499eSDavid van Moolenbroek 
152*ef8d499eSDavid van Moolenbroek /*
153*ef8d499eSDavid van Moolenbroek  * Given a minor number, return the corresponding BPF device object, or NULL if
154*ef8d499eSDavid van Moolenbroek  * the minor number does not identify a BPF device.
155*ef8d499eSDavid van Moolenbroek  */
156*ef8d499eSDavid van Moolenbroek static struct bpfdev *
bpfdev_get_by_minor(devminor_t minor)157*ef8d499eSDavid van Moolenbroek bpfdev_get_by_minor(devminor_t minor)
158*ef8d499eSDavid van Moolenbroek {
159*ef8d499eSDavid van Moolenbroek 
160*ef8d499eSDavid van Moolenbroek 	if (minor < BPFDEV_BASE_MINOR ||
161*ef8d499eSDavid van Moolenbroek 	    (unsigned int)minor >= BPFDEV_BASE_MINOR + __arraycount(bpf_array))
162*ef8d499eSDavid van Moolenbroek 		return NULL;
163*ef8d499eSDavid van Moolenbroek 
164*ef8d499eSDavid van Moolenbroek 	return &bpf_array[minor - BPFDEV_BASE_MINOR];
165*ef8d499eSDavid van Moolenbroek }
166*ef8d499eSDavid van Moolenbroek 
167*ef8d499eSDavid van Moolenbroek /*
168*ef8d499eSDavid van Moolenbroek  * Open a BPF device, returning a cloned device instance.
169*ef8d499eSDavid van Moolenbroek  */
170*ef8d499eSDavid van Moolenbroek static int
bpfdev_open(devminor_t minor,int access __unused,endpoint_t user_endpt)171*ef8d499eSDavid van Moolenbroek bpfdev_open(devminor_t minor, int access __unused, endpoint_t user_endpt)
172*ef8d499eSDavid van Moolenbroek {
173*ef8d499eSDavid van Moolenbroek 	struct bpfdev_link *bpfl;
174*ef8d499eSDavid van Moolenbroek 	struct bpfdev *bpf;
175*ef8d499eSDavid van Moolenbroek 
176*ef8d499eSDavid van Moolenbroek 	/* Disallow opening cloned devices through device nodes. */
177*ef8d499eSDavid van Moolenbroek 	if (minor != BPFDEV_MINOR)
178*ef8d499eSDavid van Moolenbroek 		return ENXIO;
179*ef8d499eSDavid van Moolenbroek 
180*ef8d499eSDavid van Moolenbroek 	if (TAILQ_EMPTY(&bpfl_freelist))
181*ef8d499eSDavid van Moolenbroek 		return ENOBUFS;
182*ef8d499eSDavid van Moolenbroek 
183*ef8d499eSDavid van Moolenbroek 	bpfl = TAILQ_FIRST(&bpfl_freelist);
184*ef8d499eSDavid van Moolenbroek 	TAILQ_REMOVE(&bpfl_freelist, bpfl, bpfl_next);
185*ef8d499eSDavid van Moolenbroek 
186*ef8d499eSDavid van Moolenbroek 	bpf = (struct bpfdev *)bpfl;
187*ef8d499eSDavid van Moolenbroek 
188*ef8d499eSDavid van Moolenbroek 	memset(bpf, 0, sizeof(*bpf));
189*ef8d499eSDavid van Moolenbroek 
190*ef8d499eSDavid van Moolenbroek 	bpf->bpf_flags = BPFF_IN_USE | BPFF_SEESENT;
191*ef8d499eSDavid van Moolenbroek 	bpf->bpf_size = BPF_BUF_DEF;
192*ef8d499eSDavid van Moolenbroek 	bpf->bpf_pid = getnpid(user_endpt);
193*ef8d499eSDavid van Moolenbroek 	bpf->bpf_read.br_endpt = NONE;
194*ef8d499eSDavid van Moolenbroek 	bpf->bpf_select.bs_endpt = NONE;
195*ef8d499eSDavid van Moolenbroek 
196*ef8d499eSDavid van Moolenbroek 	return CDEV_CLONED | bpfdev_get_minor(bpf);
197*ef8d499eSDavid van Moolenbroek }
198*ef8d499eSDavid van Moolenbroek 
199*ef8d499eSDavid van Moolenbroek /*
200*ef8d499eSDavid van Moolenbroek  * Close a BPF device.
201*ef8d499eSDavid van Moolenbroek  */
202*ef8d499eSDavid van Moolenbroek static int
bpfdev_close(devminor_t minor)203*ef8d499eSDavid van Moolenbroek bpfdev_close(devminor_t minor)
204*ef8d499eSDavid van Moolenbroek {
205*ef8d499eSDavid van Moolenbroek 	struct bpfdev *bpf;
206*ef8d499eSDavid van Moolenbroek 
207*ef8d499eSDavid van Moolenbroek 	if ((bpf = bpfdev_get_by_minor(minor)) == NULL)
208*ef8d499eSDavid van Moolenbroek 		return EINVAL;
209*ef8d499eSDavid van Moolenbroek 
210*ef8d499eSDavid van Moolenbroek 	/*
211*ef8d499eSDavid van Moolenbroek 	 * There cannot possibly be a pending read request, so we never need to
212*ef8d499eSDavid van Moolenbroek 	 * cancel the read timer from here either.
213*ef8d499eSDavid van Moolenbroek 	 */
214*ef8d499eSDavid van Moolenbroek 	assert(bpf->bpf_read.br_endpt == NONE);
215*ef8d499eSDavid van Moolenbroek 
216*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_sbuf != NULL) {
217*ef8d499eSDavid van Moolenbroek 		assert(bpf->bpf_hbuf != NULL);
218*ef8d499eSDavid van Moolenbroek 
219*ef8d499eSDavid van Moolenbroek 		if (munmap(bpf->bpf_sbuf, bpf->bpf_size) != 0)
220*ef8d499eSDavid van Moolenbroek 			panic("munmap failed: %d", -errno);
221*ef8d499eSDavid van Moolenbroek 		if (munmap(bpf->bpf_hbuf, bpf->bpf_size) != 0)
222*ef8d499eSDavid van Moolenbroek 			panic("munmap failed: %d", -errno);
223*ef8d499eSDavid van Moolenbroek 
224*ef8d499eSDavid van Moolenbroek 		bpf->bpf_sbuf = NULL;
225*ef8d499eSDavid van Moolenbroek 		bpf->bpf_hbuf = NULL;
226*ef8d499eSDavid van Moolenbroek 	} else
227*ef8d499eSDavid van Moolenbroek 		assert(bpf->bpf_hbuf == NULL);
228*ef8d499eSDavid van Moolenbroek 
229*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_filter != NULL) {
230*ef8d499eSDavid van Moolenbroek 		assert(bpf->bpf_filterlen > 0);
231*ef8d499eSDavid van Moolenbroek 
232*ef8d499eSDavid van Moolenbroek 		if (munmap(bpf->bpf_filter, bpf->bpf_filterlen) != 0)
233*ef8d499eSDavid van Moolenbroek 			panic("munmap failed: %d", -errno);
234*ef8d499eSDavid van Moolenbroek 
235*ef8d499eSDavid van Moolenbroek 		bpf->bpf_filter = NULL;
236*ef8d499eSDavid van Moolenbroek 	}
237*ef8d499eSDavid van Moolenbroek 
238*ef8d499eSDavid van Moolenbroek 	/*
239*ef8d499eSDavid van Moolenbroek 	 * If the BPF device was attached to an interface, and that interface
240*ef8d499eSDavid van Moolenbroek 	 * has not disappeared in the meantime, detach from it now.
241*ef8d499eSDavid van Moolenbroek 	 */
242*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_ifdev != NULL) {
243*ef8d499eSDavid van Moolenbroek 		if (bpf->bpf_flags & BPFF_PROMISC)
244*ef8d499eSDavid van Moolenbroek 			ifdev_clear_promisc(bpf->bpf_ifdev);
245*ef8d499eSDavid van Moolenbroek 
246*ef8d499eSDavid van Moolenbroek 		ifdev_detach_bpf(bpf->bpf_ifdev, &bpf->bpf_link);
247*ef8d499eSDavid van Moolenbroek 
248*ef8d499eSDavid van Moolenbroek 		bpf->bpf_ifdev = NULL;
249*ef8d499eSDavid van Moolenbroek 	}
250*ef8d499eSDavid van Moolenbroek 
251*ef8d499eSDavid van Moolenbroek 	bpf->bpf_flags = 0;		/* mark as no longer in use */
252*ef8d499eSDavid van Moolenbroek 
253*ef8d499eSDavid van Moolenbroek 	TAILQ_INSERT_HEAD(&bpfl_freelist, &bpf->bpf_link, bpfl_next);
254*ef8d499eSDavid van Moolenbroek 
255*ef8d499eSDavid van Moolenbroek 	return OK;
256*ef8d499eSDavid van Moolenbroek }
257*ef8d499eSDavid van Moolenbroek 
258*ef8d499eSDavid van Moolenbroek /*
259*ef8d499eSDavid van Moolenbroek  * Rotate buffers for the BPF device, by swapping the store buffer and the hold
260*ef8d499eSDavid van Moolenbroek  * buffer.
261*ef8d499eSDavid van Moolenbroek  */
262*ef8d499eSDavid van Moolenbroek static void
bpfdev_rotate(struct bpfdev * bpf)263*ef8d499eSDavid van Moolenbroek bpfdev_rotate(struct bpfdev * bpf)
264*ef8d499eSDavid van Moolenbroek {
265*ef8d499eSDavid van Moolenbroek 	char *buf;
266*ef8d499eSDavid van Moolenbroek 	size_t len;
267*ef8d499eSDavid van Moolenbroek 
268*ef8d499eSDavid van Moolenbroek 	/*
269*ef8d499eSDavid van Moolenbroek 	 * When rotating, the store buffer may or may not be empty, but the
270*ef8d499eSDavid van Moolenbroek 	 * hold buffer must always be empty.
271*ef8d499eSDavid van Moolenbroek 	 */
272*ef8d499eSDavid van Moolenbroek 	assert(bpf->bpf_hlen == 0);
273*ef8d499eSDavid van Moolenbroek 
274*ef8d499eSDavid van Moolenbroek 	buf = bpf->bpf_sbuf;
275*ef8d499eSDavid van Moolenbroek 	len = bpf->bpf_slen;
276*ef8d499eSDavid van Moolenbroek 	bpf->bpf_sbuf = bpf->bpf_hbuf;
277*ef8d499eSDavid van Moolenbroek 	bpf->bpf_slen = bpf->bpf_hlen;
278*ef8d499eSDavid van Moolenbroek 	bpf->bpf_hbuf = buf;
279*ef8d499eSDavid van Moolenbroek 	bpf->bpf_hlen = len;
280*ef8d499eSDavid van Moolenbroek }
281*ef8d499eSDavid van Moolenbroek 
282*ef8d499eSDavid van Moolenbroek /*
283*ef8d499eSDavid van Moolenbroek  * Test whether any of the given select operations are ready on the BPF device,
284*ef8d499eSDavid van Moolenbroek  * and return the set of ready operations.
285*ef8d499eSDavid van Moolenbroek  */
286*ef8d499eSDavid van Moolenbroek static unsigned int
bpfdev_test_select(struct bpfdev * bpf,unsigned int ops)287*ef8d499eSDavid van Moolenbroek bpfdev_test_select(struct bpfdev * bpf, unsigned int ops)
288*ef8d499eSDavid van Moolenbroek {
289*ef8d499eSDavid van Moolenbroek 	unsigned int ready_ops;
290*ef8d499eSDavid van Moolenbroek 
291*ef8d499eSDavid van Moolenbroek 	ready_ops = 0;
292*ef8d499eSDavid van Moolenbroek 
293*ef8d499eSDavid van Moolenbroek 	/*
294*ef8d499eSDavid van Moolenbroek 	 * The BPF device is ready for reading if the hold buffer is not empty
295*ef8d499eSDavid van Moolenbroek 	 * (i.e.: the store buffer has been filled up completely and was
296*ef8d499eSDavid van Moolenbroek 	 * therefore rotated) or if immediate mode is set and the store buffer
297*ef8d499eSDavid van Moolenbroek 	 * is not empty (i.e.: any packet is available at all).  In the latter
298*ef8d499eSDavid van Moolenbroek 	 * case, the buffers will be rotated during the read.  We do not
299*ef8d499eSDavid van Moolenbroek 	 * support applying the read timeout to selects and maintaining state
300*ef8d499eSDavid van Moolenbroek 	 * between the select and the following read, because despite that
301*ef8d499eSDavid van Moolenbroek 	 * libpcap claims that it is the right behavior, that is just insane.
302*ef8d499eSDavid van Moolenbroek 	 */
303*ef8d499eSDavid van Moolenbroek 	if (ops & CDEV_OP_RD) {
304*ef8d499eSDavid van Moolenbroek 		if (bpf->bpf_ifdev == NULL)
305*ef8d499eSDavid van Moolenbroek 			ready_ops |= CDEV_OP_RD;
306*ef8d499eSDavid van Moolenbroek 		else if (bpf->bpf_hlen > 0)
307*ef8d499eSDavid van Moolenbroek 			ready_ops |= CDEV_OP_RD;
308*ef8d499eSDavid van Moolenbroek 		else if ((bpf->bpf_flags & BPFF_IMMEDIATE) &&
309*ef8d499eSDavid van Moolenbroek 		    bpf->bpf_slen > 0)
310*ef8d499eSDavid van Moolenbroek 			ready_ops |= CDEV_OP_RD;
311*ef8d499eSDavid van Moolenbroek 	}
312*ef8d499eSDavid van Moolenbroek 
313*ef8d499eSDavid van Moolenbroek 	if (ops & CDEV_OP_WR)
314*ef8d499eSDavid van Moolenbroek 		ready_ops |= CDEV_OP_WR;
315*ef8d499eSDavid van Moolenbroek 
316*ef8d499eSDavid van Moolenbroek 	return ready_ops;
317*ef8d499eSDavid van Moolenbroek }
318*ef8d499eSDavid van Moolenbroek 
319*ef8d499eSDavid van Moolenbroek /*
320*ef8d499eSDavid van Moolenbroek  * There has been a state change on the BPF device.  If now possible, resume a
321*ef8d499eSDavid van Moolenbroek  * pending select query, if any.
322*ef8d499eSDavid van Moolenbroek  */
323*ef8d499eSDavid van Moolenbroek static void
bpfdev_resume_select(struct bpfdev * bpf)324*ef8d499eSDavid van Moolenbroek bpfdev_resume_select(struct bpfdev * bpf)
325*ef8d499eSDavid van Moolenbroek {
326*ef8d499eSDavid van Moolenbroek 	unsigned int ops, ready_ops;
327*ef8d499eSDavid van Moolenbroek 	endpoint_t endpt;
328*ef8d499eSDavid van Moolenbroek 
329*ef8d499eSDavid van Moolenbroek 	/* First see if there is a pending select request at all. */
330*ef8d499eSDavid van Moolenbroek 	if ((endpt = bpf->bpf_select.bs_endpt) == NONE)
331*ef8d499eSDavid van Moolenbroek 		return;
332*ef8d499eSDavid van Moolenbroek 	ops = bpf->bpf_select.bs_selops;
333*ef8d499eSDavid van Moolenbroek 
334*ef8d499eSDavid van Moolenbroek 	assert(ops != 0);
335*ef8d499eSDavid van Moolenbroek 
336*ef8d499eSDavid van Moolenbroek 	/* Then see if any of the pending operations are now ready. */
337*ef8d499eSDavid van Moolenbroek 	if ((ready_ops = bpfdev_test_select(bpf, ops)) == 0)
338*ef8d499eSDavid van Moolenbroek 		return;
339*ef8d499eSDavid van Moolenbroek 
340*ef8d499eSDavid van Moolenbroek 	/* If so, notify VFS about the ready operations. */
341*ef8d499eSDavid van Moolenbroek 	chardriver_reply_select(bpf->bpf_select.bs_endpt,
342*ef8d499eSDavid van Moolenbroek 	    bpfdev_get_minor(bpf), ready_ops);
343*ef8d499eSDavid van Moolenbroek 
344*ef8d499eSDavid van Moolenbroek 	/*
345*ef8d499eSDavid van Moolenbroek 	 * Forget about the ready operations.  If that leaves no pending
346*ef8d499eSDavid van Moolenbroek 	 * operations, forget about the select request altogether.
347*ef8d499eSDavid van Moolenbroek 	 */
348*ef8d499eSDavid van Moolenbroek 	if ((bpf->bpf_select.bs_selops &= ~ready_ops) == 0)
349*ef8d499eSDavid van Moolenbroek 		bpf->bpf_select.bs_endpt = NONE;
350*ef8d499eSDavid van Moolenbroek }
351*ef8d499eSDavid van Moolenbroek 
352*ef8d499eSDavid van Moolenbroek /*
353*ef8d499eSDavid van Moolenbroek  * There has been a state change on the BPF device.  If now possible, resume a
354*ef8d499eSDavid van Moolenbroek  * pending read request, if any.  If the call is a result of a timeout,
355*ef8d499eSDavid van Moolenbroek  * 'is_timeout' is set.  In that case, the read request must be resumed with an
356*ef8d499eSDavid van Moolenbroek  * EAGAIN error if no packets are available, and the running timer must be
357*ef8d499eSDavid van Moolenbroek  * canceled.  Otherwise, the resumption is due to a full buffer or a
358*ef8d499eSDavid van Moolenbroek  * disappeared interface, and 'is_timeout' is not set.  In this case, the read
359*ef8d499eSDavid van Moolenbroek  * request must be resumed with an I/O error if no packets are available.
360*ef8d499eSDavid van Moolenbroek  */
361*ef8d499eSDavid van Moolenbroek static void
bpfdev_resume_read(struct bpfdev * bpf,int is_timeout)362*ef8d499eSDavid van Moolenbroek bpfdev_resume_read(struct bpfdev * bpf, int is_timeout)
363*ef8d499eSDavid van Moolenbroek {
364*ef8d499eSDavid van Moolenbroek 	ssize_t r;
365*ef8d499eSDavid van Moolenbroek 
366*ef8d499eSDavid van Moolenbroek 	assert(bpf->bpf_read.br_endpt != NONE);
367*ef8d499eSDavid van Moolenbroek 
368*ef8d499eSDavid van Moolenbroek 	/*
369*ef8d499eSDavid van Moolenbroek 	 * If the hold buffer is still empty, see if the store buffer has
370*ef8d499eSDavid van Moolenbroek 	 * any packets to copy out.
371*ef8d499eSDavid van Moolenbroek 	 */
372*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_hlen == 0)
373*ef8d499eSDavid van Moolenbroek 		bpfdev_rotate(bpf);
374*ef8d499eSDavid van Moolenbroek 
375*ef8d499eSDavid van Moolenbroek 	/* Return any available packets, or otherwise an error. */
376*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_hlen > 0) {
377*ef8d499eSDavid van Moolenbroek 		assert(bpf->bpf_hlen <= bpf->bpf_size);
378*ef8d499eSDavid van Moolenbroek 
379*ef8d499eSDavid van Moolenbroek 		r = sys_safecopyto(bpf->bpf_read.br_endpt,
380*ef8d499eSDavid van Moolenbroek 		    bpf->bpf_read.br_grant, 0, (vir_bytes)bpf->bpf_hbuf,
381*ef8d499eSDavid van Moolenbroek 		    bpf->bpf_hlen);
382*ef8d499eSDavid van Moolenbroek 
383*ef8d499eSDavid van Moolenbroek 		if (r == OK) {
384*ef8d499eSDavid van Moolenbroek 			r = (ssize_t)bpf->bpf_hlen;
385*ef8d499eSDavid van Moolenbroek 
386*ef8d499eSDavid van Moolenbroek 			bpf->bpf_hlen = 0;
387*ef8d499eSDavid van Moolenbroek 
388*ef8d499eSDavid van Moolenbroek 			assert(bpf->bpf_slen != bpf->bpf_size);
389*ef8d499eSDavid van Moolenbroek 
390*ef8d499eSDavid van Moolenbroek 			/*
391*ef8d499eSDavid van Moolenbroek 			 * Allow readers to get the last packets after the
392*ef8d499eSDavid van Moolenbroek 			 * interface has disappeared, before getting errors.
393*ef8d499eSDavid van Moolenbroek 			 */
394*ef8d499eSDavid van Moolenbroek 			if (bpf->bpf_ifdev == NULL)
395*ef8d499eSDavid van Moolenbroek 				bpfdev_rotate(bpf);
396*ef8d499eSDavid van Moolenbroek 		}
397*ef8d499eSDavid van Moolenbroek 	} else
398*ef8d499eSDavid van Moolenbroek 		r = (is_timeout) ? EAGAIN : EIO;
399*ef8d499eSDavid van Moolenbroek 
400*ef8d499eSDavid van Moolenbroek 	chardriver_reply_task(bpf->bpf_read.br_endpt, bpf->bpf_read.br_id, r);
401*ef8d499eSDavid van Moolenbroek 
402*ef8d499eSDavid van Moolenbroek 	bpf->bpf_read.br_endpt = NONE;
403*ef8d499eSDavid van Moolenbroek 
404*ef8d499eSDavid van Moolenbroek 	/* Was there still a timer running?  Then cancel it now. */
405*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_timeout > 0 && !is_timeout)
406*ef8d499eSDavid van Moolenbroek 		cancel_timer(&bpf->bpf_read.br_timer);
407*ef8d499eSDavid van Moolenbroek }
408*ef8d499eSDavid van Moolenbroek 
409*ef8d499eSDavid van Moolenbroek /*
410*ef8d499eSDavid van Moolenbroek  * A read timeout has triggered for the BPF device.  Wake up the pending read
411*ef8d499eSDavid van Moolenbroek  * request.
412*ef8d499eSDavid van Moolenbroek  */
413*ef8d499eSDavid van Moolenbroek static void
bpfdev_timeout(int arg)414*ef8d499eSDavid van Moolenbroek bpfdev_timeout(int arg)
415*ef8d499eSDavid van Moolenbroek {
416*ef8d499eSDavid van Moolenbroek 	struct bpfdev *bpf;
417*ef8d499eSDavid van Moolenbroek 
418*ef8d499eSDavid van Moolenbroek 	assert(arg >= 0 && (unsigned int)arg < __arraycount(bpf_array));
419*ef8d499eSDavid van Moolenbroek 
420*ef8d499eSDavid van Moolenbroek 	bpf = &bpf_array[arg];
421*ef8d499eSDavid van Moolenbroek 
422*ef8d499eSDavid van Moolenbroek 	assert(bpf->bpf_read.br_endpt != NONE);
423*ef8d499eSDavid van Moolenbroek 
424*ef8d499eSDavid van Moolenbroek 	bpfdev_resume_read(bpf, TRUE /*is_timeout*/);
425*ef8d499eSDavid van Moolenbroek }
426*ef8d499eSDavid van Moolenbroek 
427*ef8d499eSDavid van Moolenbroek /*
428*ef8d499eSDavid van Moolenbroek  * Read from a BPF device.
429*ef8d499eSDavid van Moolenbroek  */
430*ef8d499eSDavid van Moolenbroek static ssize_t
bpfdev_read(devminor_t minor,uint64_t position,endpoint_t endpt,cp_grant_id_t grant,size_t size,int flags,cdev_id_t id)431*ef8d499eSDavid van Moolenbroek bpfdev_read(devminor_t minor, uint64_t position, endpoint_t endpt,
432*ef8d499eSDavid van Moolenbroek 	cp_grant_id_t grant, size_t size, int flags, cdev_id_t id)
433*ef8d499eSDavid van Moolenbroek {
434*ef8d499eSDavid van Moolenbroek 	struct bpfdev *bpf;
435*ef8d499eSDavid van Moolenbroek 	ssize_t r;
436*ef8d499eSDavid van Moolenbroek 	int suspend;
437*ef8d499eSDavid van Moolenbroek 
438*ef8d499eSDavid van Moolenbroek 	if ((bpf = bpfdev_get_by_minor(minor)) == NULL)
439*ef8d499eSDavid van Moolenbroek 		return EINVAL;
440*ef8d499eSDavid van Moolenbroek 
441*ef8d499eSDavid van Moolenbroek 	/* Allow only one read call at a time. */
442*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_read.br_endpt != NONE)
443*ef8d499eSDavid van Moolenbroek 		return EIO;
444*ef8d499eSDavid van Moolenbroek 
445*ef8d499eSDavid van Moolenbroek 	/* Has this BPF device been configured at all yet? */
446*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_sbuf == NULL)
447*ef8d499eSDavid van Moolenbroek 		return EINVAL;
448*ef8d499eSDavid van Moolenbroek 
449*ef8d499eSDavid van Moolenbroek 	/*
450*ef8d499eSDavid van Moolenbroek 	 * Does the read call size match the entire buffer size?  This is a
451*ef8d499eSDavid van Moolenbroek 	 * ridiculous requirement but it makes our job quite a bit easier..
452*ef8d499eSDavid van Moolenbroek 	 */
453*ef8d499eSDavid van Moolenbroek 	if (size != bpf->bpf_size)
454*ef8d499eSDavid van Moolenbroek 		return EINVAL;
455*ef8d499eSDavid van Moolenbroek 
456*ef8d499eSDavid van Moolenbroek 	/*
457*ef8d499eSDavid van Moolenbroek 	 * Following standard receive semantics, if the interface is gone,
458*ef8d499eSDavid van Moolenbroek 	 * return all the packets that were pending before returning an error.
459*ef8d499eSDavid van Moolenbroek 	 * This requires extra buffer rotations after read completion, too.
460*ef8d499eSDavid van Moolenbroek 	 */
461*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_ifdev == NULL && bpf->bpf_hlen == 0)
462*ef8d499eSDavid van Moolenbroek 		return EIO;
463*ef8d499eSDavid van Moolenbroek 
464*ef8d499eSDavid van Moolenbroek 	/*
465*ef8d499eSDavid van Moolenbroek 	 * If immediate mode is not enabled, we should always suspend the read
466*ef8d499eSDavid van Moolenbroek 	 * call if the hold buffer is empty.  If immediate mode is enabled, we
467*ef8d499eSDavid van Moolenbroek 	 * should only suspend the read call if both buffers are empty, and
468*ef8d499eSDavid van Moolenbroek 	 * return data from the hold buffer or otherwise the store buffer,
469*ef8d499eSDavid van Moolenbroek 	 * whichever is not empty.  A non-blocking call behaves as though
470*ef8d499eSDavid van Moolenbroek 	 * immediate mode is enabled, except it will return EAGAIN instead of
471*ef8d499eSDavid van Moolenbroek 	 * suspending the read call if both buffers are empty.  Thus, we may
472*ef8d499eSDavid van Moolenbroek 	 * have to rotate buffers for both immediate mode and non-blocking
473*ef8d499eSDavid van Moolenbroek 	 * calls.  The latter is necessary for libpcap to behave correctly.
474*ef8d499eSDavid van Moolenbroek 	 */
475*ef8d499eSDavid van Moolenbroek 	if ((flags & CDEV_NONBLOCK) || (bpf->bpf_flags & BPFF_IMMEDIATE))
476*ef8d499eSDavid van Moolenbroek 		suspend = (bpf->bpf_hlen == 0 && bpf->bpf_slen == 0);
477*ef8d499eSDavid van Moolenbroek 	else
478*ef8d499eSDavid van Moolenbroek 		suspend = (bpf->bpf_hlen == 0);
479*ef8d499eSDavid van Moolenbroek 
480*ef8d499eSDavid van Moolenbroek 	if (suspend) {
481*ef8d499eSDavid van Moolenbroek 		if (flags & CDEV_NONBLOCK)
482*ef8d499eSDavid van Moolenbroek 			return EAGAIN;
483*ef8d499eSDavid van Moolenbroek 
484*ef8d499eSDavid van Moolenbroek 		/* Suspend the read call for later. */
485*ef8d499eSDavid van Moolenbroek 		bpf->bpf_read.br_endpt = endpt;
486*ef8d499eSDavid van Moolenbroek 		bpf->bpf_read.br_grant = grant;
487*ef8d499eSDavid van Moolenbroek 		bpf->bpf_read.br_id = id;
488*ef8d499eSDavid van Moolenbroek 
489*ef8d499eSDavid van Moolenbroek 		/* Set a timer if requested. */
490*ef8d499eSDavid van Moolenbroek 		if (bpf->bpf_timeout > 0)
491*ef8d499eSDavid van Moolenbroek 			set_timer(&bpf->bpf_read.br_timer, bpf->bpf_timeout,
492*ef8d499eSDavid van Moolenbroek 			    bpfdev_timeout, (int)(bpf - bpf_array));
493*ef8d499eSDavid van Moolenbroek 
494*ef8d499eSDavid van Moolenbroek 		return EDONTREPLY;
495*ef8d499eSDavid van Moolenbroek 	}
496*ef8d499eSDavid van Moolenbroek 
497*ef8d499eSDavid van Moolenbroek 	/* If we get here, either buffer has data; rotate buffers if needed. */
498*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_hlen == 0)
499*ef8d499eSDavid van Moolenbroek 		bpfdev_rotate(bpf);
500*ef8d499eSDavid van Moolenbroek 	assert(bpf->bpf_hlen > 0);
501*ef8d499eSDavid van Moolenbroek 
502*ef8d499eSDavid van Moolenbroek 	if ((r = sys_safecopyto(endpt, grant, 0, (vir_bytes)bpf->bpf_hbuf,
503*ef8d499eSDavid van Moolenbroek 	    bpf->bpf_hlen)) != OK)
504*ef8d499eSDavid van Moolenbroek 		return r;
505*ef8d499eSDavid van Moolenbroek 
506*ef8d499eSDavid van Moolenbroek 	r = (ssize_t)bpf->bpf_hlen;
507*ef8d499eSDavid van Moolenbroek 
508*ef8d499eSDavid van Moolenbroek 	bpf->bpf_hlen = 0;
509*ef8d499eSDavid van Moolenbroek 
510*ef8d499eSDavid van Moolenbroek 	/*
511*ef8d499eSDavid van Moolenbroek 	 * If the store buffer is exactly full, rotate it now.  Also, if the
512*ef8d499eSDavid van Moolenbroek 	 * interface has disappeared, the store buffer will never fill up.
513*ef8d499eSDavid van Moolenbroek 	 * Rotate it so that the application will get any remaining data before
514*ef8d499eSDavid van Moolenbroek 	 * getting errors about the interface being gone.
515*ef8d499eSDavid van Moolenbroek 	 */
516*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_slen == bpf->bpf_size || bpf->bpf_ifdev == NULL)
517*ef8d499eSDavid van Moolenbroek 		bpfdev_rotate(bpf);
518*ef8d499eSDavid van Moolenbroek 
519*ef8d499eSDavid van Moolenbroek 	return r;
520*ef8d499eSDavid van Moolenbroek }
521*ef8d499eSDavid van Moolenbroek 
522*ef8d499eSDavid van Moolenbroek /*
523*ef8d499eSDavid van Moolenbroek  * Write to a BPF device.
524*ef8d499eSDavid van Moolenbroek  */
525*ef8d499eSDavid van Moolenbroek static ssize_t
bpfdev_write(devminor_t minor,uint64_t position,endpoint_t endpt,cp_grant_id_t grant,size_t size,int flags,cdev_id_t id)526*ef8d499eSDavid van Moolenbroek bpfdev_write(devminor_t minor, uint64_t position, endpoint_t endpt,
527*ef8d499eSDavid van Moolenbroek 	cp_grant_id_t grant, size_t size, int flags, cdev_id_t id)
528*ef8d499eSDavid van Moolenbroek {
529*ef8d499eSDavid van Moolenbroek 	struct bpfdev *bpf;
530*ef8d499eSDavid van Moolenbroek 	struct pbuf *pbuf, *pptr, *pcopy;
531*ef8d499eSDavid van Moolenbroek 	size_t off;
532*ef8d499eSDavid van Moolenbroek 	err_t err;
533*ef8d499eSDavid van Moolenbroek 	int r;
534*ef8d499eSDavid van Moolenbroek 
535*ef8d499eSDavid van Moolenbroek 	if ((bpf = bpfdev_get_by_minor(minor)) == NULL)
536*ef8d499eSDavid van Moolenbroek 		return EINVAL;
537*ef8d499eSDavid van Moolenbroek 
538*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_ifdev == NULL)
539*ef8d499eSDavid van Moolenbroek 		return EINVAL;
540*ef8d499eSDavid van Moolenbroek 
541*ef8d499eSDavid van Moolenbroek 	/* VFS skips zero-sized I/O calls right now, but that may change. */
542*ef8d499eSDavid van Moolenbroek 	if (size == 0)
543*ef8d499eSDavid van Moolenbroek 		return 0;	/* nothing to do */
544*ef8d499eSDavid van Moolenbroek 
545*ef8d499eSDavid van Moolenbroek 	if (size > ifdev_get_hdrlen(bpf->bpf_ifdev) +
546*ef8d499eSDavid van Moolenbroek 	    ifdev_get_mtu(bpf->bpf_ifdev))
547*ef8d499eSDavid van Moolenbroek 		return EMSGSIZE;
548*ef8d499eSDavid van Moolenbroek 
549*ef8d499eSDavid van Moolenbroek 	if ((pbuf = pchain_alloc(PBUF_LINK, size)) == NULL)
550*ef8d499eSDavid van Moolenbroek 		return ENOMEM;
551*ef8d499eSDavid van Moolenbroek 
552*ef8d499eSDavid van Moolenbroek 	/* TODO: turn this into a series of vector copies. */
553*ef8d499eSDavid van Moolenbroek 	off = 0;
554*ef8d499eSDavid van Moolenbroek 	for (pptr = pbuf; pptr != NULL; pptr = pptr->next) {
555*ef8d499eSDavid van Moolenbroek 		if ((r = sys_safecopyfrom(endpt, grant, off,
556*ef8d499eSDavid van Moolenbroek 		    (vir_bytes)pptr->payload, pptr->len)) != OK) {
557*ef8d499eSDavid van Moolenbroek 			pbuf_free(pbuf);
558*ef8d499eSDavid van Moolenbroek 
559*ef8d499eSDavid van Moolenbroek 			return r;
560*ef8d499eSDavid van Moolenbroek 		}
561*ef8d499eSDavid van Moolenbroek 		off += pptr->len;
562*ef8d499eSDavid van Moolenbroek 	}
563*ef8d499eSDavid van Moolenbroek 	assert(off == size);
564*ef8d499eSDavid van Moolenbroek 
565*ef8d499eSDavid van Moolenbroek 	/*
566*ef8d499eSDavid van Moolenbroek 	 * In feedback mode, we cannot use the same packet buffers for both
567*ef8d499eSDavid van Moolenbroek 	 * output and input, so make a copy.  We do this before calling the
568*ef8d499eSDavid van Moolenbroek 	 * output function, which may change part of the buffers, because the
569*ef8d499eSDavid van Moolenbroek 	 * BSDs take this approach as well.
570*ef8d499eSDavid van Moolenbroek 	 */
571*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_flags & BPFF_FEEDBACK) {
572*ef8d499eSDavid van Moolenbroek 		if ((pcopy = pchain_alloc(PBUF_LINK, size)) == NULL) {
573*ef8d499eSDavid van Moolenbroek 			pbuf_free(pbuf);
574*ef8d499eSDavid van Moolenbroek 
575*ef8d499eSDavid van Moolenbroek 			return ENOMEM;
576*ef8d499eSDavid van Moolenbroek 		}
577*ef8d499eSDavid van Moolenbroek 
578*ef8d499eSDavid van Moolenbroek 		if (pbuf_copy(pcopy, pbuf) != ERR_OK)
579*ef8d499eSDavid van Moolenbroek 			panic("unexpected pbuf copy failure");
580*ef8d499eSDavid van Moolenbroek 	} else
581*ef8d499eSDavid van Moolenbroek 		pcopy = NULL;
582*ef8d499eSDavid van Moolenbroek 
583*ef8d499eSDavid van Moolenbroek 	/* Pass in the packet as output, and free it again. */
584*ef8d499eSDavid van Moolenbroek 	err = ifdev_output(bpf->bpf_ifdev, pbuf, NULL /*netif*/,
585*ef8d499eSDavid van Moolenbroek 	    TRUE /*to_bpf*/, !!(bpf->bpf_flags & BPFF_HDRCMPLT));
586*ef8d499eSDavid van Moolenbroek 
587*ef8d499eSDavid van Moolenbroek 	pbuf_free(pbuf);
588*ef8d499eSDavid van Moolenbroek 
589*ef8d499eSDavid van Moolenbroek 	/* In feedback mode, pass in the copy as input, if output succeeded. */
590*ef8d499eSDavid van Moolenbroek 	if (err == ERR_OK && (bpf->bpf_flags & BPFF_FEEDBACK))
591*ef8d499eSDavid van Moolenbroek 		ifdev_input(bpf->bpf_ifdev, pcopy, NULL /*netif*/,
592*ef8d499eSDavid van Moolenbroek 		    FALSE /*to_bpf*/);
593*ef8d499eSDavid van Moolenbroek 	else if (pcopy != NULL)
594*ef8d499eSDavid van Moolenbroek 		pbuf_free(pcopy);
595*ef8d499eSDavid van Moolenbroek 
596*ef8d499eSDavid van Moolenbroek 	return (err == ERR_OK) ? (ssize_t)size : util_convert_err(err);
597*ef8d499eSDavid van Moolenbroek }
598*ef8d499eSDavid van Moolenbroek 
599*ef8d499eSDavid van Moolenbroek /*
600*ef8d499eSDavid van Moolenbroek  * Attach a BPF device to a network interface, using the interface name given
601*ef8d499eSDavid van Moolenbroek  * in an ifreq structure.  As side effect, allocate hold and store buffers for
602*ef8d499eSDavid van Moolenbroek  * the device.  These buffers will stay allocated until the device is closed,
603*ef8d499eSDavid van Moolenbroek  * even though the interface may disappear before that.  Return OK if the BPF
604*ef8d499eSDavid van Moolenbroek  * device was successfully attached to the interface, or a negative error code
605*ef8d499eSDavid van Moolenbroek  * otherwise.
606*ef8d499eSDavid van Moolenbroek  */
607*ef8d499eSDavid van Moolenbroek static int
bpfdev_attach(struct bpfdev * bpf,struct ifreq * ifr)608*ef8d499eSDavid van Moolenbroek bpfdev_attach(struct bpfdev * bpf, struct ifreq * ifr)
609*ef8d499eSDavid van Moolenbroek {
610*ef8d499eSDavid van Moolenbroek 	struct ifdev *ifdev;
611*ef8d499eSDavid van Moolenbroek 	void *sbuf, *hbuf;
612*ef8d499eSDavid van Moolenbroek 
613*ef8d499eSDavid van Moolenbroek 	/* Find the interface with the given name. */
614*ef8d499eSDavid van Moolenbroek 	ifr->ifr_name[sizeof(ifr->ifr_name) - 1] = '\0';
615*ef8d499eSDavid van Moolenbroek 	if ((ifdev = ifdev_find_by_name(ifr->ifr_name)) == NULL)
616*ef8d499eSDavid van Moolenbroek 		return ENXIO;
617*ef8d499eSDavid van Moolenbroek 
618*ef8d499eSDavid van Moolenbroek 	/*
619*ef8d499eSDavid van Moolenbroek 	 * Allocate a store buffer and a hold buffer.  Preallocate the memory,
620*ef8d499eSDavid van Moolenbroek 	 * or we might get killed later during low-memory conditions.
621*ef8d499eSDavid van Moolenbroek 	 */
622*ef8d499eSDavid van Moolenbroek 	if ((sbuf = (char *)mmap(NULL, bpf->bpf_size, PROT_READ | PROT_WRITE,
623*ef8d499eSDavid van Moolenbroek 	    MAP_ANON | MAP_PRIVATE | MAP_PREALLOC, -1, 0)) == MAP_FAILED)
624*ef8d499eSDavid van Moolenbroek 		return ENOMEM;
625*ef8d499eSDavid van Moolenbroek 
626*ef8d499eSDavid van Moolenbroek 	if ((hbuf = (char *)mmap(NULL, bpf->bpf_size, PROT_READ | PROT_WRITE,
627*ef8d499eSDavid van Moolenbroek 	    MAP_ANON | MAP_PRIVATE | MAP_PREALLOC, -1, 0)) == MAP_FAILED) {
628*ef8d499eSDavid van Moolenbroek 		(void)munmap(sbuf, bpf->bpf_size);
629*ef8d499eSDavid van Moolenbroek 
630*ef8d499eSDavid van Moolenbroek 		return ENOMEM;
631*ef8d499eSDavid van Moolenbroek 	}
632*ef8d499eSDavid van Moolenbroek 
633*ef8d499eSDavid van Moolenbroek 	bpf->bpf_ifdev = ifdev;
634*ef8d499eSDavid van Moolenbroek 	bpf->bpf_sbuf = sbuf;
635*ef8d499eSDavid van Moolenbroek 	bpf->bpf_hbuf = hbuf;
636*ef8d499eSDavid van Moolenbroek 	assert(bpf->bpf_slen == 0);
637*ef8d499eSDavid van Moolenbroek 	assert(bpf->bpf_hlen == 0);
638*ef8d499eSDavid van Moolenbroek 
639*ef8d499eSDavid van Moolenbroek 	ifdev_attach_bpf(ifdev, &bpf->bpf_link);
640*ef8d499eSDavid van Moolenbroek 
641*ef8d499eSDavid van Moolenbroek 	return OK;
642*ef8d499eSDavid van Moolenbroek }
643*ef8d499eSDavid van Moolenbroek 
644*ef8d499eSDavid van Moolenbroek /*
645*ef8d499eSDavid van Moolenbroek  * Detach the BPF device from its interface, which is about to disappear.
646*ef8d499eSDavid van Moolenbroek  */
647*ef8d499eSDavid van Moolenbroek void
bpfdev_detach(struct bpfdev_link * bpfl)648*ef8d499eSDavid van Moolenbroek bpfdev_detach(struct bpfdev_link * bpfl)
649*ef8d499eSDavid van Moolenbroek {
650*ef8d499eSDavid van Moolenbroek 	struct bpfdev *bpf = (struct bpfdev *)bpfl;
651*ef8d499eSDavid van Moolenbroek 
652*ef8d499eSDavid van Moolenbroek 	assert(bpf->bpf_flags & BPFF_IN_USE);
653*ef8d499eSDavid van Moolenbroek 	assert(bpf->bpf_ifdev != NULL);
654*ef8d499eSDavid van Moolenbroek 
655*ef8d499eSDavid van Moolenbroek 	/*
656*ef8d499eSDavid van Moolenbroek 	 * We deliberately leave the buffers allocated here, for two reasons:
657*ef8d499eSDavid van Moolenbroek 	 *
658*ef8d499eSDavid van Moolenbroek 	 * 1) it lets applications to read any last packets in the buffers;
659*ef8d499eSDavid van Moolenbroek 	 * 2) it prevents reattaching the BPF device to another interface.
660*ef8d499eSDavid van Moolenbroek 	 */
661*ef8d499eSDavid van Moolenbroek 	bpf->bpf_ifdev = NULL;
662*ef8d499eSDavid van Moolenbroek 
663*ef8d499eSDavid van Moolenbroek 	/*
664*ef8d499eSDavid van Moolenbroek 	 * Resume pending read and select requests, returning any data left,
665*ef8d499eSDavid van Moolenbroek 	 * or an error if none.
666*ef8d499eSDavid van Moolenbroek 	 */
667*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_hlen == 0)
668*ef8d499eSDavid van Moolenbroek 		bpfdev_rotate(bpf);
669*ef8d499eSDavid van Moolenbroek 
670*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_read.br_endpt != NONE)
671*ef8d499eSDavid van Moolenbroek 		bpfdev_resume_read(bpf, FALSE /*is_timeout*/);
672*ef8d499eSDavid van Moolenbroek 
673*ef8d499eSDavid van Moolenbroek 	bpfdev_resume_select(bpf);
674*ef8d499eSDavid van Moolenbroek }
675*ef8d499eSDavid van Moolenbroek 
676*ef8d499eSDavid van Moolenbroek /*
677*ef8d499eSDavid van Moolenbroek  * Flush the given BPF device, resetting its buffer contents and statistics
678*ef8d499eSDavid van Moolenbroek  * counters.
679*ef8d499eSDavid van Moolenbroek  */
680*ef8d499eSDavid van Moolenbroek static void
bpfdev_flush(struct bpfdev * bpf)681*ef8d499eSDavid van Moolenbroek bpfdev_flush(struct bpfdev * bpf)
682*ef8d499eSDavid van Moolenbroek {
683*ef8d499eSDavid van Moolenbroek 
684*ef8d499eSDavid van Moolenbroek 	bpf->bpf_slen = 0;
685*ef8d499eSDavid van Moolenbroek 	bpf->bpf_hlen = 0;
686*ef8d499eSDavid van Moolenbroek 
687*ef8d499eSDavid van Moolenbroek 	bpf->bpf_stat.bs_recv = 0;
688*ef8d499eSDavid van Moolenbroek 	bpf->bpf_stat.bs_drop = 0;
689*ef8d499eSDavid van Moolenbroek 	bpf->bpf_stat.bs_capt = 0;
690*ef8d499eSDavid van Moolenbroek }
691*ef8d499eSDavid van Moolenbroek 
692*ef8d499eSDavid van Moolenbroek /*
693*ef8d499eSDavid van Moolenbroek  * Install a filter program on the BPF device.  A new filter replaces any old
694*ef8d499eSDavid van Moolenbroek  * one.  A zero-sized filter simply clears a previous filter.  On success,
695*ef8d499eSDavid van Moolenbroek  * perform a flush and return OK.  On failure, return a negative error code
696*ef8d499eSDavid van Moolenbroek  * without making any modifications to the current filter.
697*ef8d499eSDavid van Moolenbroek  */
698*ef8d499eSDavid van Moolenbroek static int
bpfdev_setfilter(struct bpfdev * bpf,endpoint_t endpt,cp_grant_id_t grant)699*ef8d499eSDavid van Moolenbroek bpfdev_setfilter(struct bpfdev * bpf, endpoint_t endpt, cp_grant_id_t grant)
700*ef8d499eSDavid van Moolenbroek {
701*ef8d499eSDavid van Moolenbroek 	struct bpf_insn *filter;
702*ef8d499eSDavid van Moolenbroek 	unsigned int count;
703*ef8d499eSDavid van Moolenbroek 	size_t len;
704*ef8d499eSDavid van Moolenbroek 	int r;
705*ef8d499eSDavid van Moolenbroek 
706*ef8d499eSDavid van Moolenbroek 	if ((r = sys_safecopyfrom(endpt, grant,
707*ef8d499eSDavid van Moolenbroek 	    offsetof(struct minix_bpf_program, mbf_len), (vir_bytes)&count,
708*ef8d499eSDavid van Moolenbroek 	    sizeof(count))) != OK)
709*ef8d499eSDavid van Moolenbroek 		return r;
710*ef8d499eSDavid van Moolenbroek 
711*ef8d499eSDavid van Moolenbroek 	if (count > BPF_MAXINSNS)
712*ef8d499eSDavid van Moolenbroek 		return EINVAL;
713*ef8d499eSDavid van Moolenbroek 	len = count * sizeof(struct bpf_insn);
714*ef8d499eSDavid van Moolenbroek 
715*ef8d499eSDavid van Moolenbroek 	if (len > 0) {
716*ef8d499eSDavid van Moolenbroek 		if ((filter = (struct bpf_insn *)mmap(NULL, len,
717*ef8d499eSDavid van Moolenbroek 		    PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0)) ==
718*ef8d499eSDavid van Moolenbroek 		    MAP_FAILED)
719*ef8d499eSDavid van Moolenbroek 			return ENOMEM;
720*ef8d499eSDavid van Moolenbroek 
721*ef8d499eSDavid van Moolenbroek 		if ((r = sys_safecopyfrom(endpt, grant,
722*ef8d499eSDavid van Moolenbroek 		    offsetof(struct minix_bpf_program, mbf_insns),
723*ef8d499eSDavid van Moolenbroek 		    (vir_bytes)filter, len)) != OK) {
724*ef8d499eSDavid van Moolenbroek 			(void)munmap(filter, len);
725*ef8d499eSDavid van Moolenbroek 
726*ef8d499eSDavid van Moolenbroek 			return r;
727*ef8d499eSDavid van Moolenbroek 		}
728*ef8d499eSDavid van Moolenbroek 
729*ef8d499eSDavid van Moolenbroek 		if (!bpf_validate(filter, count)) {
730*ef8d499eSDavid van Moolenbroek 			(void)munmap(filter, len);
731*ef8d499eSDavid van Moolenbroek 
732*ef8d499eSDavid van Moolenbroek 			return EINVAL;
733*ef8d499eSDavid van Moolenbroek 		}
734*ef8d499eSDavid van Moolenbroek 	} else
735*ef8d499eSDavid van Moolenbroek 		filter = NULL;
736*ef8d499eSDavid van Moolenbroek 
737*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_filter != NULL)
738*ef8d499eSDavid van Moolenbroek 		(void)munmap(bpf->bpf_filter, bpf->bpf_filterlen);
739*ef8d499eSDavid van Moolenbroek 
740*ef8d499eSDavid van Moolenbroek 	bpf->bpf_filter = filter;
741*ef8d499eSDavid van Moolenbroek 	bpf->bpf_filterlen = len;
742*ef8d499eSDavid van Moolenbroek 
743*ef8d499eSDavid van Moolenbroek 	bpfdev_flush(bpf);
744*ef8d499eSDavid van Moolenbroek 
745*ef8d499eSDavid van Moolenbroek 	return OK;
746*ef8d499eSDavid van Moolenbroek }
747*ef8d499eSDavid van Moolenbroek 
748*ef8d499eSDavid van Moolenbroek /*
749*ef8d499eSDavid van Moolenbroek  * Process an I/O control request on the BPF device.
750*ef8d499eSDavid van Moolenbroek  */
751*ef8d499eSDavid van Moolenbroek static int
bpfdev_ioctl(devminor_t minor,unsigned long request,endpoint_t endpt,cp_grant_id_t grant,int flags,endpoint_t user_endpt,cdev_id_t id)752*ef8d499eSDavid van Moolenbroek bpfdev_ioctl(devminor_t minor, unsigned long request, endpoint_t endpt,
753*ef8d499eSDavid van Moolenbroek 	cp_grant_id_t grant, int flags, endpoint_t user_endpt, cdev_id_t id)
754*ef8d499eSDavid van Moolenbroek {
755*ef8d499eSDavid van Moolenbroek 	struct bpfdev *bpf;
756*ef8d499eSDavid van Moolenbroek 	struct bpf_stat bs;
757*ef8d499eSDavid van Moolenbroek 	struct bpf_version bv;
758*ef8d499eSDavid van Moolenbroek 	struct bpf_dltlist bfl;
759*ef8d499eSDavid van Moolenbroek 	struct timeval tv;
760*ef8d499eSDavid van Moolenbroek 	struct ifreq ifr;
761*ef8d499eSDavid van Moolenbroek 	unsigned int uval;
762*ef8d499eSDavid van Moolenbroek 	int r, val;
763*ef8d499eSDavid van Moolenbroek 
764*ef8d499eSDavid van Moolenbroek 	if ((bpf = bpfdev_get_by_minor(minor)) == NULL)
765*ef8d499eSDavid van Moolenbroek 		return EINVAL;
766*ef8d499eSDavid van Moolenbroek 
767*ef8d499eSDavid van Moolenbroek 	/*
768*ef8d499eSDavid van Moolenbroek 	 * We do not support multiple concurrent requests in this module.  That
769*ef8d499eSDavid van Moolenbroek 	 * not only means that we forbid a read(2) call on a BPF device object
770*ef8d499eSDavid van Moolenbroek 	 * while another read(2) is already pending: we also disallow IOCTL
771*ef8d499eSDavid van Moolenbroek 	 * IOCTL calls while such a read(2) call is in progress.  This
772*ef8d499eSDavid van Moolenbroek 	 * restriction should never be a problem for user programs, and allows
773*ef8d499eSDavid van Moolenbroek 	 * us to rely on the fact that that no settings can change between the
774*ef8d499eSDavid van Moolenbroek 	 * start and end of any read call.  As a side note, pending select(2)
775*ef8d499eSDavid van Moolenbroek 	 * queries may be similarly affected, and will also not be fully
776*ef8d499eSDavid van Moolenbroek 	 * accurate if any options are changed while pending.
777*ef8d499eSDavid van Moolenbroek 	 */
778*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_read.br_endpt != NONE)
779*ef8d499eSDavid van Moolenbroek 		return EIO;
780*ef8d499eSDavid van Moolenbroek 
781*ef8d499eSDavid van Moolenbroek 	bpf->bpf_pid = getnpid(user_endpt);
782*ef8d499eSDavid van Moolenbroek 
783*ef8d499eSDavid van Moolenbroek 	/* These are in order of the NetBSD BIOC.. IOCTL numbers. */
784*ef8d499eSDavid van Moolenbroek 	switch (request) {
785*ef8d499eSDavid van Moolenbroek 	case BIOCGBLEN:
786*ef8d499eSDavid van Moolenbroek 		uval = bpf->bpf_size;
787*ef8d499eSDavid van Moolenbroek 
788*ef8d499eSDavid van Moolenbroek 		return sys_safecopyto(endpt, grant, 0, (vir_bytes)&uval,
789*ef8d499eSDavid van Moolenbroek 		    sizeof(uval));
790*ef8d499eSDavid van Moolenbroek 
791*ef8d499eSDavid van Moolenbroek 	case BIOCSBLEN:
792*ef8d499eSDavid van Moolenbroek 		if (bpf->bpf_sbuf != NULL)
793*ef8d499eSDavid van Moolenbroek 			return EINVAL;
794*ef8d499eSDavid van Moolenbroek 
795*ef8d499eSDavid van Moolenbroek 		if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes)&uval,
796*ef8d499eSDavid van Moolenbroek 		    sizeof(uval))) != OK)
797*ef8d499eSDavid van Moolenbroek 			return r;
798*ef8d499eSDavid van Moolenbroek 
799*ef8d499eSDavid van Moolenbroek 		if (uval < BPF_BUF_MIN)
800*ef8d499eSDavid van Moolenbroek 			uval = BPF_BUF_MIN;
801*ef8d499eSDavid van Moolenbroek 		else if (uval > BPF_BUF_MAX)
802*ef8d499eSDavid van Moolenbroek 			uval = BPF_BUF_MAX;
803*ef8d499eSDavid van Moolenbroek 
804*ef8d499eSDavid van Moolenbroek 		/* Is this the right thing to do?  It doesn't matter for us. */
805*ef8d499eSDavid van Moolenbroek 		uval = BPF_WORDALIGN(uval);
806*ef8d499eSDavid van Moolenbroek 
807*ef8d499eSDavid van Moolenbroek 		if ((r = sys_safecopyto(endpt, grant, 0, (vir_bytes)&uval,
808*ef8d499eSDavid van Moolenbroek 		    sizeof(uval))) != OK)
809*ef8d499eSDavid van Moolenbroek 			return r;
810*ef8d499eSDavid van Moolenbroek 
811*ef8d499eSDavid van Moolenbroek 		bpf->bpf_size = uval;
812*ef8d499eSDavid van Moolenbroek 
813*ef8d499eSDavid van Moolenbroek 		return OK;
814*ef8d499eSDavid van Moolenbroek 
815*ef8d499eSDavid van Moolenbroek 	case MINIX_BIOCSETF:
816*ef8d499eSDavid van Moolenbroek 		return bpfdev_setfilter(bpf, endpt, grant);
817*ef8d499eSDavid van Moolenbroek 
818*ef8d499eSDavid van Moolenbroek 	case BIOCPROMISC:
819*ef8d499eSDavid van Moolenbroek 		if (bpf->bpf_ifdev == NULL)
820*ef8d499eSDavid van Moolenbroek 			return EINVAL;
821*ef8d499eSDavid van Moolenbroek 
822*ef8d499eSDavid van Moolenbroek 		if (!(bpf->bpf_flags & BPFF_PROMISC)) {
823*ef8d499eSDavid van Moolenbroek 			if (!ifdev_set_promisc(bpf->bpf_ifdev))
824*ef8d499eSDavid van Moolenbroek 				return EINVAL;
825*ef8d499eSDavid van Moolenbroek 
826*ef8d499eSDavid van Moolenbroek 			bpf->bpf_flags |= BPFF_PROMISC;
827*ef8d499eSDavid van Moolenbroek 		}
828*ef8d499eSDavid van Moolenbroek 
829*ef8d499eSDavid van Moolenbroek 		return OK;
830*ef8d499eSDavid van Moolenbroek 
831*ef8d499eSDavid van Moolenbroek 	case BIOCFLUSH:
832*ef8d499eSDavid van Moolenbroek 		bpfdev_flush(bpf);
833*ef8d499eSDavid van Moolenbroek 
834*ef8d499eSDavid van Moolenbroek 		return OK;
835*ef8d499eSDavid van Moolenbroek 
836*ef8d499eSDavid van Moolenbroek 	case BIOCGDLT:
837*ef8d499eSDavid van Moolenbroek 		if (bpf->bpf_ifdev == NULL)
838*ef8d499eSDavid van Moolenbroek 			return EINVAL;
839*ef8d499eSDavid van Moolenbroek 
840*ef8d499eSDavid van Moolenbroek 		/* TODO: support for type configuration per BPF device. */
841*ef8d499eSDavid van Moolenbroek 		uval = ifdev_get_dlt(bpf->bpf_ifdev);
842*ef8d499eSDavid van Moolenbroek 
843*ef8d499eSDavid van Moolenbroek 		return sys_safecopyto(endpt, grant, 0, (vir_bytes)&uval,
844*ef8d499eSDavid van Moolenbroek 		    sizeof(uval));
845*ef8d499eSDavid van Moolenbroek 
846*ef8d499eSDavid van Moolenbroek 	case BIOCGETIF:
847*ef8d499eSDavid van Moolenbroek 		if (bpf->bpf_ifdev == NULL)
848*ef8d499eSDavid van Moolenbroek 			return EINVAL;
849*ef8d499eSDavid van Moolenbroek 
850*ef8d499eSDavid van Moolenbroek 		memset(&ifr, 0, sizeof(ifr));
851*ef8d499eSDavid van Moolenbroek 		strlcpy(ifr.ifr_name, ifdev_get_name(bpf->bpf_ifdev),
852*ef8d499eSDavid van Moolenbroek 		    sizeof(ifr.ifr_name));
853*ef8d499eSDavid van Moolenbroek 
854*ef8d499eSDavid van Moolenbroek 		return sys_safecopyto(endpt, grant, 0, (vir_bytes)&ifr,
855*ef8d499eSDavid van Moolenbroek 		    sizeof(ifr));
856*ef8d499eSDavid van Moolenbroek 
857*ef8d499eSDavid van Moolenbroek 	case BIOCSETIF:
858*ef8d499eSDavid van Moolenbroek 		/*
859*ef8d499eSDavid van Moolenbroek 		 * Test on the presence of a buffer rather than on an interface
860*ef8d499eSDavid van Moolenbroek 		 * since the latter may disappear and thus be reset to NULL, in
861*ef8d499eSDavid van Moolenbroek 		 * which case we do not want to allow rebinding to another.
862*ef8d499eSDavid van Moolenbroek 		 */
863*ef8d499eSDavid van Moolenbroek 		if (bpf->bpf_sbuf != NULL)
864*ef8d499eSDavid van Moolenbroek 			return EINVAL;
865*ef8d499eSDavid van Moolenbroek 
866*ef8d499eSDavid van Moolenbroek 		if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes)&ifr,
867*ef8d499eSDavid van Moolenbroek 		    sizeof(ifr))) != OK)
868*ef8d499eSDavid van Moolenbroek 			return r;
869*ef8d499eSDavid van Moolenbroek 
870*ef8d499eSDavid van Moolenbroek 		return bpfdev_attach(bpf, &ifr);
871*ef8d499eSDavid van Moolenbroek 
872*ef8d499eSDavid van Moolenbroek 	case BIOCGSTATS:
873*ef8d499eSDavid van Moolenbroek 		/*
874*ef8d499eSDavid van Moolenbroek 		 * Why do we not embed a bpf_stat structure directly in the
875*ef8d499eSDavid van Moolenbroek 		 * BPF device structure?  Well, bpf_stat has massive padding..
876*ef8d499eSDavid van Moolenbroek 		 */
877*ef8d499eSDavid van Moolenbroek 		memset(&bs, 0, sizeof(bs));
878*ef8d499eSDavid van Moolenbroek 		bs.bs_recv = bpf->bpf_stat.bs_recv;
879*ef8d499eSDavid van Moolenbroek 		bs.bs_drop = bpf->bpf_stat.bs_drop;
880*ef8d499eSDavid van Moolenbroek 		bs.bs_capt = bpf->bpf_stat.bs_capt;
881*ef8d499eSDavid van Moolenbroek 
882*ef8d499eSDavid van Moolenbroek 		return sys_safecopyto(endpt, grant, 0, (vir_bytes)&bs,
883*ef8d499eSDavid van Moolenbroek 		    sizeof(bs));
884*ef8d499eSDavid van Moolenbroek 
885*ef8d499eSDavid van Moolenbroek 	case BIOCIMMEDIATE:
886*ef8d499eSDavid van Moolenbroek 		if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes)&uval,
887*ef8d499eSDavid van Moolenbroek 		    sizeof(uval))) != OK)
888*ef8d499eSDavid van Moolenbroek 			return r;
889*ef8d499eSDavid van Moolenbroek 
890*ef8d499eSDavid van Moolenbroek 		if (uval)
891*ef8d499eSDavid van Moolenbroek 			bpf->bpf_flags |= BPFF_IMMEDIATE;
892*ef8d499eSDavid van Moolenbroek 		else
893*ef8d499eSDavid van Moolenbroek 			bpf->bpf_flags &= ~BPFF_IMMEDIATE;
894*ef8d499eSDavid van Moolenbroek 
895*ef8d499eSDavid van Moolenbroek 		return OK;
896*ef8d499eSDavid van Moolenbroek 
897*ef8d499eSDavid van Moolenbroek 	case BIOCVERSION:
898*ef8d499eSDavid van Moolenbroek 		memset(&bv, 0, sizeof(bv));
899*ef8d499eSDavid van Moolenbroek 		bv.bv_major = BPF_MAJOR_VERSION;
900*ef8d499eSDavid van Moolenbroek 		bv.bv_minor = BPF_MINOR_VERSION;
901*ef8d499eSDavid van Moolenbroek 
902*ef8d499eSDavid van Moolenbroek 		return sys_safecopyto(endpt, grant, 0, (vir_bytes)&bv,
903*ef8d499eSDavid van Moolenbroek 		    sizeof(bv));
904*ef8d499eSDavid van Moolenbroek 
905*ef8d499eSDavid van Moolenbroek 	case BIOCGHDRCMPLT:
906*ef8d499eSDavid van Moolenbroek 		uval = !!(bpf->bpf_flags & BPFF_HDRCMPLT);
907*ef8d499eSDavid van Moolenbroek 
908*ef8d499eSDavid van Moolenbroek 		return sys_safecopyto(endpt, grant, 0, (vir_bytes)&uval,
909*ef8d499eSDavid van Moolenbroek 		    sizeof(uval));
910*ef8d499eSDavid van Moolenbroek 
911*ef8d499eSDavid van Moolenbroek 	case BIOCSHDRCMPLT:
912*ef8d499eSDavid van Moolenbroek 		if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes)&uval,
913*ef8d499eSDavid van Moolenbroek 		    sizeof(uval))) != OK)
914*ef8d499eSDavid van Moolenbroek 			return r;
915*ef8d499eSDavid van Moolenbroek 
916*ef8d499eSDavid van Moolenbroek 		if (uval)
917*ef8d499eSDavid van Moolenbroek 			bpf->bpf_flags |= BPFF_HDRCMPLT;
918*ef8d499eSDavid van Moolenbroek 		else
919*ef8d499eSDavid van Moolenbroek 			bpf->bpf_flags &= ~BPFF_HDRCMPLT;
920*ef8d499eSDavid van Moolenbroek 
921*ef8d499eSDavid van Moolenbroek 		return OK;
922*ef8d499eSDavid van Moolenbroek 
923*ef8d499eSDavid van Moolenbroek 	case BIOCSDLT:
924*ef8d499eSDavid van Moolenbroek 		if (bpf->bpf_ifdev == NULL)
925*ef8d499eSDavid van Moolenbroek 			return EINVAL;
926*ef8d499eSDavid van Moolenbroek 
927*ef8d499eSDavid van Moolenbroek 		if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes)&uval,
928*ef8d499eSDavid van Moolenbroek 		    sizeof(uval))) != OK)
929*ef8d499eSDavid van Moolenbroek 			return r;
930*ef8d499eSDavid van Moolenbroek 
931*ef8d499eSDavid van Moolenbroek 		/* TODO: support for type configuration per BPF device. */
932*ef8d499eSDavid van Moolenbroek 		if (uval != ifdev_get_dlt(bpf->bpf_ifdev))
933*ef8d499eSDavid van Moolenbroek 			return EINVAL;
934*ef8d499eSDavid van Moolenbroek 
935*ef8d499eSDavid van Moolenbroek 		return OK;
936*ef8d499eSDavid van Moolenbroek 
937*ef8d499eSDavid van Moolenbroek 	case MINIX_BIOCGDLTLIST:
938*ef8d499eSDavid van Moolenbroek 		if (bpf->bpf_ifdev == NULL)
939*ef8d499eSDavid van Moolenbroek 			return EINVAL;
940*ef8d499eSDavid van Moolenbroek 
941*ef8d499eSDavid van Moolenbroek 		if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes)&bfl,
942*ef8d499eSDavid van Moolenbroek 		    sizeof(bfl))) != OK)
943*ef8d499eSDavid van Moolenbroek 			return r;
944*ef8d499eSDavid van Moolenbroek 
945*ef8d499eSDavid van Moolenbroek 		if (bfl.bfl_list != NULL) {
946*ef8d499eSDavid van Moolenbroek 			if (bfl.bfl_len < 1)
947*ef8d499eSDavid van Moolenbroek 				return ENOMEM;
948*ef8d499eSDavid van Moolenbroek 
949*ef8d499eSDavid van Moolenbroek 			/*
950*ef8d499eSDavid van Moolenbroek 			 * Copy out the 'list', which consists of one entry.
951*ef8d499eSDavid van Moolenbroek 			 * If we were to produce multiple entries, we would
952*ef8d499eSDavid van Moolenbroek 			 * have to check against the MINIX_BPF_MAXDLT limit.
953*ef8d499eSDavid van Moolenbroek 			 */
954*ef8d499eSDavid van Moolenbroek 			uval = ifdev_get_dlt(bpf->bpf_ifdev);
955*ef8d499eSDavid van Moolenbroek 
956*ef8d499eSDavid van Moolenbroek 			if ((r = sys_safecopyto(endpt, grant,
957*ef8d499eSDavid van Moolenbroek 			    offsetof(struct minix_bpf_dltlist, mbfl_list),
958*ef8d499eSDavid van Moolenbroek 			    (vir_bytes)&uval, sizeof(uval))) != OK)
959*ef8d499eSDavid van Moolenbroek 				return r;
960*ef8d499eSDavid van Moolenbroek 		}
961*ef8d499eSDavid van Moolenbroek 		bfl.bfl_len = 1;
962*ef8d499eSDavid van Moolenbroek 
963*ef8d499eSDavid van Moolenbroek 		return sys_safecopyto(endpt, grant, 0, (vir_bytes)&bfl,
964*ef8d499eSDavid van Moolenbroek 		    sizeof(bfl));
965*ef8d499eSDavid van Moolenbroek 
966*ef8d499eSDavid van Moolenbroek 	case BIOCGSEESENT:
967*ef8d499eSDavid van Moolenbroek 		uval = !!(bpf->bpf_flags & BPFF_SEESENT);
968*ef8d499eSDavid van Moolenbroek 
969*ef8d499eSDavid van Moolenbroek 		return sys_safecopyto(endpt, grant, 0, (vir_bytes)&uval,
970*ef8d499eSDavid van Moolenbroek 		    sizeof(uval));
971*ef8d499eSDavid van Moolenbroek 
972*ef8d499eSDavid van Moolenbroek 	case BIOCSSEESENT:
973*ef8d499eSDavid van Moolenbroek 		if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes)&uval,
974*ef8d499eSDavid van Moolenbroek 		    sizeof(uval))) != OK)
975*ef8d499eSDavid van Moolenbroek 			return r;
976*ef8d499eSDavid van Moolenbroek 
977*ef8d499eSDavid van Moolenbroek 		if (uval)
978*ef8d499eSDavid van Moolenbroek 			bpf->bpf_flags |= BPFF_SEESENT;
979*ef8d499eSDavid van Moolenbroek 		else
980*ef8d499eSDavid van Moolenbroek 			bpf->bpf_flags &= ~BPFF_SEESENT;
981*ef8d499eSDavid van Moolenbroek 
982*ef8d499eSDavid van Moolenbroek 		return OK;
983*ef8d499eSDavid van Moolenbroek 
984*ef8d499eSDavid van Moolenbroek 	case BIOCSRTIMEOUT:
985*ef8d499eSDavid van Moolenbroek 		if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes)&tv,
986*ef8d499eSDavid van Moolenbroek 		    sizeof(tv))) != OK)
987*ef8d499eSDavid van Moolenbroek 			return r;
988*ef8d499eSDavid van Moolenbroek 
989*ef8d499eSDavid van Moolenbroek 		if ((r = util_timeval_to_ticks(&tv, &bpf->bpf_timeout)) != OK)
990*ef8d499eSDavid van Moolenbroek 			return r;
991*ef8d499eSDavid van Moolenbroek 
992*ef8d499eSDavid van Moolenbroek 		return OK;
993*ef8d499eSDavid van Moolenbroek 
994*ef8d499eSDavid van Moolenbroek 	case BIOCGRTIMEOUT:
995*ef8d499eSDavid van Moolenbroek 		util_ticks_to_timeval(bpf->bpf_timeout, &tv);
996*ef8d499eSDavid van Moolenbroek 
997*ef8d499eSDavid van Moolenbroek 		return sys_safecopyto(endpt, grant, 0, (vir_bytes)&tv,
998*ef8d499eSDavid van Moolenbroek 		    sizeof(tv));
999*ef8d499eSDavid van Moolenbroek 
1000*ef8d499eSDavid van Moolenbroek 	case BIOCGFEEDBACK:
1001*ef8d499eSDavid van Moolenbroek 		uval = !!(bpf->bpf_flags & BPFF_FEEDBACK);
1002*ef8d499eSDavid van Moolenbroek 
1003*ef8d499eSDavid van Moolenbroek 		return sys_safecopyto(endpt, grant, 0, (vir_bytes)&uval,
1004*ef8d499eSDavid van Moolenbroek 		    sizeof(uval));
1005*ef8d499eSDavid van Moolenbroek 
1006*ef8d499eSDavid van Moolenbroek 	case BIOCSFEEDBACK:
1007*ef8d499eSDavid van Moolenbroek 		if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes)&uval,
1008*ef8d499eSDavid van Moolenbroek 		    sizeof(uval))) != OK)
1009*ef8d499eSDavid van Moolenbroek 			return r;
1010*ef8d499eSDavid van Moolenbroek 
1011*ef8d499eSDavid van Moolenbroek 		if (uval)
1012*ef8d499eSDavid van Moolenbroek 			bpf->bpf_flags |= BPFF_FEEDBACK;
1013*ef8d499eSDavid van Moolenbroek 		else
1014*ef8d499eSDavid van Moolenbroek 			bpf->bpf_flags &= ~BPFF_FEEDBACK;
1015*ef8d499eSDavid van Moolenbroek 
1016*ef8d499eSDavid van Moolenbroek 		return OK;
1017*ef8d499eSDavid van Moolenbroek 
1018*ef8d499eSDavid van Moolenbroek 	case FIONREAD:
1019*ef8d499eSDavid van Moolenbroek 		val = 0;
1020*ef8d499eSDavid van Moolenbroek 		if (bpf->bpf_hlen > 0)
1021*ef8d499eSDavid van Moolenbroek 			val = bpf->bpf_hlen;
1022*ef8d499eSDavid van Moolenbroek 		else if ((bpf->bpf_flags & BPFF_IMMEDIATE) &&
1023*ef8d499eSDavid van Moolenbroek 		    bpf->bpf_slen > 0)
1024*ef8d499eSDavid van Moolenbroek 			val = bpf->bpf_slen;
1025*ef8d499eSDavid van Moolenbroek 		else
1026*ef8d499eSDavid van Moolenbroek 			val = 0;
1027*ef8d499eSDavid van Moolenbroek 
1028*ef8d499eSDavid van Moolenbroek 		return sys_safecopyto(endpt, grant, 0, (vir_bytes)&val,
1029*ef8d499eSDavid van Moolenbroek 		    sizeof(val));
1030*ef8d499eSDavid van Moolenbroek 
1031*ef8d499eSDavid van Moolenbroek 	default:
1032*ef8d499eSDavid van Moolenbroek 		return ENOTTY;
1033*ef8d499eSDavid van Moolenbroek 	}
1034*ef8d499eSDavid van Moolenbroek }
1035*ef8d499eSDavid van Moolenbroek 
1036*ef8d499eSDavid van Moolenbroek /*
1037*ef8d499eSDavid van Moolenbroek  * Cancel a previously suspended request on a BPF device.  Since only read
1038*ef8d499eSDavid van Moolenbroek  * requests may be suspended (select is handled differently), the cancel
1039*ef8d499eSDavid van Moolenbroek  * request must be for a read request.  Note that character devices currently
1040*ef8d499eSDavid van Moolenbroek  * (still) behave slightly differently from socket devices here: while socket
1041*ef8d499eSDavid van Moolenbroek  * drivers are supposed to respond to the original request, character drivers
1042*ef8d499eSDavid van Moolenbroek  * must respond to the original request from the cancel callback.
1043*ef8d499eSDavid van Moolenbroek  */
1044*ef8d499eSDavid van Moolenbroek static int
bpfdev_cancel(devminor_t minor,endpoint_t endpt,cdev_id_t id)1045*ef8d499eSDavid van Moolenbroek bpfdev_cancel(devminor_t minor, endpoint_t endpt, cdev_id_t id)
1046*ef8d499eSDavid van Moolenbroek {
1047*ef8d499eSDavid van Moolenbroek 	struct bpfdev *bpf;
1048*ef8d499eSDavid van Moolenbroek 
1049*ef8d499eSDavid van Moolenbroek 	if ((bpf = bpfdev_get_by_minor(minor)) == NULL)
1050*ef8d499eSDavid van Moolenbroek 		return EDONTREPLY;
1051*ef8d499eSDavid van Moolenbroek 
1052*ef8d499eSDavid van Moolenbroek 	/* Is this a cancel request for the currently pending read request? */
1053*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_read.br_endpt != endpt || bpf->bpf_read.br_id != id)
1054*ef8d499eSDavid van Moolenbroek 		return EDONTREPLY;
1055*ef8d499eSDavid van Moolenbroek 
1056*ef8d499eSDavid van Moolenbroek 	/* If so, cancel the read request. */
1057*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_timeout > 0)
1058*ef8d499eSDavid van Moolenbroek 		cancel_timer(&bpf->bpf_read.br_timer);
1059*ef8d499eSDavid van Moolenbroek 
1060*ef8d499eSDavid van Moolenbroek 	bpf->bpf_read.br_endpt = NONE;
1061*ef8d499eSDavid van Moolenbroek 
1062*ef8d499eSDavid van Moolenbroek 	return EINTR; /* the return value for the canceled read request */
1063*ef8d499eSDavid van Moolenbroek }
1064*ef8d499eSDavid van Moolenbroek 
1065*ef8d499eSDavid van Moolenbroek /*
1066*ef8d499eSDavid van Moolenbroek  * Perform a select query on a BPF device.
1067*ef8d499eSDavid van Moolenbroek  */
1068*ef8d499eSDavid van Moolenbroek static int
bpfdev_select(devminor_t minor,unsigned int ops,endpoint_t endpt)1069*ef8d499eSDavid van Moolenbroek bpfdev_select(devminor_t minor, unsigned int ops, endpoint_t endpt)
1070*ef8d499eSDavid van Moolenbroek {
1071*ef8d499eSDavid van Moolenbroek 	struct bpfdev *bpf;
1072*ef8d499eSDavid van Moolenbroek 	unsigned int r, notify;
1073*ef8d499eSDavid van Moolenbroek 
1074*ef8d499eSDavid van Moolenbroek 	if ((bpf = bpfdev_get_by_minor(minor)) == NULL)
1075*ef8d499eSDavid van Moolenbroek 		return EINVAL;
1076*ef8d499eSDavid van Moolenbroek 
1077*ef8d499eSDavid van Moolenbroek 	notify = (ops & CDEV_NOTIFY);
1078*ef8d499eSDavid van Moolenbroek 	ops &= (CDEV_OP_RD | CDEV_OP_WR | CDEV_OP_ERR);
1079*ef8d499eSDavid van Moolenbroek 
1080*ef8d499eSDavid van Moolenbroek 	r = bpfdev_test_select(bpf, ops);
1081*ef8d499eSDavid van Moolenbroek 
1082*ef8d499eSDavid van Moolenbroek 	/*
1083*ef8d499eSDavid van Moolenbroek 	 * For the operations that were not immediately ready, if requested,
1084*ef8d499eSDavid van Moolenbroek 	 * save the select request for later.
1085*ef8d499eSDavid van Moolenbroek 	 */
1086*ef8d499eSDavid van Moolenbroek 	ops &= ~r;
1087*ef8d499eSDavid van Moolenbroek 
1088*ef8d499eSDavid van Moolenbroek 	if (ops != 0 && notify) {
1089*ef8d499eSDavid van Moolenbroek 		if (bpf->bpf_select.bs_endpt != NONE) {
1090*ef8d499eSDavid van Moolenbroek 			/* Merge in the operations with any earlier request. */
1091*ef8d499eSDavid van Moolenbroek 			if (bpf->bpf_select.bs_endpt != endpt)
1092*ef8d499eSDavid van Moolenbroek 				return EIO;
1093*ef8d499eSDavid van Moolenbroek 			bpf->bpf_select.bs_selops |= ops;
1094*ef8d499eSDavid van Moolenbroek 		} else {
1095*ef8d499eSDavid van Moolenbroek 			bpf->bpf_select.bs_endpt = endpt;
1096*ef8d499eSDavid van Moolenbroek 			bpf->bpf_select.bs_selops = ops;
1097*ef8d499eSDavid van Moolenbroek 		}
1098*ef8d499eSDavid van Moolenbroek 	}
1099*ef8d499eSDavid van Moolenbroek 
1100*ef8d499eSDavid van Moolenbroek 	return r;
1101*ef8d499eSDavid van Moolenbroek }
1102*ef8d499eSDavid van Moolenbroek 
1103*ef8d499eSDavid van Moolenbroek /*
1104*ef8d499eSDavid van Moolenbroek  * Process an incoming packet on the interface to which the given BPF device is
1105*ef8d499eSDavid van Moolenbroek  * attached.  If the packet passes the filter (if any), store as much as
1106*ef8d499eSDavid van Moolenbroek  * requested of it in the store buffer, rotating buffers if needed and resuming
1107*ef8d499eSDavid van Moolenbroek  * suspended read and select requests as appropriate.  This function is also
1108*ef8d499eSDavid van Moolenbroek  * called through bpfdev_output() below.
1109*ef8d499eSDavid van Moolenbroek  */
1110*ef8d499eSDavid van Moolenbroek void
bpfdev_input(struct bpfdev_link * bpfl,const struct pbuf * pbuf)1111*ef8d499eSDavid van Moolenbroek bpfdev_input(struct bpfdev_link * bpfl, const struct pbuf * pbuf)
1112*ef8d499eSDavid van Moolenbroek {
1113*ef8d499eSDavid van Moolenbroek 	struct bpfdev *bpf = (struct bpfdev *)bpfl;
1114*ef8d499eSDavid van Moolenbroek 	struct timespec ts;
1115*ef8d499eSDavid van Moolenbroek 	struct bpf_hdr bh;
1116*ef8d499eSDavid van Moolenbroek 	const struct pbuf *pptr;
1117*ef8d499eSDavid van Moolenbroek 	size_t caplen, hdrlen, totlen, off, chunk;
1118*ef8d499eSDavid van Moolenbroek 	int hfull;
1119*ef8d499eSDavid van Moolenbroek 
1120*ef8d499eSDavid van Moolenbroek 	/*
1121*ef8d499eSDavid van Moolenbroek 	 * Apparently bs_recv is the counter of packets that were run through
1122*ef8d499eSDavid van Moolenbroek 	 * the filter, not the number of packets that were or could be received
1123*ef8d499eSDavid van Moolenbroek 	 * by the user (which is what I got from the manual page.. oh well).
1124*ef8d499eSDavid van Moolenbroek 	 */
1125*ef8d499eSDavid van Moolenbroek 	bpf->bpf_stat.bs_recv++;
1126*ef8d499eSDavid van Moolenbroek 	bpf_stat.bs_recv++;
1127*ef8d499eSDavid van Moolenbroek 
1128*ef8d499eSDavid van Moolenbroek 	/*
1129*ef8d499eSDavid van Moolenbroek 	 * Run the packet through the BPF device's filter to see whether the
1130*ef8d499eSDavid van Moolenbroek 	 * packet should be stored and if so, how much of it.  If no filter is
1131*ef8d499eSDavid van Moolenbroek 	 * set, all packets will be stored in their entirety.
1132*ef8d499eSDavid van Moolenbroek 	 */
1133*ef8d499eSDavid van Moolenbroek 	caplen = bpf_filter_ext(bpf->bpf_filter, pbuf, (u_char *)pbuf->payload,
1134*ef8d499eSDavid van Moolenbroek 	    pbuf->tot_len, pbuf->len);
1135*ef8d499eSDavid van Moolenbroek 
1136*ef8d499eSDavid van Moolenbroek 	if (caplen == 0)
1137*ef8d499eSDavid van Moolenbroek 		return;		/* no match; ignore packet */
1138*ef8d499eSDavid van Moolenbroek 
1139*ef8d499eSDavid van Moolenbroek 	if (caplen > pbuf->tot_len)
1140*ef8d499eSDavid van Moolenbroek 		caplen = pbuf->tot_len;
1141*ef8d499eSDavid van Moolenbroek 
1142*ef8d499eSDavid van Moolenbroek 	/* Truncate packet entries to the full size of the buffers. */
1143*ef8d499eSDavid van Moolenbroek 	hdrlen = BPF_WORDALIGN(sizeof(bh));
1144*ef8d499eSDavid van Moolenbroek 	totlen = BPF_WORDALIGN(hdrlen + caplen);
1145*ef8d499eSDavid van Moolenbroek 
1146*ef8d499eSDavid van Moolenbroek 	if (totlen > bpf->bpf_size) {
1147*ef8d499eSDavid van Moolenbroek 		totlen = bpf->bpf_size;
1148*ef8d499eSDavid van Moolenbroek 		caplen = totlen - hdrlen;
1149*ef8d499eSDavid van Moolenbroek 	}
1150*ef8d499eSDavid van Moolenbroek 	assert(totlen >= hdrlen);
1151*ef8d499eSDavid van Moolenbroek 
1152*ef8d499eSDavid van Moolenbroek 	bpf->bpf_stat.bs_capt++;
1153*ef8d499eSDavid van Moolenbroek 	bpf_stat.bs_capt++;
1154*ef8d499eSDavid van Moolenbroek 
1155*ef8d499eSDavid van Moolenbroek 	assert(bpf->bpf_sbuf != NULL);
1156*ef8d499eSDavid van Moolenbroek 	if (totlen > bpf->bpf_size - bpf->bpf_slen) {
1157*ef8d499eSDavid van Moolenbroek 		/*
1158*ef8d499eSDavid van Moolenbroek 		 * If the store buffer is full and the hold buffer is not
1159*ef8d499eSDavid van Moolenbroek 		 * empty, we cannot swap the two buffers, and so we must drop
1160*ef8d499eSDavid van Moolenbroek 		 * the current packet.
1161*ef8d499eSDavid van Moolenbroek 		 */
1162*ef8d499eSDavid van Moolenbroek 		if (bpf->bpf_hlen > 0) {
1163*ef8d499eSDavid van Moolenbroek 			bpf->bpf_stat.bs_drop++;
1164*ef8d499eSDavid van Moolenbroek 			bpf_stat.bs_drop++;
1165*ef8d499eSDavid van Moolenbroek 
1166*ef8d499eSDavid van Moolenbroek 			return;
1167*ef8d499eSDavid van Moolenbroek 		}
1168*ef8d499eSDavid van Moolenbroek 
1169*ef8d499eSDavid van Moolenbroek 		/*
1170*ef8d499eSDavid van Moolenbroek 		 * Rotate the buffers: the hold buffer will now be "full" and
1171*ef8d499eSDavid van Moolenbroek 		 * ready to be read - it may not actually be entirely full, but
1172*ef8d499eSDavid van Moolenbroek 		 * we could not fit this packet and we are not going to deliver
1173*ef8d499eSDavid van Moolenbroek 		 * packets out of order..
1174*ef8d499eSDavid van Moolenbroek 		 */
1175*ef8d499eSDavid van Moolenbroek 		bpfdev_rotate(bpf);
1176*ef8d499eSDavid van Moolenbroek 
1177*ef8d499eSDavid van Moolenbroek 		hfull = TRUE;
1178*ef8d499eSDavid van Moolenbroek 	} else
1179*ef8d499eSDavid van Moolenbroek 		hfull = FALSE;
1180*ef8d499eSDavid van Moolenbroek 
1181*ef8d499eSDavid van Moolenbroek 	/*
1182*ef8d499eSDavid van Moolenbroek 	 * Retrieve the capture time for the packet.  Ideally this would be
1183*ef8d499eSDavid van Moolenbroek 	 * done only once per accepted packet, but we do not expect many BPF
1184*ef8d499eSDavid van Moolenbroek 	 * devices to be receiving the same packets often enough to make that
1185*ef8d499eSDavid van Moolenbroek 	 * worth it.
1186*ef8d499eSDavid van Moolenbroek 	 */
1187*ef8d499eSDavid van Moolenbroek 	clock_time(&ts);
1188*ef8d499eSDavid van Moolenbroek 
1189*ef8d499eSDavid van Moolenbroek 	/*
1190*ef8d499eSDavid van Moolenbroek 	 * Copy the packet into the store buffer, including a newly generated
1191*ef8d499eSDavid van Moolenbroek 	 * header.  Zero any padding areas, even if strictly not necessary.
1192*ef8d499eSDavid van Moolenbroek 	 */
1193*ef8d499eSDavid van Moolenbroek 	memset(&bh, 0, sizeof(bh));
1194*ef8d499eSDavid van Moolenbroek 	bh.bh_tstamp.tv_sec = ts.tv_sec;
1195*ef8d499eSDavid van Moolenbroek 	bh.bh_tstamp.tv_usec = ts.tv_nsec / 1000;
1196*ef8d499eSDavid van Moolenbroek 	bh.bh_caplen = caplen;
1197*ef8d499eSDavid van Moolenbroek 	bh.bh_datalen = pbuf->tot_len;
1198*ef8d499eSDavid van Moolenbroek 	bh.bh_hdrlen = hdrlen;
1199*ef8d499eSDavid van Moolenbroek 
1200*ef8d499eSDavid van Moolenbroek 	assert(bpf->bpf_sbuf != NULL);
1201*ef8d499eSDavid van Moolenbroek 	off = bpf->bpf_slen;
1202*ef8d499eSDavid van Moolenbroek 
1203*ef8d499eSDavid van Moolenbroek 	memcpy(&bpf->bpf_sbuf[off], &bh, sizeof(bh));
1204*ef8d499eSDavid van Moolenbroek 	if (hdrlen > sizeof(bh))
1205*ef8d499eSDavid van Moolenbroek 		memset(&bpf->bpf_sbuf[off + sizeof(bh)], 0,
1206*ef8d499eSDavid van Moolenbroek 		    hdrlen - sizeof(bh));
1207*ef8d499eSDavid van Moolenbroek 	off += hdrlen;
1208*ef8d499eSDavid van Moolenbroek 
1209*ef8d499eSDavid van Moolenbroek 	for (pptr = pbuf; pptr != NULL && caplen > 0; pptr = pptr->next) {
1210*ef8d499eSDavid van Moolenbroek 		chunk = pptr->len;
1211*ef8d499eSDavid van Moolenbroek 		if (chunk > caplen)
1212*ef8d499eSDavid van Moolenbroek 			chunk = caplen;
1213*ef8d499eSDavid van Moolenbroek 
1214*ef8d499eSDavid van Moolenbroek 		memcpy(&bpf->bpf_sbuf[off], pptr->payload, chunk);
1215*ef8d499eSDavid van Moolenbroek 
1216*ef8d499eSDavid van Moolenbroek 		off += chunk;
1217*ef8d499eSDavid van Moolenbroek 		caplen -= chunk;
1218*ef8d499eSDavid van Moolenbroek 	}
1219*ef8d499eSDavid van Moolenbroek 
1220*ef8d499eSDavid van Moolenbroek 	assert(off <= bpf->bpf_slen + totlen);
1221*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_slen + totlen > off)
1222*ef8d499eSDavid van Moolenbroek 		memset(&bpf->bpf_sbuf[off], 0, bpf->bpf_slen + totlen - off);
1223*ef8d499eSDavid van Moolenbroek 
1224*ef8d499eSDavid van Moolenbroek 	bpf->bpf_slen += totlen;
1225*ef8d499eSDavid van Moolenbroek 
1226*ef8d499eSDavid van Moolenbroek 	/*
1227*ef8d499eSDavid van Moolenbroek 	 * Edge case: if the hold buffer is empty and the store buffer is now
1228*ef8d499eSDavid van Moolenbroek 	 * exactly full, rotate buffers so that the packets can be read
1229*ef8d499eSDavid van Moolenbroek 	 * immediately, without waiting for the next packet to cause rotation.
1230*ef8d499eSDavid van Moolenbroek 	 */
1231*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_hlen == 0 && bpf->bpf_slen == bpf->bpf_size) {
1232*ef8d499eSDavid van Moolenbroek 		bpfdev_rotate(bpf);
1233*ef8d499eSDavid van Moolenbroek 
1234*ef8d499eSDavid van Moolenbroek 		hfull = TRUE;
1235*ef8d499eSDavid van Moolenbroek 	}
1236*ef8d499eSDavid van Moolenbroek 
1237*ef8d499eSDavid van Moolenbroek 	/*
1238*ef8d499eSDavid van Moolenbroek 	 * If the hold buffer is now full, or if immediate mode is enabled,
1239*ef8d499eSDavid van Moolenbroek 	 * then we now have data to deliver to userland.  See if we can wake up
1240*ef8d499eSDavid van Moolenbroek 	 * any read or select call (either but not both here).
1241*ef8d499eSDavid van Moolenbroek 	 */
1242*ef8d499eSDavid van Moolenbroek 	if (hfull || (bpf->bpf_flags & BPFF_IMMEDIATE)) {
1243*ef8d499eSDavid van Moolenbroek 		if (bpf->bpf_read.br_endpt != NONE)
1244*ef8d499eSDavid van Moolenbroek 			bpfdev_resume_read(bpf, FALSE /*is_timeout*/);
1245*ef8d499eSDavid van Moolenbroek 		else
1246*ef8d499eSDavid van Moolenbroek 			bpfdev_resume_select(bpf);
1247*ef8d499eSDavid van Moolenbroek 	}
1248*ef8d499eSDavid van Moolenbroek }
1249*ef8d499eSDavid van Moolenbroek 
1250*ef8d499eSDavid van Moolenbroek /*
1251*ef8d499eSDavid van Moolenbroek  * Process an outgoing packet on the interface to which the given BPF device is
1252*ef8d499eSDavid van Moolenbroek  * attached.  If the BPF device is configured to capture outgoing packets as
1253*ef8d499eSDavid van Moolenbroek  * well, attempt to capture the packet as per bpfdev_input().
1254*ef8d499eSDavid van Moolenbroek  */
1255*ef8d499eSDavid van Moolenbroek void
bpfdev_output(struct bpfdev_link * bpfl,const struct pbuf * pbuf)1256*ef8d499eSDavid van Moolenbroek bpfdev_output(struct bpfdev_link * bpfl, const struct pbuf * pbuf)
1257*ef8d499eSDavid van Moolenbroek {
1258*ef8d499eSDavid van Moolenbroek 	struct bpfdev *bpf = (struct bpfdev *)bpfl;
1259*ef8d499eSDavid van Moolenbroek 
1260*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_flags & BPFF_SEESENT)
1261*ef8d499eSDavid van Moolenbroek 		bpfdev_input(bpfl, pbuf);
1262*ef8d499eSDavid van Moolenbroek }
1263*ef8d499eSDavid van Moolenbroek 
1264*ef8d499eSDavid van Moolenbroek /*
1265*ef8d499eSDavid van Moolenbroek  * Fill the given 'bde' structure with information about BPF device 'bpf'.
1266*ef8d499eSDavid van Moolenbroek  */
1267*ef8d499eSDavid van Moolenbroek static void
bpfdev_get_info(struct bpf_d_ext * bde,const struct bpfdev * bpf)1268*ef8d499eSDavid van Moolenbroek bpfdev_get_info(struct bpf_d_ext * bde, const struct bpfdev * bpf)
1269*ef8d499eSDavid van Moolenbroek {
1270*ef8d499eSDavid van Moolenbroek 
1271*ef8d499eSDavid van Moolenbroek 	bde->bde_bufsize = bpf->bpf_size;
1272*ef8d499eSDavid van Moolenbroek 	bde->bde_promisc = !!(bpf->bpf_flags & BPFF_PROMISC);
1273*ef8d499eSDavid van Moolenbroek 	bde->bde_state = BPF_IDLE;
1274*ef8d499eSDavid van Moolenbroek 	bde->bde_immediate = !!(bpf->bpf_flags & BPFF_IMMEDIATE);
1275*ef8d499eSDavid van Moolenbroek 	bde->bde_hdrcmplt = !!(bpf->bpf_flags & BPFF_HDRCMPLT);
1276*ef8d499eSDavid van Moolenbroek 	bde->bde_seesent = !!(bpf->bpf_flags & BPFF_SEESENT);
1277*ef8d499eSDavid van Moolenbroek 	/*
1278*ef8d499eSDavid van Moolenbroek 	 * NetBSD updates the process ID upon device open, close, ioctl, and
1279*ef8d499eSDavid van Moolenbroek 	 * poll.  From those, only open and ioctl make sense for us.  Sadly
1280*ef8d499eSDavid van Moolenbroek 	 * there is no way to indicate "no known PID" to netstat(1), so we
1281*ef8d499eSDavid van Moolenbroek 	 * cannot even save just the endpoint and look up the corresponding PID
1282*ef8d499eSDavid van Moolenbroek 	 * later, since the user process may be gone by then.
1283*ef8d499eSDavid van Moolenbroek 	 */
1284*ef8d499eSDavid van Moolenbroek 	bde->bde_pid = bpf->bpf_pid;
1285*ef8d499eSDavid van Moolenbroek 	bde->bde_rcount = bpf->bpf_stat.bs_recv;
1286*ef8d499eSDavid van Moolenbroek 	bde->bde_dcount = bpf->bpf_stat.bs_drop;
1287*ef8d499eSDavid van Moolenbroek 	bde->bde_ccount = bpf->bpf_stat.bs_capt;
1288*ef8d499eSDavid van Moolenbroek 	if (bpf->bpf_ifdev != NULL)
1289*ef8d499eSDavid van Moolenbroek 		strlcpy(bde->bde_ifname, ifdev_get_name(bpf->bpf_ifdev),
1290*ef8d499eSDavid van Moolenbroek 		    sizeof(bde->bde_ifname));
1291*ef8d499eSDavid van Moolenbroek }
1292*ef8d499eSDavid van Moolenbroek 
1293*ef8d499eSDavid van Moolenbroek /*
1294*ef8d499eSDavid van Moolenbroek  * Obtain statistics about open BPF devices ("peers").  This node may be
1295*ef8d499eSDavid van Moolenbroek  * accessed by the superuser only.  Used by netstat(1).
1296*ef8d499eSDavid van Moolenbroek  */
1297*ef8d499eSDavid van Moolenbroek static ssize_t
bpfdev_peers(struct rmib_call * call,struct rmib_node * node __unused,struct rmib_oldp * oldp,struct rmib_newp * newp __unused)1298*ef8d499eSDavid van Moolenbroek bpfdev_peers(struct rmib_call * call, struct rmib_node * node __unused,
1299*ef8d499eSDavid van Moolenbroek 	struct rmib_oldp * oldp, struct rmib_newp * newp __unused)
1300*ef8d499eSDavid van Moolenbroek {
1301*ef8d499eSDavid van Moolenbroek 	struct bpfdev *bpf;
1302*ef8d499eSDavid van Moolenbroek 	struct bpf_d_ext bde;
1303*ef8d499eSDavid van Moolenbroek 	unsigned int slot;
1304*ef8d499eSDavid van Moolenbroek 	ssize_t off;
1305*ef8d499eSDavid van Moolenbroek 	int r, size, max;
1306*ef8d499eSDavid van Moolenbroek 
1307*ef8d499eSDavid van Moolenbroek 	if (!(call->call_flags & RMIB_FLAG_AUTH))
1308*ef8d499eSDavid van Moolenbroek 		return EPERM;
1309*ef8d499eSDavid van Moolenbroek 
1310*ef8d499eSDavid van Moolenbroek 	if (call->call_namelen != 2)
1311*ef8d499eSDavid van Moolenbroek 		return EINVAL;
1312*ef8d499eSDavid van Moolenbroek 
1313*ef8d499eSDavid van Moolenbroek 	size = call->call_name[0];
1314*ef8d499eSDavid van Moolenbroek 	if (size < 0 || (size_t)size > sizeof(bde))
1315*ef8d499eSDavid van Moolenbroek 		return EINVAL;
1316*ef8d499eSDavid van Moolenbroek 	if (size == 0)
1317*ef8d499eSDavid van Moolenbroek 		size = sizeof(bde);
1318*ef8d499eSDavid van Moolenbroek 	max = call->call_name[1];
1319*ef8d499eSDavid van Moolenbroek 
1320*ef8d499eSDavid van Moolenbroek 	off = 0;
1321*ef8d499eSDavid van Moolenbroek 
1322*ef8d499eSDavid van Moolenbroek 	for (slot = 0; slot < __arraycount(bpf_array); slot++) {
1323*ef8d499eSDavid van Moolenbroek 		bpf = &bpf_array[slot];
1324*ef8d499eSDavid van Moolenbroek 
1325*ef8d499eSDavid van Moolenbroek 		if (!(bpf->bpf_flags & BPFF_IN_USE))
1326*ef8d499eSDavid van Moolenbroek 			continue;
1327*ef8d499eSDavid van Moolenbroek 
1328*ef8d499eSDavid van Moolenbroek 		if (rmib_inrange(oldp, off)) {
1329*ef8d499eSDavid van Moolenbroek 			memset(&bde, 0, sizeof(bde));
1330*ef8d499eSDavid van Moolenbroek 
1331*ef8d499eSDavid van Moolenbroek 			bpfdev_get_info(&bde, bpf);
1332*ef8d499eSDavid van Moolenbroek 
1333*ef8d499eSDavid van Moolenbroek 			if ((r = rmib_copyout(oldp, off, &bde, size)) < 0)
1334*ef8d499eSDavid van Moolenbroek 				return r;
1335*ef8d499eSDavid van Moolenbroek 		}
1336*ef8d499eSDavid van Moolenbroek 
1337*ef8d499eSDavid van Moolenbroek 		off += sizeof(bde);
1338*ef8d499eSDavid van Moolenbroek 		if (max > 0 && --max == 0)
1339*ef8d499eSDavid van Moolenbroek 			break;
1340*ef8d499eSDavid van Moolenbroek 	}
1341*ef8d499eSDavid van Moolenbroek 
1342*ef8d499eSDavid van Moolenbroek 	/* No slack needed: netstat(1) resizes its buffer as needed. */
1343*ef8d499eSDavid van Moolenbroek 	return off;
1344*ef8d499eSDavid van Moolenbroek }
1345*ef8d499eSDavid van Moolenbroek 
1346*ef8d499eSDavid van Moolenbroek static const struct chardriver bpfdev_tab = {
1347*ef8d499eSDavid van Moolenbroek 	.cdr_open		= bpfdev_open,
1348*ef8d499eSDavid van Moolenbroek 	.cdr_close		= bpfdev_close,
1349*ef8d499eSDavid van Moolenbroek 	.cdr_read		= bpfdev_read,
1350*ef8d499eSDavid van Moolenbroek 	.cdr_write		= bpfdev_write,
1351*ef8d499eSDavid van Moolenbroek 	.cdr_ioctl		= bpfdev_ioctl,
1352*ef8d499eSDavid van Moolenbroek 	.cdr_cancel		= bpfdev_cancel,
1353*ef8d499eSDavid van Moolenbroek 	.cdr_select		= bpfdev_select
1354*ef8d499eSDavid van Moolenbroek };
1355*ef8d499eSDavid van Moolenbroek 
1356*ef8d499eSDavid van Moolenbroek /*
1357*ef8d499eSDavid van Moolenbroek  * Process a character driver request.  Since the LWIP service offers character
1358*ef8d499eSDavid van Moolenbroek  * devices for BPF only, it must be a request for a BPF device.
1359*ef8d499eSDavid van Moolenbroek  */
1360*ef8d499eSDavid van Moolenbroek void
bpfdev_process(message * m_ptr,int ipc_status)1361*ef8d499eSDavid van Moolenbroek bpfdev_process(message * m_ptr, int ipc_status)
1362*ef8d499eSDavid van Moolenbroek {
1363*ef8d499eSDavid van Moolenbroek 
1364*ef8d499eSDavid van Moolenbroek 	chardriver_process(&bpfdev_tab, m_ptr, ipc_status);
1365*ef8d499eSDavid van Moolenbroek }
1366