/*	$OpenBSD: bpf.c,v 1.165 2017/12/30 23:08:29 guenther Exp $	*/
/*	$NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org>
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/poll.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/rwlock.h>
#include <sys/atomic.h>
#include <sys/srp.h>
#include <sys/specdev.h>
#include <sys/selinfo.h>
#include <sys/task.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include "vlan.h"
#if NVLAN > 0
#include <net/if_vlan_var.h>
#endif

#define BPF_BUFSIZE 32768

#define PRINET  26			/* interruptible */

/* from kern/kern_clock.c; incremented each clock tick. */
extern int ticks;

/*
 * The default read buffer size is patchable.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_MAXBUFSIZE;
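
/*
 * Both knobs are exposed through sysctl(2) and handled by bpf_sysctl()
 * below.  An illustrative invocation (assuming the usual net.bpf.*
 * names) would be:
 *
 *	# sysctl net.bpf.bufsize=65536
 *
 * New descriptors pick up bpf_bufsize in bpfopen(); BIOCSBLEN can still
 * override it per descriptor, clamped to bpf_maxbufsize.
 */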

/*
 *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
 *  bpf_d_list is the list of descriptors
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_d_list;

int	bpf_allocbufs(struct bpf_d *);
void	bpf_ifname(struct ifnet *, struct ifreq *);
int	_bpf_mtap(caddr_t, const struct mbuf *, u_int,
	    void (*)(const void *, void *, size_t));
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, u_int, struct mbuf **,
	    struct sockaddr *, struct bpf_insn *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfpoll(dev_t, int, struct proc *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_wakeup_cb(void *);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    void (*)(const void *, void *, size_t), struct timeval *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);

int	bpf_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t);

struct bpf_d *bpfilter_lookup(int);

/*
 * Called holding ``bd_mtx''.
 */
void	bpf_attachd(struct bpf_d *, struct bpf_if *);
void	bpf_detachd(struct bpf_d *);
void	bpf_resetd(struct bpf_d *);

/*
 * Reference count access to descriptor buffers
 */
void	bpf_get(struct bpf_d *);
void	bpf_put(struct bpf_d *);

/*
 * Garbage collector SRPs
 */

void bpf_d_ref(void *, void *);
void bpf_d_unref(void *, void *);
struct srpl_rc bpf_d_rc = SRPL_RC_INITIALIZER(bpf_d_ref, bpf_d_unref, NULL);

void bpf_insn_dtor(void *, void *);
struct srp_gc bpf_insn_gc = SRP_GC_INITIALIZER(bpf_insn_dtor, NULL);

struct rwlock bpf_sysctl_lk = RWLOCK_INITIALIZER("bpfsz");

int
bpf_movein(struct uio *uio, u_int linktype, struct mbuf **mp,
    struct sockaddr *sockp, struct bpf_insn *filter)
{
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen;
	u_int len;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_LOOP:
		sockp->sa_family = AF_UNSPEC;
		hlen = sizeof(u_int32_t);
		break;

	default:
		return (EIO);
	}

	if (uio->uio_resid > MAXMCLBYTES)
		return (EIO);
	len = uio->uio_resid;

	MGETHDR(m, M_WAIT, MT_DATA);
	m->m_pkthdr.ph_ifidx = 0;
	m->m_pkthdr.len = len - hlen;

	if (len > MHLEN) {
		MCLGETI(m, M_WAIT, NULL, len);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_len = len;
	*mp = m;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(filter, mtod(m, u_char *), len, len);
	if (slen < len) {
		error = EPERM;
		goto bad;
	}

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}
	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (linktype == DLT_LOOP) {
			u_int32_t af;

			/* the link header indicates the address family */
			KASSERT(hlen == sizeof(u_int32_t));
			memcpy(&af, m->m_data, hlen);
			sockp->sa_family = ntohl(af);
		} else
			memcpy(sockp->sa_data, m->m_data, hlen);
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
	}

	/*
	 * Prepend the data link type as a mbuf tag
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	return (0);
 bad:
	m_freem(m);
	return (error);
}
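
/*
 * Worked example for bpf_movein() (a sketch with assumed sizes): a
 * 60-byte DLT_EN10MB frame arrives with uio_resid == 60 and
 * hlen == ETHER_HDR_LEN (14).  After uiomove() the first 14 bytes are
 * copied into sockp->sa_data and stripped from the mbuf, leaving
 * m_len == m_pkthdr.len == 46 bytes of payload plus the reconstructed
 * link-level destination for the caller to pass to if_output().
 */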

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */

	d->bd_bif = bp;

	KERNEL_ASSERT_LOCKED();
	SRPL_INSERT_HEAD_LOCKED(&bpf_d_rc, &bp->bif_dlist, d, bd_next);

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	bp = d->bd_bif;
	/* Not attached. */
	if (bp == NULL)
		return;

	/* Remove ``d'' from the interface's descriptor list. */
	KERNEL_ASSERT_LOCKED();
	SRPL_REMOVE_LOCKED(&bpf_d_rc, &bp->bif_dlist, d, bpf_d, bd_next);

	if (SRPL_EMPTY_LOCKED(&bp->bif_dlist)) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*bp->bif_driverp = NULL;
	}

	d->bd_bif = NULL;

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		d->bd_promisc = 0;

		bpf_get(d);
		mtx_leave(&d->bd_mtx);
		error = ifpromisc(bp->bif_ifp, 0);
		mtx_enter(&d->bd_mtx);
		bpf_put(d);

		if (error && !(error == EINVAL || error == ENODEV))
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
	}
}

void
bpfilterattach(int n)
{
	LIST_INIT(&bpf_d_list);
}

/*
 * Open the bpf device.  Returns ENXIO for an illegal minor device number,
 * EBUSY if a descriptor cannot be allocated.
 */
int
bpfopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *bd;
	int unit = minor(dev);

	if (unit & ((1 << CLONE_SHIFT) - 1))
		return (ENXIO);

	KASSERT(bpfilter_lookup(unit) == NULL);

	/* create on demand */
	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (EBUSY);

	/* Mark "free" and do most initialization. */
	bd->bd_unit = unit;
	bd->bd_bufsize = bpf_bufsize;
	bd->bd_sig = SIGIO;
	mtx_init(&bd->bd_mtx, IPL_NET);
	task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);

	if (flag & FNONBLOCK)
		bd->bd_rtout = -1;

	bpf_get(bd);
	LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
int
bpfclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *d;

	d = bpfilter_lookup(minor(dev));
	mtx_enter(&d->bd_mtx);
	bpf_detachd(d);
	bpf_wakeup(d);
	LIST_REMOVE(d, bd_list);
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	KASSERT(d->bd_in_uiomove == 0); \
	MUTEX_ASSERT_LOCKED(&d->bd_mtx); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;
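
/*
 * Illustration of the rotation (sketch): packets accumulate in the
 * store buffer (bd_sbuf) via bpf_catchpacket() until a rotation moves
 * it into the hold slot for bpfread() to drain:
 *
 *	before:	sbuf = A (full),  hbuf = NULL, fbuf = B
 *	after:	sbuf = B (empty), hbuf = A,    fbuf = NULL
 *
 * bd_fbuf stays NULL until the pending read returns A to the free
 * slot, which is why bpf_catchpacket() must drop packets when a second
 * rotation is needed before the read completes.
 */
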
/*
 *  bpfread - read next chunk of packets from buffers
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	caddr_t hbuf;
	int hlen, error;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	mtx_enter(&d->bd_mtx);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize) {
		error = EINVAL;
		goto out;
	}

	/*
	 * If there's a timeout, bd_rdStart is tagged when we start the read
	 * so we can later tell when we're done reading.
	 */
	if (d->bd_rtout != -1 && d->bd_rdStart == 0)
		d->bd_rdStart = ticks;
	else
		d->bd_rdStart = 0;

	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_bif == NULL) {
			/* interface is gone */
			if (d->bd_slen == 0) {
				error = EIO;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_immediate && d->bd_slen != 0) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_rtout == -1) {
			/* User requested non-blocking I/O */
			error = EWOULDBLOCK;
		} else {
			if ((d->bd_rdStart + d->bd_rtout) < ticks) {
				error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
				    "bpf", d->bd_rtout);
			} else
				error = EWOULDBLOCK;
		}
		if (error == EINTR || error == ERESTART)
			goto out;
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf != NULL)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				error = 0;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	hbuf = d->bd_hbuf;
	hlen = d->bd_hlen;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_fbuf = NULL;
	d->bd_in_uiomove = 1;

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	mtx_leave(&d->bd_mtx);
	error = uiomove(hbuf, hlen, uio);
	mtx_enter(&d->bd_mtx);

	/* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */
	KASSERT(d->bd_fbuf == NULL);
	KASSERT(d->bd_hbuf == NULL);
	d->bd_fbuf = hbuf;
	d->bd_in_uiomove = 0;
out:
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (error);
}
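
/*
 * A userland reader would typically drive the above as follows
 * (sketch, error handling omitted; note the read size must equal the
 * buffer size, as enforced above):
 *
 *	u_int blen;
 *	struct bpf_hdr *bh;
 *	char *buf, *p;
 *	ssize_t n;
 *
 *	ioctl(fd, BIOCGBLEN, &blen);
 *	buf = malloc(blen);
 *	n = read(fd, buf, blen);
 *	for (p = buf; p < buf + n;
 *	    p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen)) {
 *		bh = (struct bpf_hdr *)p;
 *		... packet data is at p + bh->bh_hdrlen ...
 *	}
 */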

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
void
bpf_wakeup(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * As long as csignal() and selwakeup() need to be protected
	 * by the KERNEL_LOCK() we have to delay the wakeup to
	 * another context to keep the hot path KERNEL_LOCK()-free.
	 */
	bpf_get(d);
	if (!task_add(systq, &d->bd_wake_task))
		bpf_put(d);
}

void
bpf_wakeup_cb(void *xd)
{
	struct bpf_d *d = xd;

	KERNEL_ASSERT_LOCKED();

	wakeup(d);
	if (d->bd_async && d->bd_sig)
		csignal(d->bd_pgid, d->bd_sig, d->bd_siguid, d->bd_sigeuid);

	selwakeup(&d->bd_sel);
	bpf_put(d);
}

int
bpfwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m;
	struct bpf_program *bf;
	struct bpf_insn *fcode = NULL;
	int error;
	struct sockaddr_storage dst;
	u_int dlt;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0) {
		error = ENETDOWN;
		goto out;
	}

	if (uio->uio_resid == 0) {
		error = 0;
		goto out;
	}

	KERNEL_ASSERT_LOCKED(); /* for accessing bd_wfilter */
	bf = srp_get_locked(&d->bd_wfilter);
	if (bf != NULL)
		fcode = bf->bf_insns;

	dlt = d->bd_bif->bif_dlt;

	error = bpf_movein(uio, dlt, &m, sstosa(&dst), fcode);
	if (error)
		goto out;

	if (m->m_pkthdr.len > ifp->if_mtu) {
		m_freem(m);
		error = EMSGSIZE;
		goto out;
	}

	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
	m->m_pkthdr.pf.prio = ifp->if_llprio;

	if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	NET_LOCK();
	error = ifp->if_output(ifp, m, sstosa(&dst), NULL);
	NET_UNLOCK();

out:
	bpf_put(d);
	return (error);
}
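
/*
 * Example of injecting a packet through bpfwrite() (sketch; fd, frame
 * and framelen are placeholders, and the descriptor is assumed to be
 * attached with BIOCSETIF to an ethernet interface that is up):
 *
 *	u_int on = 1;
 *	ioctl(fd, BIOCSHDRCMPLT, &on);	   keep the supplied src MAC
 *	write(fd, frame, framelen);	   complete DLT_EN10MB frame
 *
 * With bd_hdrcmplt set, dst.ss_family is rewritten above to
 * pseudo_AF_HDRCMPLT so the driver's output path does not overwrite
 * the link-level header taken from the frame.
 */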

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
void
bpf_resetd(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	KASSERT(d->bd_in_uiomove == 0);

	if (d->bd_hbuf != NULL) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set link layer read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLTLIST	Get supported link layer types.
 *  BIOCGDLT		Get link layer type.
 *  BIOCSDLT		Set link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct bpf_d *d;
	int error = 0;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_locked && suser(p, 0) != 0) {
		/* list of allowed ioctls when locked and not root */
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCGDIRFILT:
			break;
		default:
			return (EPERM);
		}
	}

	bpf_get(d);

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			mtx_enter(&d->bd_mtx);
			n = d->bd_slen;
			if (d->bd_hbuf != NULL)
				n += d->bd_hlen;
			mtx_leave(&d->bd_mtx);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			mtx_enter(&d->bd_mtx);
			d->bd_bufsize = size;
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr, 0);
		break;

	/*
	 * Set link layer write filter.
	 */
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, 1);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
		} else {
			if (d->bd_promisc == 0) {
				MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
				error = ifpromisc(d->bd_bif->bif_ifp, 1);
				if (error == 0)
					d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Get the data link type.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Set the data link type.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			mtx_enter(&d->bd_mtx);
			error = bpf_setdlt(d, *(u_int *)addr);
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif->bif_ifp, (struct ifreq *)addr);
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/* Compute number of ticks. */
			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
			if (d->bd_rtout == 0 && tv->tv_usec != 0)
				d->bd_rtout = 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCLOCK:		/* set "locked" flag (no reset) */
		d->bd_locked = 1;
		break;

	case BIOCGFILDROP:	/* get "filter-drop" flag */
		*(u_int *)addr = d->bd_fildrop;
		break;

	case BIOCSFILDROP:	/* set "filter-drop" flag */
		d->bd_fildrop = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCGDIRFILT:	/* get direction filter */
		*(u_int *)addr = d->bd_dirfilt;
		break;

	case BIOCSDIRFILT:	/* set direction filter */
		d->bd_dirfilt = (*(u_int *)addr) &
		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		if (*(int *)addr)
			d->bd_rtout = -1;
		else
			d->bd_rtout = 0;
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	/*
	 * N.B.  ioctl (FIOSETOWN) and fcntl (F_SETOWN) both end up doing
	 * the equivalent of a TIOCSPGRP and hence end up here.  *However*
	 * TIOCSPGRP's arg is a process group if it's positive and a process
	 * id if it's negative.  This is exactly the opposite of what the
	 * other two functions want!  Therefore there is code in ioctl and
	 * fcntl to negate the arg before calling here.
	 */
	case TIOCSPGRP:		/* Process or group to send signals to */
		d->bd_pgid = *(int *)addr;
		d->bd_siguid = p->p_ucred->cr_ruid;
		d->bd_sigeuid = p->p_ucred->cr_uid;
		break;

	case TIOCGPGRP:
		*(int *)addr = d->bd_pgid;
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}

	bpf_put(d);
	return (error);
}
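
/*
 * Worked example for the BIOCSRTIMEOUT conversion above (sketch,
 * assuming hz == 100 and tick == 10000 microseconds): a timeout of
 * { .tv_sec = 1, .tv_usec = 500000 } becomes
 *
 *	1 * 100 + 500000 / 10000 == 150 ticks
 *
 * while a nonzero timeout shorter than one tick is rounded up to 1 so
 * it does not degenerate into "no timeout".
 */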

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, int wf)
{
	struct bpf_program *bf;
	struct srp *filter;
	struct bpf_insn *fcode;
	u_int flen, size;

	KERNEL_ASSERT_LOCKED();
	filter = wf ? &d->bd_wfilter : &d->bd_rfilter;

	if (fp->bf_insns == NULL) {
		if (fp->bf_len != 0)
			return (EINVAL);
		srp_update_locked(&bpf_insn_gc, filter, NULL);
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		return (0);
	}
	flen = fp->bf_len;
	if (flen > BPF_MAXINSNS)
		return (EINVAL);

	fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
	    M_WAITOK | M_CANFAIL);
	if (fcode == NULL)
		return (ENOMEM);

	size = flen * sizeof(*fp->bf_insns);
	if (copyin(fp->bf_insns, fcode, size) != 0 ||
	    bpf_validate(fcode, (int)flen) == 0) {
		free(fcode, M_DEVBUF, size);
		return (EINVAL);
	}

	bf = malloc(sizeof(*bf), M_DEVBUF, M_WAITOK);
	bf->bf_len = flen;
	bf->bf_insns = fcode;

	srp_update_locked(&bpf_insn_gc, filter, bf);

	mtx_enter(&d->bd_mtx);
	bpf_resetd(d);
	mtx_leave(&d->bd_mtx);
	return (0);
}
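
/*
 * Example of installing a filter through the BIOCSETF/bpf_setf() path
 * (sketch, userland side): the simplest valid program is a single
 * "accept everything" return with a maximal snap length:
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET | BPF_K, (u_int)-1),
 *	};
 *	struct bpf_program prog = { 1, insns };
 *	ioctl(fd, BIOCSETF, &prog);
 *
 * bpf_validate() rejects programs with out-of-range jumps or that can
 * fall off the end without returning.
 */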

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp, *candidate = NULL;
	int error = 0;

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == NULL ||
		    strcmp(ifp->if_xname, ifr->ifr_name) != 0)
			continue;

		if (candidate == NULL || candidate->bif_dlt > bp->bif_dlt)
			candidate = bp;
	}

	/* Not found. */
	if (candidate == NULL)
		return (ENXIO);

	/*
	 * Allocate the packet buffers if we need to.
	 * If we're already attached to requested interface,
	 * just flush the buffer.
	 */
	mtx_enter(&d->bd_mtx);
	if (d->bd_sbuf == NULL) {
		if ((error = bpf_allocbufs(d)))
			goto out;
	}
	if (candidate != d->bd_bif) {
		/*
		 * Detach if attached to something else.
		 */
		bpf_detachd(d);
		bpf_attachd(d, candidate);
	}
	bpf_resetd(d);
out:
	mtx_leave(&d->bd_mtx);
	return (error);
}

/*
 * Copy the interface name to the ifreq.
 */
void
bpf_ifname(struct ifnet *ifp, struct ifreq *ifr)
{
	bcopy(ifp->if_xname, ifr->ifr_name, IFNAMSIZ);
}

/*
 * Support for poll() system call
 */
int
bpfpoll(dev_t dev, int events, struct proc *p)
{
	struct bpf_d *d;
	int revents;

	KERNEL_ASSERT_LOCKED();

	/*
	 * An imitation of the FIONREAD ioctl code.
	 */
	d = bpfilter_lookup(minor(dev));

	/*
	 * XXX The USB stack manages to trigger a race condition that
	 * causes bpfilter_lookup() to return NULL when a USB device
	 * gets detached while it is up and has an open bpf handler (e.g.
	 * dhclient).  We should still recheck whether the root cause of
	 * this issue can be fixed.
	 */
	if (d == NULL)
		return (POLLERR);

	/* Always ready to write data */
	revents = events & (POLLOUT | POLLWRNORM);

	if (events & (POLLIN | POLLRDNORM)) {
		mtx_enter(&d->bd_mtx);
		if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			/*
			 * if there's a timeout, mark the time we
			 * started waiting.
			 */
			if (d->bd_rtout != -1 && d->bd_rdStart == 0)
				d->bd_rdStart = ticks;
			selrecord(p, &d->bd_sel);
		}
		mtx_leave(&d->bd_mtx);
	}
	return (revents);
}

struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfrdetach, filt_bpfread };

int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	struct klist *klist;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_sel.si_note;
		kn->kn_fop = &bpfread_filtops;
		break;
	default:
		return (EINVAL);
	}

	bpf_get(d);
	kn->kn_hook = d;
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);

	mtx_enter(&d->bd_mtx);
	if (d->bd_rtout != -1 && d->bd_rdStart == 0)
		d->bd_rdStart = ticks;
	mtx_leave(&d->bd_mtx);

	return (0);
}

void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;

	KERNEL_ASSERT_LOCKED();

	SLIST_REMOVE(&d->bd_sel.si_note, kn, knote, kn_selnext);
	bpf_put(d);
}

int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;

	KERNEL_ASSERT_LOCKED();

	mtx_enter(&d->bd_mtx);
	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;
	mtx_leave(&d->bd_mtx);

	return (kn->kn_data > 0);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

/*
 * Like bpf_mtap(), but a copy function can be given.  Used by the
 * various bpf_mtap*() wrappers.
 */
int
_bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction,
    void (*cpfn)(const void *, void *, size_t))
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct srp_ref sr;
	struct bpf_d *d;
	size_t pktlen, slen;
	const struct mbuf *m0;
	struct timeval tv;
	int gottime = 0;
	int drop = 0;

	if (m == NULL)
		return (0);

	if (cpfn == NULL)
		cpfn = bpf_mcopy;

	if (bp == NULL)
		return (0);

	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;

	SRPL_FOREACH(d, &sr, &bp->bif_dlist, bd_next) {
		atomic_inc_long(&d->bd_rcount);

		if ((direction & d->bd_dirfilt) != 0)
			slen = 0;
		else {
			struct srp_ref bsr;
			struct bpf_program *bf;
			struct bpf_insn *fcode = NULL;

			bf = srp_enter(&bsr, &d->bd_rfilter);
			if (bf != NULL)
				fcode = bf->bf_insns;
			slen = bpf_mfilter(fcode, m, pktlen);
			srp_leave(&bsr);
		}

		if (slen > 0) {
			if (!gottime++)
				microtime(&tv);

			mtx_enter(&d->bd_mtx);
			bpf_catchpacket(d, (u_char *)m, pktlen, slen, cpfn,
			    &tv);
			mtx_leave(&d->bd_mtx);

			if (d->bd_fildrop)
				drop = 1;
		}
	}
	SRPL_LEAVE(&sr);

	return (drop);
}
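
/*
 * The canonical driver-side pattern feeding this function (sketch):
 *
 *	#if NBPFILTER > 0
 *		if (ifp->if_bpf)
 *			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_IN);
 *	#endif
 *
 * A nonzero return means at least one matching descriptor has
 * bd_fildrop set, and the caller may discard the packet after capture.
 */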

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
int
bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
{
	return _bpf_mtap(arg, m, direction, NULL);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend some arbitrary header from a linear buffer.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_hdr(caddr_t arg, caddr_t data, u_int dlen, const struct mbuf *m,
    u_int direction, void (*cpfn)(const void *, void *, size_t))
{
	struct m_hdr mh;
	const struct mbuf *m0;

	if (dlen > 0) {
		mh.mh_flags = 0;
		mh.mh_next = (struct mbuf *)m;
		mh.mh_len = dlen;
		mh.mh_data = data;
		m0 = (struct mbuf *)&mh;
	} else
		m0 = m;

	return _bpf_mtap(arg, m0, direction, cpfn);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend the address family.
 *
 * Con up a minimal dummy header to pacify bpf.  We allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
{
	u_int32_t    afh;

	afh = htonl(af);

	return bpf_mtap_hdr(arg, (caddr_t)&afh, sizeof(afh),
	    m, direction, NULL);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend a VLAN encapsulation header.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
{
#if NVLAN > 0
	struct ether_vlan_header evh;
	struct m_hdr mh;
	uint8_t prio;

	if ((m->m_flags & M_VLANTAG) == 0)
#endif
	{
		return bpf_mtap(arg, m, direction);
	}

#if NVLAN > 0
	KASSERT(m->m_len >= ETHER_HDR_LEN);

	prio = m->m_pkthdr.pf.prio;
	if (prio <= 1)
		prio = !prio;

	memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN);
	evh.evl_proto = evh.evl_encap_proto;
	evh.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evh.evl_tag = htons(m->m_pkthdr.ether_vtag |
	    (prio << EVL_PRIO_BITS));

	mh.mh_flags = 0;
	mh.mh_data = m->m_data + ETHER_HDR_LEN;
	mh.mh_len = m->m_len - ETHER_HDR_LEN;
	mh.mh_next = m->m_next;

	return bpf_mtap_hdr(arg, (caddr_t)&evh, sizeof(evh),
	    (struct mbuf *)&mh, direction, NULL);
#endif
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Wake up listeners if needed.
 * "copy" is the routine called to do the actual data
 * transfer.  bcopy is passed in to copy contiguous chunks, while
 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
void
bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
    void (*cpfn)(const void *, void *, size_t), struct timeval *tv)
{
	struct bpf_hdr *hp;
	int totlen, curlen;
	int hdrlen, do_wakeup = 0;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif == NULL)
		return;

	hdrlen = d->bd_bif->bif_hdrlen;

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	}

	/*
	 * Append the bpf header.
	 */
	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	hp->bh_tstamp.tv_sec = tv->tv_sec;
	hp->bh_tstamp.tv_usec = tv->tv_usec;
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;

	if (d->bd_immediate) {
		/*
		 * Immediate mode is set.  A packet arrived so any
		 * reads should be woken up.
		 */
		do_wakeup = 1;
	}

	if (d->bd_rdStart && (d->bd_rtout + d->bd_rdStart < ticks)) {
		/*
		 * we could be selecting on the bpf, and we
		 * may have timeouts set.  We got here by getting
		 * a packet, so wake up the reader.
		 */
		if (d->bd_fbuf != NULL) {
			d->bd_rdStart = 0;
			ROTATE_BUFFERS(d);
			do_wakeup = 1;
		}
	}

	if (do_wakeup)
		bpf_wakeup(d);
}
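
/*
 * Example of the length bookkeeping above (sketch, assuming an 18-byte
 * bif_hdrlen): for a 1500-byte packet and a filter return (snaplen) of
 * 96, the record occupies BPF_WORDALIGN(18 + 96) bytes of the store
 * buffer, with bh_datalen == 1500 (the size on the wire) and
 * bh_caplen == 96 (the bytes actually captured).
 */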

/*
 * Initialize all nonzero fields of a descriptor.
 */
int
bpf_allocbufs(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_fbuf == NULL)
		return (ENOMEM);

	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_sbuf == NULL) {
		free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
		return (ENOMEM);
	}

	d->bd_slen = 0;
	d->bd_hlen = 0;

	return (0);
}

void
bpf_get(struct bpf_d *bd)
{
	atomic_inc_int(&bd->bd_ref);
}

/*
 * Free buffers currently in use by a descriptor
 * when the reference count drops to zero.
 */
void
bpf_put(struct bpf_d *bd)
{
	if (atomic_dec_int_nv(&bd->bd_ref) > 0)
		return;

	free(bd->bd_sbuf, M_DEVBUF, 0);
	free(bd->bd_hbuf, M_DEVBUF, 0);
	free(bd->bd_fbuf, M_DEVBUF, 0);
	KERNEL_ASSERT_LOCKED();
	srp_update_locked(&bpf_insn_gc, &bd->bd_rfilter, NULL);
	srp_update_locked(&bpf_insn_gc, &bd->bd_wfilter, NULL);

	free(bd, M_DEVBUF, sizeof(*bd));
}

/*
 * Attach an interface to bpf.  driverp is a pointer to a (struct bpf_if *)
 * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
 * size of the link header (variable length headers not yet supported).
 */
void
bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL)
		panic("bpfattach");
	SRPL_INIT(&bp->bif_dlist);
	bp->bif_driverp = (struct bpf_if **)driverp;
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;

	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = NULL;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
}
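
/*
 * Worked example of the bif_hdrlen computation (sketch, assuming
 * SIZEOF_BPF_HDR == 18): for DLT_EN10MB, hdrlen == ETHER_HDR_LEN == 14,
 * so
 *
 *	BPF_WORDALIGN(14 + 18) - 14 == 32 - 14 == 18
 *
 * and each record carries 18 bytes of bpf header plus the 14-byte
 * ethernet header, which places the network layer header at offset 32,
 * a longword boundary.
 */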

/* Detach an interface from its attached bpf device.  */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *nbp, **pbp = &bpf_iflist;
	struct bpf_d *bd;
	int maj;

	KERNEL_ASSERT_LOCKED();

	for (bp = bpf_iflist; bp; bp = nbp) {
		nbp = bp->bif_next;
		if (bp->bif_ifp == ifp) {
			*pbp = nbp;

			/* Locate the major number. */
			for (maj = 0; maj < nchrdev; maj++)
				if (cdevsw[maj].d_open == bpfopen)
					break;

			while ((bd = SRPL_FIRST_LOCKED(&bp->bif_dlist)))
				vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR);

			free(bp, M_DEVBUF, sizeof *bp);
		} else
			pbp = &bp->bif_next;
	}
	ifp->if_bpf = NULL;
}

int
bpf_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int newval;
	int error;

	switch (name[0]) {
	case NET_BPF_BUFSIZE:
		newval = bpf_bufsize;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &newval);
		if (error)
			return (error);
		if (newval < BPF_MINBUFSIZE || newval > bpf_maxbufsize)
			return (EINVAL);
		bpf_bufsize = newval;
		break;
	case NET_BPF_MAXBUFSIZE:
		newval = bpf_maxbufsize;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &newval);
		if (error)
			return (error);
		if (newval < BPF_MINBUFSIZE)
			return (EINVAL);
		bpf_maxbufsize = newval;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}

int
bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int flags = RW_INTR;
	int error;

	if (namelen != 1)
		return (ENOTDIR);

	flags |= (newp == NULL) ? RW_READ : RW_WRITE;

	error = rw_enter(&bpf_sysctl_lk, flags);
	if (error != 0)
		return (error);

	error = bpf_sysctl_locked(name, namelen, oldp, oldlenp, newp, newlen);

	rw_exit(&bpf_sysctl_lk);

	return (error);
}

struct bpf_d *
bpfilter_lookup(int unit)
{
	struct bpf_d *bd;

	KERNEL_ASSERT_LOCKED();

	LIST_FOREACH(bd, &bpf_d_list, bd_list)
		if (bd->bd_unit == unit)
			return (bd);
	return (NULL);
}

/*
 * Get the list of data link types available on the interface.
 */
int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return (ENOMEM);
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
			if (error)
				break;
		}
		n++;
	}

	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	struct ifnet *ifp;
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	ifp = d->bd_bif->bif_ifp;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return (EINVAL);
	bpf_detachd(d);
	bpf_attachd(d, bp);
	bpf_resetd(d);
	return (0);
}

void
bpf_d_ref(void *null, void *d)
{
	bpf_get(d);
}

void
bpf_d_unref(void *null, void *d)
{
	bpf_put(d);
}

void
bpf_insn_dtor(void *null, void *f)
{
	struct bpf_program *bf = f;
	struct bpf_insn *insns = bf->bf_insns;

	free(insns, M_DEVBUF, bf->bf_len * sizeof(*insns));
	free(bf, M_DEVBUF, sizeof(*bf));
}

u_int32_t	bpf_mbuf_ldw(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldh(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldb(const void *, u_int32_t, int *);

int		bpf_mbuf_copy(const struct mbuf *, u_int32_t,
		    void *, u_int32_t);

const struct bpf_ops bpf_mbuf_ops = {
	bpf_mbuf_ldw,
	bpf_mbuf_ldh,
	bpf_mbuf_ldb,
};

int
bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len)
{
	u_int8_t *cp = buf;
	u_int32_t count;

	while (off >= m->m_len) {
		off -= m->m_len;

		m = m->m_next;
		if (m == NULL)
			return (-1);
	}

	for (;;) {
		count = min(m->m_len - off, len);

		memcpy(cp, m->m_data + off, count);
		len -= count;

		if (len == 0)
			return (0);

		m = m->m_next;
		if (m == NULL)
			break;

		cp += count;
		off = 0;
	}

	return (-1);
}
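
/*
 * Example (sketch): a 4-byte load at offset 2 of a chain whose mbufs
 * hold 3 and 4 bytes copies 1 byte from the first mbuf and 3 from the
 * second.  A load that runs past the end of the chain returns -1,
 * which the bpf_mbuf_ld*() wrappers below turn into *err = 1 so the
 * filter machine can abort the match.
 */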

u_int32_t
bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err)
{
	u_int32_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohl(v);
}

u_int32_t
bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err)
{
	u_int16_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohs(v);
}

u_int32_t
bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err)
{
	const struct mbuf *m = m0;
	u_int8_t v;

	while (k >= m->m_len) {
		k -= m->m_len;

		m = m->m_next;
		if (m == NULL) {
			*err = 1;
			return (0);
		}
	}
	v = m->m_data[k];

	*err = 0;
	return v;
}

u_int
bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen)
{
	return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen);
}
1849