/*	$OpenBSD: bpf.c,v 1.149 2016/09/12 16:24:37 krw Exp $	*/
/*	$NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org>
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/socket.h>
#include <sys/poll.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/rwlock.h>
#include <sys/atomic.h>
#include <sys/srp.h>
#include <sys/specdev.h>
#include <sys/selinfo.h>
#include <sys/task.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include "vlan.h"
#if NVLAN > 0
#include <net/if_vlan_var.h>
#endif

#define BPF_BUFSIZE 32768

#define PRINET  26			/* interruptible */

/* from kern/kern_clock.c; incremented each clock tick. */
extern int ticks;

/*
 * The default read buffer size is patchable.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_MAXBUFSIZE;

/*
 *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
 *  bpf_d_list is the list of descriptors
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_d_list;

void	bpf_allocbufs(struct bpf_d *);
void	bpf_ifname(struct ifnet *, struct ifreq *);
int	_bpf_mtap(caddr_t, const struct mbuf *, u_int,
	    void (*)(const void *, void *, size_t));
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, u_int, struct mbuf **,
	    struct sockaddr *, struct bpf_insn *);
void	bpf_attachd(struct bpf_d *, struct bpf_if *);
void	bpf_detachd(struct bpf_d *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfpoll(dev_t, int, struct proc *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_wakeup_cb(void *);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    void (*)(const void *, void *, size_t), struct timeval *);
void	bpf_reset_d(struct bpf_d *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);

int	bpf_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t);

struct bpf_d *bpfilter_lookup(int);

/*
 * Reference count access to descriptor buffers
 */
void	bpf_get(struct bpf_d *);
void	bpf_put(struct bpf_d *);

/*
 * garbage collector srps
 */

void bpf_d_ref(void *, void *);
void bpf_d_unref(void *, void *);
struct srpl_rc bpf_d_rc = SRPL_RC_INITIALIZER(bpf_d_ref, bpf_d_unref, NULL);

void bpf_insn_dtor(void *, void *);
struct srp_gc bpf_insn_gc = SRP_GC_INITIALIZER(bpf_insn_dtor, NULL);

int
bpf_movein(struct uio *uio, u_int linktype, struct mbuf **mp,
    struct sockaddr *sockp, struct bpf_insn *filter)
{
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen;
	u_int len;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_LOOP:
		sockp->sa_family = AF_UNSPEC;
		hlen = sizeof(u_int32_t);
		break;

	default:
		return (EIO);
	}

	if (uio->uio_resid > MAXMCLBYTES)
		return (EIO);
	len = uio->uio_resid;

	MGETHDR(m, M_WAIT, MT_DATA);
	m->m_pkthdr.ph_ifidx = 0;
	m->m_pkthdr.len = len - hlen;

	if (len > MHLEN) {
		MCLGETI(m, M_WAIT, NULL, len);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_len = len;
	*mp = m;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(filter, mtod(m, u_char *), len, len);
	if (slen < len) {
		error = EPERM;
		goto bad;
	}

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}
	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (linktype == DLT_LOOP) {
			u_int32_t af;

			/* the link header indicates the address family */
			KASSERT(hlen == sizeof(u_int32_t));
			memcpy(&af, m->m_data, hlen);
			sockp->sa_family = ntohl(af);
		} else
			memcpy(sockp->sa_data, m->m_data, hlen);
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
	}

	/*
	 * Prepend the data link type as a mbuf tag
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	return (0);
 bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 * Must be called at splnet.
 */
void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */

	d->bd_bif = bp;

	KERNEL_ASSERT_LOCKED();
	SRPL_INSERT_HEAD_LOCKED(&bpf_d_rc, &bp->bif_dlist, d, bd_next);

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_if *bp;

	bp = d->bd_bif;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		d->bd_promisc = 0;
		error = ifpromisc(bp->bif_ifp, 0);
		if (error && !(error == EINVAL || error == ENODEV))
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
	}

	/* Remove d from the interface's descriptor list. */
	KERNEL_ASSERT_LOCKED();
	SRPL_REMOVE_LOCKED(&bpf_d_rc, &bp->bif_dlist, d, bpf_d, bd_next);

	if (SRPL_EMPTY_LOCKED(&bp->bif_dlist)) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*d->bd_bif->bif_driverp = 0;
	}

	d->bd_bif = NULL;
}

void
bpfilterattach(int n)
{
	LIST_INIT(&bpf_d_list);
}

/*
 * Open the bpf device.  Returns ENXIO for an illegal minor device number,
 * EBUSY if the descriptor cannot be allocated.
 */
int
bpfopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *bd;
	int unit = minor(dev);

	if (unit & ((1 << CLONE_SHIFT) - 1))
		return (ENXIO);

	KASSERT(bpfilter_lookup(unit) == NULL);

	/* create on demand */
	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (EBUSY);

	/* Mark "free" and do most initialization. */
	bd->bd_unit = unit;
	bd->bd_bufsize = bpf_bufsize;
	bd->bd_sig = SIGIO;
	task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);

	if (flag & FNONBLOCK)
		bd->bd_rtout = -1;

	bpf_get(bd);
	LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
int
bpfclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *d;
	int s;

	d = bpfilter_lookup(minor(dev));
	s = splnet();
	if (d->bd_bif)
		bpf_detachd(d);
	bpf_wakeup(d);
	LIST_REMOVE(d, bd_list);
	bpf_put(d);
	splx(s);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = 0;
/*
 *  bpfread - read next chunk of packets from buffers
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	int error;
	int s;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	s = splnet();

	bpf_get(d);

	/*
	 * If there's a timeout, bd_rdStart is set when we start the read
	 * so that we can figure out when we're done reading.
	 */
	if (d->bd_rtout != -1 && d->bd_rdStart == 0)
		d->bd_rdStart = ticks;
	else
		d->bd_rdStart = 0;

	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == 0) {
		if (d->bd_bif == NULL) {
			/* interface is gone */
			if (d->bd_slen == 0) {
				bpf_put(d);
				splx(s);
				return (EIO);
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_immediate && d->bd_slen != 0) {
			/*
			 * One or more packets arrived since the previous
			 * read or while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_rtout == -1) {
			/* User requested non-blocking I/O */
			error = EWOULDBLOCK;
		} else {
			if ((d->bd_rdStart + d->bd_rtout) < ticks) {
				error = tsleep((caddr_t)d, PRINET|PCATCH, "bpf",
				    d->bd_rtout);
			} else
				error = EWOULDBLOCK;
		}
		if (error == EINTR || error == ERESTART) {
			bpf_put(d);
			splx(s);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				bpf_put(d);
				splx(s);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	splx(s);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);

	s = splnet();
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;

	bpf_put(d);
	splx(s);

	return (error);
}
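
/*
 * Example (hypothetical userland sketch, not part of the kernel): a
 * reader must hand read() a buffer exactly bd_bufsize bytes long, then
 * walk the records it gets back, stepping by the word-aligned record
 * size.  handle_packet() is a placeholder:
 *
 *	u_char buf[32768], *p;			// size from BIOCGBLEN
 *	ssize_t n = read(bpffd, buf, sizeof(buf));
 *	for (p = buf; p < buf + n; ) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)p;
 *		handle_packet(p + bh->bh_hdrlen, bh->bh_caplen);
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */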


/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
void
bpf_wakeup(struct bpf_d *d)
{
	/*
	 * As long as csignal() and selwakeup() need to be protected
	 * by the KERNEL_LOCK() we have to delay the wakeup to
	 * another context to keep the hot path KERNEL_LOCK()-free.
	 */
	bpf_get(d);
	if (!task_add(systq, &d->bd_wake_task))
		bpf_put(d);
}

void
bpf_wakeup_cb(void *xd)
{
	struct bpf_d *d = xd;

	KERNEL_ASSERT_LOCKED();

	wakeup(d);
	if (d->bd_async && d->bd_sig)
		csignal(d->bd_pgid, d->bd_sig, d->bd_siguid, d->bd_sigeuid);

	selwakeup(&d->bd_sel);
	bpf_put(d);
}

int
bpfwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m;
	struct bpf_program *bf;
	struct bpf_insn *fcode = NULL;
	int error, s;
	struct sockaddr_storage dst;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0)
		return (ENETDOWN);

	if (uio->uio_resid == 0)
		return (0);

	KERNEL_ASSERT_LOCKED(); /* for accessing bd_wfilter */
	bf = srp_get_locked(&d->bd_wfilter);
	if (bf != NULL)
		fcode = bf->bf_insns;

	error = bpf_movein(uio, d->bd_bif->bif_dlt, &m,
	    (struct sockaddr *)&dst, fcode);
	if (error)
		return (error);

	if (m->m_pkthdr.len > ifp->if_mtu) {
		m_freem(m);
		return (EMSGSIZE);
	}

	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
	m->m_pkthdr.pf.prio = ifp->if_llprio;

	if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	s = splsoftnet();
	error = ifp->if_output(ifp, m, (struct sockaddr *)&dst, NULL);
	splx(s);
	/*
	 * The driver frees the mbuf.
	 */
	return (error);
}
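
/*
 * Example (hypothetical userland sketch): injecting a frame through the
 * write path above.  On a DLT_EN10MB descriptor the data must begin
 * with a complete 14-byte Ethernet header; with BIOCSHDRCMPLT set the
 * source address is used as written.  frame/framelen are placeholders:
 *
 *	u_int one = 1;
 *	ioctl(bpffd, BIOCSHDRCMPLT, &one);
 *	write(bpffd, frame, framelen);	// frame starts with ether header
 */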

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.  Should be called at splnet.
 */
void
bpf_reset_d(struct bpf_d *d)
{
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set link layer read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLTLIST	Get supported link layer types.
 *  BIOCGDLT		Get link layer type.
 *  BIOCSDLT		Set link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct bpf_d *d;
	int s, error = 0;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_locked && suser(p, 0) != 0) {
		/* list of allowed ioctls when locked and not root */
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCGDIRFILT:
			break;
		default:
			return (EPERM);
		}
	}

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			s = splnet();
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			splx(s);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr, 0);
		break;

	/*
	 * Set link layer write filter.
	 */
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, 1);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		s = splnet();
		bpf_reset_d(d);
		splx(s);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		s = splnet();
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		splx(s);
		break;

	/*
	 * Get a list of supported link layer types.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Get link layer type.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Set link layer type.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif->bif_ifp, (struct ifreq *)addr);
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/* Compute number of ticks. */
			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
			if (d->bd_rtout == 0 && tv->tv_usec != 0)
				d->bd_rtout = 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCLOCK:		/* set "locked" flag (no reset) */
		d->bd_locked = 1;
		break;

	case BIOCGFILDROP:	/* get "filter-drop" flag */
		*(u_int *)addr = d->bd_fildrop;
		break;

	case BIOCSFILDROP:	/* set "filter-drop" flag */
		d->bd_fildrop = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCGDIRFILT:	/* get direction filter */
		*(u_int *)addr = d->bd_dirfilt;
		break;

	case BIOCSDIRFILT:	/* set direction filter */
		d->bd_dirfilt = (*(u_int *)addr) &
		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		if (*(int *)addr)
			d->bd_rtout = -1;
		else
			d->bd_rtout = 0;
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	/*
	 * N.B.  ioctl (FIOSETOWN) and fcntl (F_SETOWN) both end up doing
	 * the equivalent of a TIOCSPGRP and hence end up here.  *However*
	 * TIOCSPGRP's arg is a process group if it's positive and a process
	 * id if it's negative.  This is exactly the opposite of what the
	 * other two functions want!  Therefore there is code in ioctl and
	 * fcntl to negate the arg before calling here.
	 */
	case TIOCSPGRP:		/* Process or group to send signals to */
		d->bd_pgid = *(int *)addr;
		d->bd_siguid = p->p_ucred->cr_ruid;
		d->bd_sigeuid = p->p_ucred->cr_uid;
		break;

	case TIOCGPGRP:
		*(int *)addr = d->bd_pgid;
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}
	return (error);
}
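
/*
 * Example (hypothetical userland sketch): a minimal capture setup built
 * from the ioctls above; "em0" and the error handling are placeholders:
 *
 *	int fd = open("/dev/bpf0", O_RDONLY);
 *	struct ifreq ifr;
 *	u_int imm = 1, blen;
 *
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);	// attach to the interface
 *	ioctl(fd, BIOCIMMEDIATE, &imm);	// return packets as they arrive
 *	ioctl(fd, BIOCGBLEN, &blen);	// read() buffer must be this size
 */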

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, int wf)
{
	struct bpf_program *bf;
	struct srp *filter;
	struct bpf_insn *fcode;
	u_int flen, size;
	int s;

	KERNEL_ASSERT_LOCKED();
	filter = wf ? &d->bd_wfilter : &d->bd_rfilter;

	if (fp->bf_insns == 0) {
		if (fp->bf_len != 0)
			return (EINVAL);
		srp_update_locked(&bpf_insn_gc, filter, NULL);
		s = splnet();
		bpf_reset_d(d);
		splx(s);
		return (0);
	}
	flen = fp->bf_len;
	if (flen > BPF_MAXINSNS)
		return (EINVAL);

	fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
	    M_WAITOK | M_CANFAIL);
	if (fcode == NULL)
		return (ENOMEM);

	size = flen * sizeof(*fp->bf_insns);
	if (copyin(fp->bf_insns, fcode, size) != 0 ||
	    bpf_validate(fcode, (int)flen) == 0) {
		free(fcode, M_DEVBUF, size);
		return (EINVAL);
	}

	bf = malloc(sizeof(*bf), M_DEVBUF, M_WAITOK);
	bf->bf_len = flen;
	bf->bf_insns = fcode;

	srp_update_locked(&bpf_insn_gc, filter, bf);

	s = splnet();
	bpf_reset_d(d);
	splx(s);
	return (0);
}
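
/*
 * Example (hypothetical userland sketch): installing a read filter via
 * BIOCSETF that accepts only IPv4 over Ethernet; the 96-byte snap
 * length is arbitrary:
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),		// ethertype
 *		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ETHERTYPE_IP, 0, 1),
 *		BPF_STMT(BPF_RET+BPF_K, 96),			// accept
 *		BPF_STMT(BPF_RET+BPF_K, 0),			// reject
 *	};
 *	struct bpf_program prog = {
 *		sizeof(insns) / sizeof(insns[0]), insns
 *	};
 *
 *	ioctl(fd, BIOCSETF, &prog);
 */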

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp, *candidate = NULL;
	int s;

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == NULL ||
		    strcmp(ifp->if_xname, ifr->ifr_name) != 0)
			continue;

		if (candidate == NULL || candidate->bif_dlt > bp->bif_dlt)
			candidate = bp;
	}

	if (candidate != NULL) {
		/*
		 * Allocate the packet buffers if we need to.
		 * If we're already attached to the requested interface,
		 * just flush the buffer.
		 */
		if (d->bd_sbuf == NULL)
			bpf_allocbufs(d);
		s = splnet();
		if (candidate != d->bd_bif) {
			if (d->bd_bif)
				/*
				 * Detach if attached to something else.
				 */
				bpf_detachd(d);

			bpf_attachd(d, candidate);
		}
		bpf_reset_d(d);
		splx(s);
		return (0);
	}
	/* Not found. */
	return (ENXIO);
}

/*
 * Copy the interface name to the ifreq.
 */
void
bpf_ifname(struct ifnet *ifp, struct ifreq *ifr)
{
	bcopy(ifp->if_xname, ifr->ifr_name, IFNAMSIZ);
}

/*
 * Support for poll() system call
 */
int
bpfpoll(dev_t dev, int events, struct proc *p)
{
	struct bpf_d *d;
	int s, revents;

	/*
	 * An imitation of the FIONREAD ioctl code.
	 */
	d = bpfilter_lookup(minor(dev));

	/*
	 * XXX The USB stack manages to trigger a race condition
	 * which causes bpfilter_lookup to return NULL when a USB device
	 * gets detached while it is up and has an open bpf handler (e.g.
	 * dhclient).  We still should recheck whether we can fix the root
	 * cause of this issue.
	 */
	if (d == NULL)
		return (POLLERR);

	/* Always ready to write data */
	revents = events & (POLLOUT | POLLWRNORM);

	if (events & (POLLIN | POLLRDNORM)) {
		s = splnet();
		if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			/*
			 * if there's a timeout, mark the time we
			 * started waiting.
			 */
			if (d->bd_rtout != -1 && d->bd_rdStart == 0)
				d->bd_rdStart = ticks;
			selrecord(p, &d->bd_sel);
		}
		splx(s);
	}
	return (revents);
}

struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfrdetach, filt_bpfread };

int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	struct klist *klist;
	int s;

	d = bpfilter_lookup(minor(dev));
	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_sel.si_note;
		kn->kn_fop = &bpfread_filtops;
		break;
	default:
		return (EINVAL);
	}

	kn->kn_hook = d;

	s = splnet();
	bpf_get(d);
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
	if (d->bd_rtout != -1 && d->bd_rdStart == 0)
		d->bd_rdStart = ticks;
	splx(s);

	return (0);
}

void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;
	int s;

	s = splnet();
	SLIST_REMOVE(&d->bd_sel.si_note, kn, knote, kn_selnext);
	bpf_put(d);
	splx(s);
}

int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;

	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;
	return (kn->kn_data > 0);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

/*
 * Like bpf_mtap, but a copy function can be given.  Used by the
 * various bpf_mtap* variants.
 */
int
_bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction,
    void (*cpfn)(const void *, void *, size_t))
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct srp_ref sr;
	struct bpf_d *d;
	size_t pktlen, slen;
	const struct mbuf *m0;
	struct timeval tv;
	int gottime = 0;
	int drop = 0;
	int s;

	if (m == NULL)
		return (0);

	if (cpfn == NULL)
		cpfn = bpf_mcopy;

	if (bp == NULL)
		return (0);

	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;

	SRPL_FOREACH(d, &sr, &bp->bif_dlist, bd_next) {
		atomic_inc_long(&d->bd_rcount);

		if ((direction & d->bd_dirfilt) != 0)
			slen = 0;
		else {
			struct srp_ref bsr;
			struct bpf_program *bf;
			struct bpf_insn *fcode = NULL;

			bf = srp_enter(&bsr, &d->bd_rfilter);
			if (bf != NULL)
				fcode = bf->bf_insns;
			slen = bpf_mfilter(fcode, m, pktlen);
			srp_leave(&bsr);
		}

		if (slen > 0) {
			if (!gottime++)
				microtime(&tv);

			KERNEL_LOCK();
			s = splnet();
			bpf_catchpacket(d, (u_char *)m, pktlen, slen, cpfn,
			    &tv);
			splx(s);
			KERNEL_UNLOCK();

			if (d->bd_fildrop)
				drop = 1;
		}
	}
	SRPL_LEAVE(&sr);

	return (drop);
}

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
int
bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
{
	return _bpf_mtap(arg, m, direction, NULL);
}
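
/*
 * Example (sketch of the conventional call site): drivers tap received
 * packets before passing them up the stack, guarding on the bpf cookie
 * so the fast path costs a single test when nobody is listening:
 *
 *	#if NBPFILTER > 0
 *		if (ifp->if_bpf)
 *			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_IN);
 *	#endif
 */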

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend some arbitrary header from a linear buffer.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_hdr(caddr_t arg, caddr_t data, u_int dlen, const struct mbuf *m,
    u_int direction, void (*cpfn)(const void *, void *, size_t))
{
	struct m_hdr mh;
	const struct mbuf *m0;

	if (dlen > 0) {
		mh.mh_flags = 0;
		mh.mh_next = (struct mbuf *)m;
		mh.mh_len = dlen;
		mh.mh_data = data;
		m0 = (struct mbuf *)&mh;
	} else
		m0 = m;

	return _bpf_mtap(arg, m0, direction, cpfn);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend the address family.
 *
 * Con up a minimal dummy header to pacify bpf.  We allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
{
	u_int32_t    afh;

	afh = htonl(af);

	return bpf_mtap_hdr(arg, (caddr_t)&afh, sizeof(afh),
	    m, direction, NULL);
}
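
/*
 * Example (sketch, modeled on a loopback-style output path): when the
 * link has no real header, the address family is prepended so that
 * listeners can still decode the payload:
 *
 *	#if NBPFILTER > 0
 *		if (ifp->if_bpf)
 *			bpf_mtap_af(ifp->if_bpf, dst->sa_family, m,
 *			    BPF_DIRECTION_OUT);
 *	#endif
 */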

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend a VLAN encapsulation header.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
{
#if NVLAN > 0
	struct ether_vlan_header evh;
	struct m_hdr mh;
	uint8_t prio;

	if ((m->m_flags & M_VLANTAG) == 0)
#endif
	{
		return bpf_mtap(arg, m, direction);
	}

#if NVLAN > 0
	KASSERT(m->m_len >= ETHER_HDR_LEN);

	prio = m->m_pkthdr.pf.prio;
	if (prio <= 1)
		prio = !prio;

	memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN);
	evh.evl_proto = evh.evl_encap_proto;
	evh.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evh.evl_tag = htons(m->m_pkthdr.ether_vtag |
	    (prio << EVL_PRIO_BITS));

	mh.mh_flags = 0;
	mh.mh_data = m->m_data + ETHER_HDR_LEN;
	mh.mh_len = m->m_len - ETHER_HDR_LEN;
	mh.mh_next = m->m_next;

	return bpf_mtap_hdr(arg, (caddr_t)&evh, sizeof(evh),
	    (struct mbuf *)&mh, direction, NULL);
#endif
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Wake up pending readers as needed.  "cpfn" is the
 * routine called to do the actual data transfer.  bcopy is passed in
 * to copy contiguous chunks, while bpf_mcopy is passed in to copy
 * mbuf chains.  In the latter case, pkt is really an mbuf.
 */
void
bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
    void (*cpfn)(const void *, void *, size_t), struct timeval *tv)
{
	struct bpf_hdr *hp;
	int totlen, curlen;
	int hdrlen;

	if (d->bd_bif == NULL)
		return;

	hdrlen = d->bd_bif->bif_hdrlen;

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		bpf_wakeup(d);
		curlen = 0;
	}

	/*
	 * Append the bpf header.
	 */
	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	hp->bh_tstamp.tv_sec = tv->tv_sec;
	hp->bh_tstamp.tv_usec = tv->tv_usec;
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;

	if (d->bd_immediate) {
		/*
		 * Immediate mode is set.  A packet arrived so any
		 * reads should be woken up.
		 */
		bpf_wakeup(d);
	}

	if (d->bd_rdStart && (d->bd_rtout + d->bd_rdStart < ticks)) {
		/*
		 * We could be selecting on the bpf device and may
		 * have timeouts set.  We got here by receiving a
		 * packet, so wake up the reader.
		 */
		if (d->bd_fbuf) {
			d->bd_rdStart = 0;
			ROTATE_BUFFERS(d);
			bpf_wakeup(d);
		}
	}
}

/*
 * Allocate the initial packet buffers for a descriptor.
 */
void
bpf_allocbufs(struct bpf_d *d)
{
	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK);
	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK);
	d->bd_slen = 0;
	d->bd_hlen = 0;
}

void
bpf_get(struct bpf_d *bd)
{
	bd->bd_ref++;
}

/*
 * Free buffers currently in use by a descriptor
 * when the reference count drops to zero.
 */
void
bpf_put(struct bpf_d *bd)
{
	if (--bd->bd_ref > 0)
		return;

	free(bd->bd_sbuf, M_DEVBUF, 0);
	free(bd->bd_hbuf, M_DEVBUF, 0);
	free(bd->bd_fbuf, M_DEVBUF, 0);
	KERNEL_ASSERT_LOCKED();
	srp_update_locked(&bpf_insn_gc, &bd->bd_rfilter, NULL);
	srp_update_locked(&bpf_insn_gc, &bd->bd_wfilter, NULL);

	free(bd, M_DEVBUF, sizeof(*bd));
}

/*
 * Attach an interface to bpf.  driverp is a pointer to a (struct bpf_if *)
 * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
 * size of the link header (variable length headers not yet supported).
 */
void
bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL)
		panic("bpfattach");
	SRPL_INIT(&bp->bif_dlist);
	bp->bif_driverp = (struct bpf_if **)driverp;
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;

	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = NULL;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
}
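
/*
 * Example (sketch of the conventional attach call): an Ethernet driver,
 * typically via ether_ifattach(), registers itself roughly like this,
 * so listeners see DLT_EN10MB frames behind a 14-byte fixed header:
 *
 *	#if NBPFILTER > 0
 *		bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN);
 *	#endif
 */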

/* Detach an interface from its attached bpf device.  */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *nbp, **pbp = &bpf_iflist;
	struct bpf_d *bd;
	int maj;

	KERNEL_ASSERT_LOCKED();

	for (bp = bpf_iflist; bp; bp = nbp) {
		nbp = bp->bif_next;
		if (bp->bif_ifp == ifp) {
			*pbp = nbp;

			/* Locate the major number. */
			for (maj = 0; maj < nchrdev; maj++)
				if (cdevsw[maj].d_open == bpfopen)
					break;

			while ((bd = SRPL_FIRST_LOCKED(&bp->bif_dlist)))
				vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR);

			free(bp, M_DEVBUF, sizeof *bp);
		} else
			pbp = &bp->bif_next;
	}
	ifp->if_bpf = NULL;
}

int
bpf_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int newval;
	int error;

	switch (name[0]) {
	case NET_BPF_BUFSIZE:
		newval = bpf_bufsize;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &newval);
		if (error)
			return (error);
		if (newval < BPF_MINBUFSIZE || newval > bpf_maxbufsize)
			return (EINVAL);
		bpf_bufsize = newval;
		break;
	case NET_BPF_MAXBUFSIZE:
		newval = bpf_maxbufsize;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &newval);
		if (error)
			return (error);
		if (newval < BPF_MINBUFSIZE)
			return (EINVAL);
		bpf_maxbufsize = newval;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}

int
bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	static struct rwlock bpf_sysctl_lk = RWLOCK_INITIALIZER("bpfsz");
	int flags = RW_INTR;
	int error;

	if (namelen != 1)
		return (ENOTDIR);

	flags |= (newp == NULL) ? RW_READ : RW_WRITE;

	error = rw_enter(&bpf_sysctl_lk, flags);
	if (error != 0)
		return (error);

	error = bpf_sysctl_locked(name, namelen, oldp, oldlenp, newp, newlen);

	rw_exit(&bpf_sysctl_lk);

	return (error);
}
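
/*
 * Example (assuming the conventional net.bpf sysctl names): the two
 * tunables handled above would be adjusted from userland with
 * sysctl(8), e.g.
 *
 *	$ sysctl net.bpf.bufsize=65536
 *	$ sysctl net.bpf.maxbufsize=4194304
 */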

struct bpf_d *
bpfilter_lookup(int unit)
{
	struct bpf_d *bd;

	LIST_FOREACH(bd, &bpf_d_list, bd_list)
		if (bd->bd_unit == unit)
			return (bd);
	return (NULL);
}

/*
 * Get the list of data link types available on the interface.
 */
int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return (ENOMEM);
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
			if (error)
				break;
		}
		n++;
	}

	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int s;
	struct ifnet *ifp;
	struct bpf_if *bp;

	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	ifp = d->bd_bif->bif_ifp;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return (EINVAL);
	s = splnet();
	bpf_detachd(d);
	bpf_attachd(d, bp);
	bpf_reset_d(d);
	splx(s);
	return (0);
}

void
bpf_d_ref(void *null, void *d)
{
	bpf_get(d);
}

void
bpf_d_unref(void *null, void *d)
{
	bpf_put(d);
}

void
bpf_insn_dtor(void *null, void *f)
{
	struct bpf_program *bf = f;
	struct bpf_insn *insns = bf->bf_insns;

	free(insns, M_DEVBUF, bf->bf_len * sizeof(*insns));
	free(bf, M_DEVBUF, sizeof(*bf));
}

u_int32_t	bpf_mbuf_ldw(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldh(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldb(const void *, u_int32_t, int *);

int		bpf_mbuf_copy(const struct mbuf *, u_int32_t,
		    void *, u_int32_t);

const struct bpf_ops bpf_mbuf_ops = {
	bpf_mbuf_ldw,
	bpf_mbuf_ldh,
	bpf_mbuf_ldb,
};

int
bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len)
{
	u_int8_t *cp = buf;
	u_int32_t count;

	while (off >= m->m_len) {
		off -= m->m_len;

		m = m->m_next;
		if (m == NULL)
			return (-1);
	}

	for (;;) {
		count = min(m->m_len - off, len);

		memcpy(cp, m->m_data + off, count);
		len -= count;

		if (len == 0)
			return (0);

		m = m->m_next;
		if (m == NULL)
			break;

		cp += count;
		off = 0;
	}

	return (-1);
}

u_int32_t
bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err)
{
	u_int32_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohl(v);
}

u_int32_t
bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err)
{
	u_int16_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohs(v);
}

u_int32_t
bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err)
{
	const struct mbuf *m = m0;

	while (k >= m->m_len) {
		k -= m->m_len;

		m = m->m_next;
		if (m == NULL) {
			*err = 1;
			return (0);
		}
	}

	*err = 0;
	return (m->m_data[k]);
}

u_int
bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen)
{
	return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen);
}
1779