/*	$OpenBSD: bpf.c,v 1.142 2016/06/10 20:33:29 vgross Exp $	*/
/*	$NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org>
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/socket.h>
#include <sys/poll.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/rwlock.h>
#include <sys/atomic.h>
#include <sys/srp.h>
#include <sys/specdev.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include "vlan.h"
#if NVLAN > 0
#include <net/if_vlan_var.h>
#endif

#define BPF_BUFSIZE 32768

#define PRINET  26			/* interruptible */

/* from kern/kern_clock.c; incremented each clock tick. */
extern int ticks;

/*
 * The default read buffer size is patchable.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_MAXBUFSIZE;

/*
 *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
 *  bpf_d_list is the list of descriptors
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_d_list;

void	bpf_allocbufs(struct bpf_d *);
void	bpf_freed(struct bpf_d *);
void	bpf_ifname(struct ifnet *, struct ifreq *);
int	_bpf_mtap(caddr_t, const struct mbuf *, u_int,
	    void (*)(const void *, void *, size_t));
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, u_int, struct mbuf **,
	    struct sockaddr *, struct bpf_insn *);
void	bpf_attachd(struct bpf_d *, struct bpf_if *);
void	bpf_detachd(struct bpf_d *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfpoll(dev_t, int, struct proc *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    void (*)(const void *, void *, size_t), struct timeval *);
void	bpf_reset_d(struct bpf_d *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);

int	bpf_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t);

struct bpf_d *bpfilter_lookup(int);
struct bpf_d *bpfilter_create(int);
void bpfilter_destroy(struct bpf_d *);

/*
 * Reference count access to descriptor buffers
 */
#define D_GET(d) ((d)->bd_ref++)
#define D_PUT(d) bpf_freed(d)
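/*
 * Editorial note (a sketch of the discipline, not original source text):
 * D_GET()/D_PUT() must stay balanced.  Every path that publishes or
 * sleeps on a descriptor takes a reference first and drops it when done:
 *
 *	D_GET(d);	- bpfopen(), bpfread(), bpfkqfilter(), bpf_d_ref()
 *	...use d...
 *	D_PUT(d);	- bpfclose(), filt_bpfrdetach(), bpf_d_unref()
 *
 * The final D_PUT() runs bpf_freed(), which tears the descriptor down
 * once bd_ref drops to zero.
 */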

/*
 * garbage collector srps
 */

void bpf_d_ref(void *, void *);
void bpf_d_unref(void *, void *);
struct srpl_rc bpf_d_rc = SRPL_RC_INITIALIZER(bpf_d_ref, bpf_d_unref, NULL);

void bpf_insn_dtor(void *, void *);
struct srp_gc bpf_insn_gc = SRP_GC_INITIALIZER(bpf_insn_dtor, NULL);

int
bpf_movein(struct uio *uio, u_int linktype, struct mbuf **mp,
    struct sockaddr *sockp, struct bpf_insn *filter)
{
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen;
	u_int len;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_LOOP:
		sockp->sa_family = AF_UNSPEC;
		hlen = sizeof(u_int32_t);
		break;

	default:
		return (EIO);
	}

	if (uio->uio_resid > MAXMCLBYTES)
		return (EIO);
	len = uio->uio_resid;

	MGETHDR(m, M_WAIT, MT_DATA);
	m->m_pkthdr.ph_ifidx = 0;
	m->m_pkthdr.len = len - hlen;

	if (len > MHLEN) {
		MCLGETI(m, M_WAIT, NULL, len);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_len = len;
	*mp = m;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(filter, mtod(m, u_char *), len, len);
	if (slen < len) {
		error = EPERM;
		goto bad;
	}

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}
	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (linktype == DLT_LOOP) {
			u_int32_t af;

			/* the link header indicates the address family */
			KASSERT(hlen == sizeof(u_int32_t));
			memcpy(&af, m->m_data, hlen);
			sockp->sa_family = ntohl(af);
		} else
			memcpy(sockp->sa_data, m->m_data, hlen);
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
	}

	/*
	 * Prepend the data link type as a mbuf tag
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	return (0);
 bad:
	m_freem(m);
	return (error);
}
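/*
 * Usage sketch (editorial, not original source text): for a DLT_LOOP
 * descriptor a write from userland carries the address family as a
 * 32-bit network-order prefix, which the code above strips into the
 * sockaddr:
 *
 *	uint32_t af = htonl(AF_INET);
 *	struct iovec iov[2] = {
 *		{ &af, sizeof(af) },	- consumed as the link header
 *		{ pkt, pktlen },	- the raw IP packet
 *	};
 *	writev(bpf_fd, iov, 2);
 */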

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 * Must be called at splnet.
 */
void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */

	d->bd_bif = bp;

	KERNEL_ASSERT_LOCKED();
	SRPL_INSERT_HEAD_LOCKED(&bpf_d_rc, &bp->bif_dlist, d, bd_next);

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_if *bp;

	bp = d->bd_bif;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		d->bd_promisc = 0;
		error = ifpromisc(bp->bif_ifp, 0);
		if (error && !(error == EINVAL || error == ENODEV))
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
	}

	/* Remove d from the interface's descriptor list. */
	KERNEL_ASSERT_LOCKED();
	SRPL_REMOVE_LOCKED(&bpf_d_rc, &bp->bif_dlist, d, bpf_d, bd_next);

	if (SRPL_EMPTY_LOCKED(&bp->bif_dlist)) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*d->bd_bif->bif_driverp = 0;
	}

	d->bd_bif = NULL;
}

void
bpfilterattach(int n)
{
	LIST_INIT(&bpf_d_list);
}

/*
 * Open the bpf device.  Returns ENXIO for an illegal minor device number,
 * EBUSY if the device is already open.
 */
int
bpfopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *d;

	if (minor(dev) & ((1 << CLONE_SHIFT) - 1))
		return (ENXIO);

	/* create on demand */
	if ((d = bpfilter_create(minor(dev))) == NULL)
		return (EBUSY);

	/* Set up the default buffer size, signal, and read timeout. */
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;

	if (flag & FNONBLOCK)
		d->bd_rtout = -1;

	D_GET(d);

	return (0);
}
355 
356 /*
357  * Close the descriptor by detaching it from its interface,
358  * deallocating its buffers, and marking it free.
359  */
360 int
361 bpfclose(dev_t dev, int flag, int mode, struct proc *p)
362 {
363 	struct bpf_d *d;
364 	int s;
365 
366 	d = bpfilter_lookup(minor(dev));
367 	s = splnet();
368 	if (d->bd_bif)
369 		bpf_detachd(d);
370 	bpf_wakeup(d);
371 	D_PUT(d);
372 	splx(s);
373 
374 	return (0);
375 }
376 
377 /*
378  * Rotate the packet buffers in descriptor d.  Move the store buffer
379  * into the hold slot, and the free buffer into the store slot.
380  * Zero the length of the new store buffer.
381  */
#define ROTATE_BUFFERS(d) do {			\
	(d)->bd_hbuf = (d)->bd_sbuf;		\
	(d)->bd_hlen = (d)->bd_slen;		\
	(d)->bd_sbuf = (d)->bd_fbuf;		\
	(d)->bd_slen = 0;			\
	(d)->bd_fbuf = NULL;			\
} while (0)
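/*
 * Illustration (editorial): the descriptor cycles three equally sized
 * buffers.  Packets are appended to the store buffer; a rotation
 * publishes it to readers and recycles the free buffer:
 *
 *	before:	hbuf = NULL	sbuf = A (bd_slen)	fbuf = B
 *	after:	hbuf = A	sbuf = B (len 0)	fbuf = NULL
 *
 * fbuf stays NULL until bpfread() hands the hold buffer back, so a
 * second overflow before the read completes drops packets.
 */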
/*
 *  bpfread - read the next chunk of packets from the buffers
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	int error;
	int s;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	s = splnet();

	D_GET(d);

	/*
	 * If there's a timeout, bd_rdStart is tagged when we start the read.
	 * We can then figure out when we're done reading.
	 */
	if (d->bd_rtout != -1 && d->bd_rdStart == 0)
		d->bd_rdStart = ticks;
	else
		d->bd_rdStart = 0;

	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == 0) {
		if (d->bd_bif == NULL) {
			/* interface is gone */
			if (d->bd_slen == 0) {
				D_PUT(d);
				splx(s);
				return (EIO);
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_immediate && d->bd_slen != 0) {
			/*
			 * One or more packets arrived since the previous
			 * read or while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_rtout == -1) {
			/* User requested non-blocking I/O */
			error = EWOULDBLOCK;
		} else {
			if ((d->bd_rdStart + d->bd_rtout) < ticks) {
				error = tsleep((caddr_t)d, PRINET|PCATCH, "bpf",
				    d->bd_rtout);
			} else
				error = EWOULDBLOCK;
		}
		if (error == EINTR || error == ERESTART) {
			D_PUT(d);
			splx(s);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				D_PUT(d);
				splx(s);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	splx(s);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);

	s = splnet();
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;

	D_PUT(d);
	splx(s);

	return (error);
}
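/*
 * Usage sketch (editorial, not original source text): because of the
 * uio_resid check above, a reader must use a buffer of exactly the
 * kernel buffer size:
 *
 *	u_int blen;
 *	if (ioctl(fd, BIOCGBLEN, &blen) == -1)
 *		err(1, "BIOCGBLEN");
 *	buf = malloc(blen);
 *	n = read(fd, buf, blen);	- EINVAL for any other size
 */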


/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
void
bpf_wakeup(struct bpf_d *d)
{
	wakeup((caddr_t)d);
	if (d->bd_async && d->bd_sig)
		csignal(d->bd_pgid, d->bd_sig,
		    d->bd_siguid, d->bd_sigeuid);

	selwakeup(&d->bd_sel);
	/* XXX */
	d->bd_sel.si_selpid = 0;
}

int
bpfwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m;
	struct bpf_program *bf;
	struct bpf_insn *fcode = NULL;
	int error, s;
	struct sockaddr_storage dst;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0)
		return (ENETDOWN);

	if (uio->uio_resid == 0)
		return (0);

	KERNEL_ASSERT_LOCKED(); /* for accessing bd_wfilter */
	bf = srp_get_locked(&d->bd_wfilter);
	if (bf != NULL)
		fcode = bf->bf_insns;

	error = bpf_movein(uio, d->bd_bif->bif_dlt, &m,
	    (struct sockaddr *)&dst, fcode);
	if (error)
		return (error);

	if (m->m_pkthdr.len > ifp->if_mtu) {
		m_freem(m);
		return (EMSGSIZE);
	}

	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
	m->m_pkthdr.pf.prio = ifp->if_llprio;

	if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	s = splsoftnet();
	error = ifp->if_output(ifp, m, (struct sockaddr *)&dst, NULL);
	splx(s);
	/*
	 * The driver frees the mbuf.
	 */
	return (error);
}
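/*
 * Usage sketch (editorial; the device path and interface name are
 * hypothetical examples): injecting a frame once a descriptor is bound
 * to an interface.  The packet beyond the link header must fit the MTU
 * or the write fails with EMSGSIZE:
 *
 *	struct ifreq ifr;
 *	int fd = open("/dev/bpf0", O_RDWR);
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	write(fd, frame, framelen);
 */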

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.  Should be called at splnet.
 */
void
bpf_reset_d(struct bpf_d *d)
{
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set ethernet read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLTLIST	Get supported link layer types.
 *  BIOCGDLT		Get link layer type.
 *  BIOCSDLT		Set link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct bpf_d *d;
	int s, error = 0;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_locked && suser(p, 0) != 0) {
		/* list of allowed ioctls when locked and not root */
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCGDIRFILT:
			break;
		default:
			return (EPERM);
		}
	}

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			s = splnet();
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			splx(s);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr, 0);
		break;

	/*
	 * Set link layer write filter.
	 */
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, 1);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		s = splnet();
		bpf_reset_d(d);
		splx(s);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		s = splnet();
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		splx(s);
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Get the data link type.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Set the data link type.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif->bif_ifp, (struct ifreq *)addr);
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/* Compute number of ticks. */
			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
			if (d->bd_rtout == 0 && tv->tv_usec != 0)
				d->bd_rtout = 1;
			break;
		}
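	/*
	 * Worked example (editorial): with hz = 100 (so tick = 10000
	 * microseconds), a timeout of 2.5 s converts to
	 * 2 * 100 + 500000 / 10000 = 250 ticks.  A nonzero timeout
	 * shorter than one tick is rounded up to 1 rather than
	 * truncated to 0, which would mean "no timeout".
	 */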

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCLOCK:		/* set "locked" flag (no reset) */
		d->bd_locked = 1;
		break;

	case BIOCGFILDROP:	/* get "filter-drop" flag */
		*(u_int *)addr = d->bd_fildrop;
		break;

	case BIOCSFILDROP:	/* set "filter-drop" flag */
		d->bd_fildrop = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCGDIRFILT:	/* get direction filter */
		*(u_int *)addr = d->bd_dirfilt;
		break;

	case BIOCSDIRFILT:	/* set direction filter */
		d->bd_dirfilt = (*(u_int *)addr) &
		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		if (*(int *)addr)
			d->bd_rtout = -1;
		else
			d->bd_rtout = 0;
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	/*
	 * N.B.  ioctl (FIOSETOWN) and fcntl (F_SETOWN) both end up doing
	 * the equivalent of a TIOCSPGRP and hence end up here.  *However*
	 * TIOCSPGRP's arg is a process group if it's positive and a process
	 * id if it's negative.  This is exactly the opposite of what the
	 * other two functions want!  Therefore there is code in ioctl and
	 * fcntl to negate the arg before calling here.
	 */
	case TIOCSPGRP:		/* Process or group to send signals to */
		d->bd_pgid = *(int *)addr;
		d->bd_siguid = p->p_ucred->cr_ruid;
		d->bd_sigeuid = p->p_ucred->cr_uid;
		break;

	case TIOCGPGRP:
		*(int *)addr = d->bd_pgid;
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, int wf)
{
	struct bpf_program *bf;
	struct srp *filter;
	struct bpf_insn *fcode;
	u_int flen, size;
	int s;

	KERNEL_ASSERT_LOCKED();
	filter = wf ? &d->bd_wfilter : &d->bd_rfilter;

	if (fp->bf_insns == 0) {
		if (fp->bf_len != 0)
			return (EINVAL);
		srp_update_locked(&bpf_insn_gc, filter, NULL);
		s = splnet();
		bpf_reset_d(d);
		splx(s);
		return (0);
	}
	flen = fp->bf_len;
	if (flen > BPF_MAXINSNS)
		return (EINVAL);

	fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
	    M_WAITOK | M_CANFAIL);
	if (fcode == NULL)
		return (ENOMEM);

	size = flen * sizeof(*fp->bf_insns);
	if (copyin(fp->bf_insns, fcode, size) != 0 ||
	    bpf_validate(fcode, (int)flen) == 0) {
		free(fcode, M_DEVBUF, size);
		return (EINVAL);
	}

	bf = malloc(sizeof(*bf), M_DEVBUF, M_WAITOK);
	bf->bf_len = flen;
	bf->bf_insns = fcode;

	srp_update_locked(&bpf_insn_gc, filter, bf);

	s = splnet();
	bpf_reset_d(d);
	splx(s);
	return (0);
}
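/*
 * Usage sketch (editorial, not original source text): installing a
 * trivial accept-all read filter from userland.  bpf_validate() above
 * rejects malformed programs before they are published via SRP:
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET|BPF_K, (u_int)-1),	- accept whole packet
 *	};
 *	struct bpf_program prog = { 1, insns };
 *	ioctl(fd, BIOCSETF, &prog);
 */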

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp, *candidate = NULL;
	int s;

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == NULL ||
		    strcmp(ifp->if_xname, ifr->ifr_name) != 0)
			continue;

		if (candidate == NULL || candidate->bif_dlt > bp->bif_dlt)
			candidate = bp;
	}

	if (candidate != NULL) {
		/*
		 * Allocate the packet buffers if we need to.
		 * If we're already attached to requested interface,
		 * just flush the buffer.
		 */
		if (d->bd_sbuf == NULL)
			bpf_allocbufs(d);
		s = splnet();
		if (candidate != d->bd_bif) {
			if (d->bd_bif)
				/*
				 * Detach if attached to something else.
				 */
				bpf_detachd(d);

			bpf_attachd(d, candidate);
		}
		bpf_reset_d(d);
		splx(s);
		return (0);
	}
	/* Not found. */
	return (ENXIO);
}

/*
 * Copy the interface name to the ifreq.
 */
void
bpf_ifname(struct ifnet *ifp, struct ifreq *ifr)
{
	bcopy(ifp->if_xname, ifr->ifr_name, IFNAMSIZ);
}

/*
 * Support for poll() system call
 */
int
bpfpoll(dev_t dev, int events, struct proc *p)
{
	struct bpf_d *d;
	int s, revents;

	/*
	 * An imitation of the FIONREAD ioctl code.
	 */
	d = bpfilter_lookup(minor(dev));
	/*
	 * XXX The USB stack somehow manages to trigger a race condition
	 * that causes bpfilter_lookup to return NULL when a USB device
	 * is detached while it is up and has an open bpf handler (e.g.
	 * dhclient).  We should still check whether we can fix the root
	 * cause of this issue.
	 */
	if (d == NULL)
		return (POLLERR);

	/* Always ready to write data */
	revents = events & (POLLOUT | POLLWRNORM);

	if (events & (POLLIN | POLLRDNORM)) {
		s = splnet();
		if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			/*
			 * if there's a timeout, mark the time we
			 * started waiting.
			 */
			if (d->bd_rtout != -1 && d->bd_rdStart == 0)
				d->bd_rdStart = ticks;
			selrecord(p, &d->bd_sel);
		}
		splx(s);
	}
	return (revents);
}

struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfrdetach, filt_bpfread };

int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	struct klist *klist;
	int s;

	d = bpfilter_lookup(minor(dev));
	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_sel.si_note;
		kn->kn_fop = &bpfread_filtops;
		break;
	default:
		return (EINVAL);
	}

	kn->kn_hook = d;

	s = splnet();
	D_GET(d);
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
	if (d->bd_rtout != -1 && d->bd_rdStart == 0)
		d->bd_rdStart = ticks;
	splx(s);

	return (0);
}
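/*
 * Usage sketch (editorial): registering for read events via kqueue;
 * only EVFILT_READ is supported here, anything else fails with EINVAL:
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 */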

void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;
	int s;

	s = splnet();
	SLIST_REMOVE(&d->bd_sel.si_note, kn, knote, kn_selnext);
	D_PUT(d);
	splx(s);
}

int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;

	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;
	return (kn->kn_data > 0);
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
int
bpf_tap(caddr_t arg, u_char *pkt, u_int pktlen, u_int direction)
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct srp_ref sr;
	struct bpf_d *d;
	size_t slen;
	struct timeval tv;
	int drop = 0, gottime = 0;
	int s;

	if (bp == NULL)
		return (0);

	SRPL_FOREACH(d, &sr, &bp->bif_dlist, bd_next) {
		atomic_inc_long(&d->bd_rcount);

		if ((direction & d->bd_dirfilt) != 0)
			slen = 0;
		else {
			struct srp_ref bsr;
			struct bpf_program *bf;
			struct bpf_insn *fcode = NULL;

			bf = srp_enter(&bsr, &d->bd_rfilter);
			if (bf != NULL)
				fcode = bf->bf_insns;
			slen = bpf_filter(fcode, pkt, pktlen, pktlen);
			srp_leave(&bsr);
		}

		if (slen > 0) {
			if (!gottime++)
				microtime(&tv);

			KERNEL_LOCK();
			s = splnet();
			if (d->bd_bif != NULL) {
				bpf_catchpacket(d, pkt, pktlen, slen,
				    bcopy, &tv);
			}
			splx(s);
			KERNEL_UNLOCK();

			if (d->bd_fildrop)
				drop = 1;
		}
	}
	SRPL_LEAVE(&sr);

	return (drop);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

/*
 * Like bpf_mtap(), but the copy function can be given.  Used by the
 * various bpf_mtap* variants below.
 */
int
_bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction,
    void (*cpfn)(const void *, void *, size_t))
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct srp_ref sr;
	struct bpf_d *d;
	size_t pktlen, slen;
	const struct mbuf *m0;
	struct timeval tv;
	int gottime = 0;
	int drop = 0;
	int s;

	if (m == NULL)
		return (0);

	if (cpfn == NULL)
		cpfn = bpf_mcopy;

	if (bp == NULL)
		return (0);

	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;

	SRPL_FOREACH(d, &sr, &bp->bif_dlist, bd_next) {
		atomic_inc_long(&d->bd_rcount);

		if ((direction & d->bd_dirfilt) != 0)
			slen = 0;
		else {
			struct srp_ref bsr;
			struct bpf_program *bf;
			struct bpf_insn *fcode = NULL;

			bf = srp_enter(&bsr, &d->bd_rfilter);
			if (bf != NULL)
				fcode = bf->bf_insns;
			slen = bpf_mfilter(fcode, m, pktlen);
			srp_leave(&bsr);
		}

		if (slen > 0) {
			if (!gottime++)
				microtime(&tv);

			KERNEL_LOCK();
			s = splnet();
			if (d->bd_bif != NULL) {
				bpf_catchpacket(d, (u_char *)m, pktlen, slen,
				    cpfn, &tv);
			}
			splx(s);
			KERNEL_UNLOCK();

			if (d->bd_fildrop)
				drop = 1;
		}
	}
	SRPL_LEAVE(&sr);

	return (drop);
}

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
int
bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
{
	return _bpf_mtap(arg, m, direction, NULL);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend some arbitrary header from a linear buffer.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_hdr(caddr_t arg, caddr_t data, u_int dlen, const struct mbuf *m,
    u_int direction, void (*cpfn)(const void *, void *, size_t))
{
	struct m_hdr mh;
	const struct mbuf *m0;

	if (dlen > 0) {
		mh.mh_flags = 0;
		mh.mh_next = (struct mbuf *)m;
		mh.mh_len = dlen;
		mh.mh_data = data;
		m0 = (struct mbuf *)&mh;
	} else
		m0 = m;

	return _bpf_mtap(arg, m0, direction, cpfn);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend the address family.
 *
 * Con up a minimal dummy header to pacify bpf.  We allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
{
	u_int32_t    afh;

	afh = htonl(af);

	return bpf_mtap_hdr(arg, (caddr_t)&afh, sizeof(afh),
	    m, direction, NULL);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend a VLAN encapsulation header.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
{
#if NVLAN > 0
	struct ether_vlan_header evh;
	struct m_hdr mh;
	uint8_t prio;

	if ((m->m_flags & M_VLANTAG) == 0)
#endif
	{
		return bpf_mtap(arg, m, direction);
	}

#if NVLAN > 0
	KASSERT(m->m_len >= ETHER_HDR_LEN);

	prio = m->m_pkthdr.pf.prio;
	if (prio <= 1)
		prio = !prio;

	memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN);
	evh.evl_proto = evh.evl_encap_proto;
	evh.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evh.evl_tag = htons(m->m_pkthdr.ether_vtag |
	    (prio << EVL_PRIO_BITS));

	mh.mh_flags = 0;
	mh.mh_data = m->m_data + ETHER_HDR_LEN;
	mh.mh_len = m->m_len - ETHER_HDR_LEN;
	mh.mh_next = m->m_next;

	return bpf_mtap_hdr(arg, (caddr_t)&evh, sizeof(evh),
	    (struct mbuf *)&mh, direction, NULL);
#endif
}
/*
 * Move the packet data from interface memory (pkt) into the store
 * buffer, waking up pending reads if the buffers had to be rotated or
 * immediate mode is set.  "cpfn" is the routine called to do the
 * actual data transfer.  bcopy is passed in to copy contiguous chunks,
 * while bpf_mcopy is passed in to copy mbuf chains.  In the latter
 * case, pkt is really an mbuf.
 */
void
bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
    void (*cpfn)(const void *, void *, size_t), struct timeval *tv)
{
	struct bpf_hdr *hp;
	int totlen, curlen;
	int hdrlen = d->bd_bif->bif_hdrlen;

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		bpf_wakeup(d);
		curlen = 0;
	}

	/*
	 * Append the bpf header.
	 */
	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	hp->bh_tstamp.tv_sec = tv->tv_sec;
	hp->bh_tstamp.tv_usec = tv->tv_usec;
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;

	if (d->bd_immediate) {
		/*
		 * Immediate mode is set.  A packet arrived so any
		 * reads should be woken up.
		 */
		bpf_wakeup(d);
	}

	if (d->bd_rdStart && (d->bd_rtout + d->bd_rdStart < ticks)) {
		/*
		 * we could be selecting on the bpf, and we
		 * may have timeouts set.  We got here by getting
		 * a packet, so wake up the reader.
		 */
		if (d->bd_fbuf) {
			d->bd_rdStart = 0;
			ROTATE_BUFFERS(d);
			bpf_wakeup(d);
		}
	}
}
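/*
 * Layout sketch (editorial): each capture in the store buffer is a
 * bpf_hdr followed by the (possibly truncated) packet, with the next
 * record starting on a word boundary.  E.g. with a bif_hdrlen of 18
 * and a 60-byte packet, totlen = 78 and the next record begins at
 * BPF_WORDALIGN(78) = 80.
 */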

/*
 * Allocate and initialize the packet buffers of a descriptor.
 */
void
bpf_allocbufs(struct bpf_d *d)
{
	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK);
	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK);
	d->bd_slen = 0;
	d->bd_hlen = 0;
}

/*
 * Free buffers currently in use by a descriptor
 * when the reference count drops to zero.
 */
void
bpf_freed(struct bpf_d *d)
{
	if (--d->bd_ref > 0)
		return;

	free(d->bd_sbuf, M_DEVBUF, 0);
	free(d->bd_hbuf, M_DEVBUF, 0);
	free(d->bd_fbuf, M_DEVBUF, 0);
	KERNEL_ASSERT_LOCKED();
	srp_update_locked(&bpf_insn_gc, &d->bd_rfilter, NULL);
	srp_update_locked(&bpf_insn_gc, &d->bd_wfilter, NULL);

	bpfilter_destroy(d);
}

/*
 * Attach an interface to bpf.  driverp is a pointer to a (struct bpf_if *)
 * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
 * size of the link header (variable length headers not yet supported).
 */
void
bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL)
		panic("bpfattach");
	SRPL_INIT(&bp->bif_dlist);
	bp->bif_driverp = (struct bpf_if **)driverp;
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;

	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = NULL;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
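	/*
	 * Worked example (editorial, assuming the classic SIZEOF_BPF_HDR
	 * of 18): for DLT_EN10MB, hdrlen is ETHER_HDR_LEN (14), so
	 * bif_hdrlen = BPF_WORDALIGN(14 + 18) - 14 = 32 - 14 = 18, and
	 * the network layer header following the 14-byte ethernet header
	 * lands at offset 32, a longword boundary.
	 */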
}

/* Detach an interface from its attached bpf device.  */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *nbp, **pbp = &bpf_iflist;
	struct bpf_d *bd;
	int maj;

	KERNEL_ASSERT_LOCKED();

	for (bp = bpf_iflist; bp; bp = nbp) {
		nbp = bp->bif_next;
		if (bp->bif_ifp == ifp) {
			*pbp = nbp;

			/* Locate the major number. */
			for (maj = 0; maj < nchrdev; maj++)
				if (cdevsw[maj].d_open == bpfopen)
					break;

			while ((bd = SRPL_FIRST_LOCKED(&bp->bif_dlist))) {
				struct bpf_d *d;

				/*
				 * Locate the minor number and nuke the vnode
				 * for any open instance.
				 */
				LIST_FOREACH(d, &bpf_d_list, bd_list)
					if (d == bd) {
						vdevgone(maj, d->bd_unit,
						    d->bd_unit, VCHR);
						break;
					}
			}

			free(bp, M_DEVBUF, sizeof *bp);
		} else
			pbp = &bp->bif_next;
	}
	ifp->if_bpf = NULL;
}

int
bpf_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int newval;
	int error;

	switch (name[0]) {
	case NET_BPF_BUFSIZE:
		newval = bpf_bufsize;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &newval);
		if (error)
			return (error);
		if (newval < BPF_MINBUFSIZE || newval > bpf_maxbufsize)
			return (EINVAL);
		bpf_bufsize = newval;
		break;
	case NET_BPF_MAXBUFSIZE:
		newval = bpf_maxbufsize;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &newval);
		if (error)
			return (error);
		if (newval < BPF_MINBUFSIZE)
			return (EINVAL);
		bpf_maxbufsize = newval;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}

int
bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	static struct rwlock bpf_sysctl_lk = RWLOCK_INITIALIZER("bpfsz");
	int flags = RW_INTR;
	int error;

	if (namelen != 1)
		return (ENOTDIR);

	flags |= (newp == NULL) ? RW_READ : RW_WRITE;

	error = rw_enter(&bpf_sysctl_lk, flags);
	if (error != 0)
		return (error);

	error = bpf_sysctl_locked(name, namelen, oldp, oldlenp, newp, newlen);

	rw_exit(&bpf_sysctl_lk);

	return (error);
}
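/*
 * Usage sketch (editorial; the value is only an example): these knobs
 * are exposed as net.bpf.bufsize and net.bpf.maxbufsize, e.g.:
 *
 *	# sysctl net.bpf.maxbufsize=2097152
 *
 * Writers take the rwlock exclusively; readers share it.
 */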

struct bpf_d *
bpfilter_lookup(int unit)
{
	struct bpf_d *bd;

	LIST_FOREACH(bd, &bpf_d_list, bd_list)
		if (bd->bd_unit == unit)
			return (bd);
	return (NULL);
}

struct bpf_d *
bpfilter_create(int unit)
{
	struct bpf_d *bd;

	KASSERT(bpfilter_lookup(unit) == NULL);

	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) != NULL) {
		bd->bd_unit = unit;
		LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);
	}
	return (bd);
}

void
bpfilter_destroy(struct bpf_d *bd)
{
	LIST_REMOVE(bd, bd_list);
	free(bd, M_DEVBUF, sizeof(*bd));
}

/*
 * Get the list of data link types available on the interface.
 */
int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return (ENOMEM);
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
			if (error)
				break;
		}
		n++;
	}

	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int s;
	struct ifnet *ifp;
	struct bpf_if *bp;

	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	ifp = d->bd_bif->bif_ifp;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return (EINVAL);
	s = splnet();
	bpf_detachd(d);
	bpf_attachd(d, bp);
	bpf_reset_d(d);
	splx(s);
	return (0);
}

void
bpf_d_ref(void *null, void *d)
{
	D_GET((struct bpf_d *)d);
}

void
bpf_d_unref(void *null, void *d)
{
	D_PUT(d);
}

void
bpf_insn_dtor(void *null, void *f)
{
	struct bpf_program *bf = f;
	struct bpf_insn *insns = bf->bf_insns;

	free(insns, M_DEVBUF, bf->bf_len * sizeof(*insns));
	free(bf, M_DEVBUF, sizeof(*bf));
}

u_int32_t	bpf_mbuf_ldw(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldh(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldb(const void *, u_int32_t, int *);

int		bpf_mbuf_copy(const struct mbuf *, u_int32_t,
		    void *, u_int32_t);

const struct bpf_ops bpf_mbuf_ops = {
	bpf_mbuf_ldw,
	bpf_mbuf_ldh,
	bpf_mbuf_ldb,
};

int
bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len)
{
	u_int8_t *cp = buf;
	u_int32_t count;

	while (off >= m->m_len) {
		off -= m->m_len;

		m = m->m_next;
		if (m == NULL)
			return (-1);
	}

	for (;;) {
		count = min(m->m_len - off, len);

		memcpy(cp, m->m_data + off, count);
		len -= count;

		if (len == 0)
			return (0);

		m = m->m_next;
		if (m == NULL)
			break;

		cp += count;
		off = 0;
	}

	return (-1);
}
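/*
 * Worked example (editorial): for a two-mbuf chain with m_len 10 and
 * 20, a copy at off = 15, len = 8 skips the first mbuf (off becomes 5),
 * copies min(20 - 5, 8) = 8 bytes from the second and returns 0.
 * Asking for bytes past the end of the chain returns -1, which the
 * load helpers below turn into *err = 1.
 */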

u_int32_t
bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err)
{
	u_int32_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohl(v);
}

u_int32_t
bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err)
{
	u_int16_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohs(v);
}

u_int32_t
bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err)
{
	const struct mbuf *m = m0;

	while (k >= m->m_len) {
		k -= m->m_len;

		m = m->m_next;
		if (m == NULL) {
			*err = 1;
			return (0);
		}
	}

	*err = 0;
	return (m->m_data[k]);
}

u_int
bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen)
{
	return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen);
}