/*	$OpenBSD: bpf.c,v 1.188 2020/02/20 16:56:52 visa Exp $	*/
/*	$NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org>
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/poll.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/rwlock.h>
#include <sys/atomic.h>
#include <sys/smr.h>
#include <sys/specdev.h>
#include <sys/selinfo.h>
#include <sys/sigio.h>
#include <sys/task.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include "vlan.h"
#if NVLAN > 0
#include <net/if_vlan_var.h>
#endif

#define BPF_BUFSIZE 32768

#define PRINET  26			/* interruptible */

/* from kern/kern_clock.c; incremented each clock tick. */
extern int ticks;

/*
 * The default read buffer size is patchable.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_MAXBUFSIZE;

/*
 *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
 *  bpf_d_list is the list of descriptors
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_d_list;

int	bpf_allocbufs(struct bpf_d *);
void	bpf_ifname(struct bpf_if*, struct ifreq *);
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, struct bpf_d *, struct mbuf **,
	    struct sockaddr *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfpoll(dev_t, int, struct proc *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_wakeup_cb(void *);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    struct timeval *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);

int	bpf_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t);

struct bpf_d *bpfilter_lookup(int);

/*
 * Called holding ``bd_mtx''.
 */
void	bpf_attachd(struct bpf_d *, struct bpf_if *);
void	bpf_detachd(struct bpf_d *);
void	bpf_resetd(struct bpf_d *);

void	bpf_prog_smr(void *);
void	bpf_d_smr(void *);

/*
 * Reference count access to descriptor buffers
 */
void	bpf_get(struct bpf_d *);
void	bpf_put(struct bpf_d *);


struct rwlock bpf_sysctl_lk = RWLOCK_INITIALIZER("bpfsz");

int
bpf_movein(struct uio *uio, struct bpf_d *d, struct mbuf **mp,
    struct sockaddr *sockp)
{
	struct bpf_program_smr *bps;
	struct bpf_insn *fcode = NULL;
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen;
	u_int len;
	u_int linktype;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	linktype = d->bd_bif->bif_dlt;
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_LOOP:
		sockp->sa_family = AF_UNSPEC;
		hlen = sizeof(u_int32_t);
		break;

	default:
		return (EIO);
	}

	if (uio->uio_resid > MAXMCLBYTES)
		return (EIO);
	len = uio->uio_resid;

	MGETHDR(m, M_WAIT, MT_DATA);
	m->m_pkthdr.ph_ifidx = 0;
	m->m_pkthdr.len = len - hlen;

	if (len > MHLEN) {
		MCLGETI(m, M_WAIT, NULL, len);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_len = len;
	*mp = m;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	smr_read_enter();
	bps = SMR_PTR_GET(&d->bd_wfilter);
	if (bps != NULL)
		fcode = bps->bps_bf.bf_insns;
	slen = bpf_filter(fcode, mtod(m, u_char *), len, len);
	smr_read_leave();

	if (slen < len) {
		error = EPERM;
		goto bad;
	}

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}
	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (linktype == DLT_LOOP) {
			u_int32_t af;

			/* the link header indicates the address family */
			KASSERT(hlen == sizeof(u_int32_t));
			memcpy(&af, m->m_data, hlen);
			sockp->sa_family = ntohl(af);
		} else
			memcpy(sockp->sa_data, m->m_data, hlen);
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
	}

	/*
	 * Prepend the data link type as a mbuf tag
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	return (0);
 bad:
	m_freem(m);
	return (error);
}
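
/*
 * Illustrative sketch (not compiled): how bpf_movein() splits a write
 * on a DLT_EN10MB descriptor.  The 14-byte Ethernet header is copied
 * into sockp->sa_data and stripped from the mbuf, so the interface's
 * if_output() routine sees the payload plus a sockaddr carrying the
 * link-level header:
 *
 *	write(fd, frame, framelen)
 *	  -> sockp->sa_data = frame[0..13]
 *	  -> m contains frame[14..framelen-1]
 *	  -> ifp->if_output(ifp, m, sockp, NULL)	(from bpfwrite())
 */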

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */

	d->bd_bif = bp;

	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_INSERT_HEAD_LOCKED(&bp->bif_dlist, d, bd_next);

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	bp = d->bd_bif;
	/* Not attached. */
	if (bp == NULL)
		return;

	/* Remove ``d'' from the interface's descriptor list. */
	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_REMOVE_LOCKED(&bp->bif_dlist, d, bpf_d, bd_next);

	if (SMR_SLIST_EMPTY_LOCKED(&bp->bif_dlist)) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*bp->bif_driverp = NULL;
	}

	d->bd_bif = NULL;

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		KASSERT(bp->bif_ifp != NULL);

		d->bd_promisc = 0;

		bpf_get(d);
		mtx_leave(&d->bd_mtx);
		NET_LOCK();
		error = ifpromisc(bp->bif_ifp, 0);
		NET_UNLOCK();
		mtx_enter(&d->bd_mtx);
		bpf_put(d);

		if (error && !(error == EINVAL || error == ENODEV ||
		    error == ENXIO))
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
	}
}

void
bpfilterattach(int n)
{
	LIST_INIT(&bpf_d_list);
}

/*
 * Open bpf device.  Returns ENXIO for illegal minor device number,
 * EBUSY if the descriptor cannot be allocated.
 */
int
bpfopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *bd;
	int unit = minor(dev);

	if (unit & ((1 << CLONE_SHIFT) - 1))
		return (ENXIO);

	KASSERT(bpfilter_lookup(unit) == NULL);

	/* create on demand */
	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (EBUSY);

	/* Mark "free" and do most initialization. */
	bd->bd_unit = unit;
	bd->bd_bufsize = bpf_bufsize;
	bd->bd_sig = SIGIO;
	mtx_init(&bd->bd_mtx, IPL_NET);
	task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
	smr_init(&bd->bd_smr);
	sigio_init(&bd->bd_sigio);

	if (flag & FNONBLOCK)
		bd->bd_rtout = -1;

	bpf_get(bd);
	LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
int
bpfclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *d;

	d = bpfilter_lookup(minor(dev));
	mtx_enter(&d->bd_mtx);
	bpf_detachd(d);
	bpf_wakeup(d);
	LIST_REMOVE(d, bd_list);
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	KASSERT(d->bd_in_uiomove == 0); \
	MUTEX_ASSERT_LOCKED(&d->bd_mtx); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;
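
/*
 * Illustrative sketch (not compiled): the three-buffer cycle driven by
 * ROTATE_BUFFERS().  Suppose store buffer A has filled while free
 * buffer B is idle and the hold slot is empty:
 *
 *	before:	bd_sbuf = A (bd_slen > 0), bd_hbuf = NULL, bd_fbuf = B
 *	after:	bd_hbuf = A (bd_hlen = old bd_slen),
 *		bd_sbuf = B (bd_slen = 0), bd_fbuf = NULL
 *
 * bpfread() later completes the cycle by handing A back as the new
 * free buffer once uiomove() has copied it out.
 */
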
/*
 *  bpfread - read next chunk of packets from buffers
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	caddr_t hbuf;
	int hlen, error;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	mtx_enter(&d->bd_mtx);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize) {
		error = EINVAL;
		goto out;
	}

	/*
	 * If there's a timeout, bd_rdStart is set when we start the read
	 * so we can figure out when we're done reading.
	 */
	if (d->bd_rtout != -1 && d->bd_rdStart == 0)
		d->bd_rdStart = ticks;
	else
		d->bd_rdStart = 0;

	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_bif == NULL) {
			/* interface is gone */
			if (d->bd_slen == 0) {
				error = EIO;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_immediate && d->bd_slen != 0) {
			/*
			 * One or more packets arrived since the previous
			 * read or while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_rtout == -1) {
			/* User requested non-blocking I/O */
			error = EWOULDBLOCK;
		} else {
			if (d->bd_rdStart <= ULONG_MAX - d->bd_rtout &&
			    d->bd_rdStart + d->bd_rtout < ticks) {
				error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
				    "bpf", d->bd_rtout);
			} else
				error = EWOULDBLOCK;
		}
		if (error == EINTR || error == ERESTART)
			goto out;
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf != NULL)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				error = 0;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	hbuf = d->bd_hbuf;
	hlen = d->bd_hlen;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_fbuf = NULL;
	d->bd_in_uiomove = 1;

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	mtx_leave(&d->bd_mtx);
	error = uiomove(hbuf, hlen, uio);
	mtx_enter(&d->bd_mtx);

	/* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */
	KASSERT(d->bd_fbuf == NULL);
	KASSERT(d->bd_hbuf == NULL);
	d->bd_fbuf = hbuf;
	d->bd_in_uiomove = 0;
out:
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (error);
}


/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
void
bpf_wakeup(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * As long as pgsigio() and selwakeup() need to be protected
	 * by the KERNEL_LOCK() we have to delay the wakeup to
	 * another context to keep the hot path KERNEL_LOCK()-free.
	 */
	bpf_get(d);
	if (!task_add(systq, &d->bd_wake_task))
		bpf_put(d);
}

void
bpf_wakeup_cb(void *xd)
{
	struct bpf_d *d = xd;

	wakeup(d);
	if (d->bd_async && d->bd_sig)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeup(&d->bd_sel);
	bpf_put(d);
}

int
bpfwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m;
	int error;
	struct sockaddr_storage dst;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	ifp = d->bd_bif->bif_ifp;

	if (ifp == NULL || (ifp->if_flags & IFF_UP) == 0) {
		error = ENETDOWN;
		goto out;
	}

	if (uio->uio_resid == 0) {
		error = 0;
		goto out;
	}

	error = bpf_movein(uio, d, &m, sstosa(&dst));
	if (error)
		goto out;

	if (m->m_pkthdr.len > ifp->if_mtu) {
		m_freem(m);
		error = EMSGSIZE;
		goto out;
	}

	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
	m->m_pkthdr.pf.prio = ifp->if_llprio;

	if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	NET_LOCK();
	error = ifp->if_output(ifp, m, sstosa(&dst), NULL);
	NET_UNLOCK();

out:
	bpf_put(d);
	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
void
bpf_resetd(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	KASSERT(d->bd_in_uiomove == 0);

	if (d->bd_hbuf != NULL) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set ethernet read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLTLIST	Get supported link layer types.
 *  BIOCGDLT		Get link layer type.
 *  BIOCSDLT		Set link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct bpf_d *d;
	int error = 0;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_locked && suser(p) != 0) {
		/* list of allowed ioctls when locked and not root */
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCGDIRFILT:
			break;
		default:
			return (EPERM);
		}
	}

	bpf_get(d);

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			mtx_enter(&d->bd_mtx);
			n = d->bd_slen;
			if (d->bd_hbuf != NULL)
				n += d->bd_hlen;
			mtx_leave(&d->bd_mtx);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			mtx_enter(&d->bd_mtx);
			d->bd_bufsize = size;
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr, 0);
		break;

	/*
	 * Set link layer write filter.
	 */
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, 1);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
		} else if (d->bd_bif->bif_ifp != NULL) {
			if (d->bd_promisc == 0) {
				MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
				NET_LOCK();
				error = ifpromisc(d->bd_bif->bif_ifp, 1);
				NET_UNLOCK();
				if (error == 0)
					d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			mtx_enter(&d->bd_mtx);
			error = bpf_setdlt(d, *(u_int *)addr);
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif, (struct ifreq *)addr);
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;
			u_long rtout;

			/* Compute number of ticks. */
			if (tv->tv_sec < 0 || !timerisvalid(tv)) {
				error = EINVAL;
				break;
			}
			if (tv->tv_sec > INT_MAX / hz) {
				error = EOVERFLOW;
				break;
			}
			rtout = tv->tv_sec * hz;
			if (tv->tv_usec / tick > INT_MAX - rtout) {
				error = EOVERFLOW;
				break;
			}
			rtout += tv->tv_usec / tick;
			d->bd_rtout = rtout;
			if (d->bd_rtout == 0 && tv->tv_usec != 0)
				d->bd_rtout = 1;
			break;
		}
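
		/*
		 * Worked example (a sketch, not from the source): with
		 * hz = 100 and tick = 10000 microseconds per tick, a
		 * timeout of { tv_sec = 1, tv_usec = 500000 } becomes
		 * rtout = 1 * 100 + 500000 / 10000 = 150 ticks.  The
		 * check above rounds a nonzero sub-tick timeout up to
		 * one tick instead of letting it collapse to zero.
		 */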

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCLOCK:		/* set "locked" flag (no reset) */
		d->bd_locked = 1;
		break;

	case BIOCGFILDROP:	/* get "filter-drop" flag */
		*(u_int *)addr = d->bd_fildrop;
		break;

	case BIOCSFILDROP: {	/* set "filter-drop" flag */
		unsigned int fildrop = *(u_int *)addr;
		switch (fildrop) {
		case BPF_FILDROP_PASS:
		case BPF_FILDROP_CAPTURE:
		case BPF_FILDROP_DROP:
			d->bd_fildrop = fildrop;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	}

	case BIOCGDIRFILT:	/* get direction filter */
		*(u_int *)addr = d->bd_dirfilt;
		break;

	case BIOCSDIRFILT:	/* set direction filter */
		d->bd_dirfilt = (*(u_int *)addr) &
		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		if (*(int *)addr)
			d->bd_rtout = -1;
		else
			d->bd_rtout = 0;
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:		/* Process or group to send signals to */
	case TIOCSPGRP:
		error = sigio_setown(&d->bd_sigio, cmd, addr);
		break;

	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&d->bd_sigio, cmd, addr);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}

	bpf_put(d);
	return (error);
}
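
/*
 * Hypothetical userland sketch (not part of this file): the usual
 * ioctl sequence against a bpf device.  Names such as "fd" and "em0"
 * are placeholders:
 *
 *	int fd = open("/dev/bpf0", O_RDWR);
 *	struct ifreq ifr;
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);	attach to the interface
 *	u_int on = 1;
 *	ioctl(fd, BIOCIMMEDIATE, &on);	wake up reads per packet
 *	u_int blen;
 *	ioctl(fd, BIOCGBLEN, &blen);	read() must use exactly this size
 *
 * bpfread() above rejects reads whose size differs from the buffer size.
 */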

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, int wf)
{
	struct bpf_program_smr *bps, *old_bps;
	struct bpf_insn *fcode;
	u_int flen, size;

	KERNEL_ASSERT_LOCKED();

	if (fp->bf_insns == 0) {
		if (fp->bf_len != 0)
			return (EINVAL);
		bps = NULL;
	} else {
		flen = fp->bf_len;
		if (flen > BPF_MAXINSNS)
			return (EINVAL);

		fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
		    M_WAITOK | M_CANFAIL);
		if (fcode == NULL)
			return (ENOMEM);

		size = flen * sizeof(*fp->bf_insns);
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    bpf_validate(fcode, (int)flen) == 0) {
			free(fcode, M_DEVBUF, size);
			return (EINVAL);
		}

		bps = malloc(sizeof(*bps), M_DEVBUF, M_WAITOK);
		smr_init(&bps->bps_smr);
		bps->bps_bf.bf_len = flen;
		bps->bps_bf.bf_insns = fcode;
	}

	if (wf == 0) {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_rfilter);
		SMR_PTR_SET_LOCKED(&d->bd_rfilter, bps);
	} else {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_wfilter);
		SMR_PTR_SET_LOCKED(&d->bd_wfilter, bps);
	}

	mtx_enter(&d->bd_mtx);
	bpf_resetd(d);
	mtx_leave(&d->bd_mtx);
	if (old_bps != NULL)
		smr_call(&old_bps->bps_smr, bpf_prog_smr, old_bps);

	return (0);
}
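
/*
 * Hypothetical userland sketch (not part of this file): installing a
 * minimal read filter via BIOCSETF.  The single-instruction program
 * accepts every packet with a 96-byte snapshot length; "fd" is assumed
 * to be an open bpf descriptor:
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 96),
 *	};
 *	struct bpf_program prog = { 1, insns };
 *	ioctl(fd, BIOCSETF, &prog);
 *
 * bpf_setf() validates the program with bpf_validate() and retires the
 * old one through smr_call() once readers are done with it.
 */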

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp, *candidate = NULL;
	int error = 0;

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(bp->bif_name, ifr->ifr_name) != 0)
			continue;

		if (candidate == NULL || candidate->bif_dlt > bp->bif_dlt)
			candidate = bp;
	}

	/* Not found. */
	if (candidate == NULL)
		return (ENXIO);

	/*
	 * Allocate the packet buffers if we need to.
	 * If we're already attached to requested interface,
	 * just flush the buffer.
	 */
	mtx_enter(&d->bd_mtx);
	if (d->bd_sbuf == NULL) {
		if ((error = bpf_allocbufs(d)))
			goto out;
	}
	if (candidate != d->bd_bif) {
		/*
		 * Detach if attached to something else.
		 */
		bpf_detachd(d);
		bpf_attachd(d, candidate);
	}
	bpf_resetd(d);
out:
	mtx_leave(&d->bd_mtx);
	return (error);
}

/*
 * Copy the interface name to the ifreq.
 */
void
bpf_ifname(struct bpf_if *bif, struct ifreq *ifr)
{
	bcopy(bif->bif_name, ifr->ifr_name, sizeof(ifr->ifr_name));
}

/*
 * Support for poll() system call
 */
int
bpfpoll(dev_t dev, int events, struct proc *p)
{
	struct bpf_d *d;
	int revents;

	KERNEL_ASSERT_LOCKED();

	/*
	 * An imitation of the FIONREAD ioctl code.
	 */
	d = bpfilter_lookup(minor(dev));

	/*
	 * XXX The USB stack manages to trigger a race condition
	 * which causes bpfilter_lookup to return NULL when a USB device
	 * gets detached while it is up and has an open bpf handler (e.g.
	 * dhclient).  We should still investigate whether the root
	 * cause of this issue can be fixed.
	 */
	if (d == NULL)
		return (POLLERR);

	/* Always ready to write data */
	revents = events & (POLLOUT | POLLWRNORM);

	if (events & (POLLIN | POLLRDNORM)) {
		mtx_enter(&d->bd_mtx);
		if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			/*
			 * if there's a timeout, mark the time we
			 * started waiting.
			 */
			if (d->bd_rtout != -1 && d->bd_rdStart == 0)
				d->bd_rdStart = ticks;
			selrecord(p, &d->bd_sel);
		}
		mtx_leave(&d->bd_mtx);
	}
	return (revents);
}

const struct filterops bpfread_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_bpfrdetach,
	.f_event	= filt_bpfread,
};

int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	struct klist *klist;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_sel.si_note;
		kn->kn_fop = &bpfread_filtops;
		break;
	default:
		return (EINVAL);
	}

	bpf_get(d);
	kn->kn_hook = d;
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);

	mtx_enter(&d->bd_mtx);
	if (d->bd_rtout != -1 && d->bd_rdStart == 0)
		d->bd_rdStart = ticks;
	mtx_leave(&d->bd_mtx);

	return (0);
}

void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;

	KERNEL_ASSERT_LOCKED();

	SLIST_REMOVE(&d->bd_sel.si_note, kn, knote, kn_selnext);
	bpf_put(d);
}

int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;

	KERNEL_ASSERT_LOCKED();

	mtx_enter(&d->bd_mtx);
	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;
	mtx_leave(&d->bd_mtx);

	return (kn->kn_data > 0);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

int
bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct bpf_d *d;
	size_t pktlen, slen;
	const struct mbuf *m0;
	struct timeval tv;
	int gottime = 0;
	int drop = 0;

	if (m == NULL)
		return (0);

	if (bp == NULL)
		return (0);

	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;

	smr_read_enter();
	SMR_SLIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		struct bpf_program_smr *bps;
		struct bpf_insn *fcode = NULL;

		atomic_inc_long(&d->bd_rcount);

		if (ISSET(d->bd_dirfilt, direction))
			continue;

		bps = SMR_PTR_GET(&d->bd_rfilter);
		if (bps != NULL)
			fcode = bps->bps_bf.bf_insns;
		slen = bpf_mfilter(fcode, m, pktlen);

		if (slen == 0)
			continue;
		if (d->bd_fildrop != BPF_FILDROP_PASS)
			drop = 1;
		if (d->bd_fildrop != BPF_FILDROP_DROP) {
			if (!gottime) {
				if (ISSET(m->m_flags, M_PKTHDR))
					m_microtime(m, &tv);
				else
					microtime(&tv);

				gottime = 1;
			}

			mtx_enter(&d->bd_mtx);
			bpf_catchpacket(d, (u_char *)m, pktlen, slen, &tv);
			mtx_leave(&d->bd_mtx);
		}
	}
	smr_read_leave();

	return (drop);
}

/*
 * Incoming linkage from device drivers, where a data buffer should be
 * prepended by an arbitrary header. In this situation we already have a
 * way of representing a chain of memory buffers, i.e., mbufs, so reuse
 * the existing functionality by attaching the buffers to mbufs.
 *
 * Con up a minimal mbuf chain to pacify bpf by allocating (only) a
 * struct m_hdr each for the header and data on the stack.
 */
int
bpf_tap_hdr(caddr_t arg, const void *hdr, unsigned int hdrlen,
    const void *buf, unsigned int buflen, u_int direction)
{
	struct m_hdr mh, md;
	struct mbuf *m0 = NULL;
	struct mbuf **mp = &m0;

	if (hdr != NULL) {
		mh.mh_flags = 0;
		mh.mh_next = NULL;
		mh.mh_len = hdrlen;
		mh.mh_data = (void *)hdr;

		*mp = (struct mbuf *)&mh;
		mp = &mh.mh_next;
	}

	if (buf != NULL) {
		md.mh_flags = 0;
		md.mh_next = NULL;
		md.mh_len = buflen;
		md.mh_data = (void *)buf;

		*mp = (struct mbuf *)&md;
	}

	return bpf_mtap(arg, m0, direction);
}
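
/*
 * Hypothetical driver-side sketch (names are placeholders): tapping a
 * received frame that lives in two linear buffers instead of an mbuf
 * chain.  The stack-allocated m_hdr chain built above makes this work
 * without any allocation:
 *
 *	if (sc->sc_bpf != NULL)
 *		bpf_tap_hdr(sc->sc_bpf, hdrbuf, hdrlen, databuf, datalen,
 *		    BPF_DIRECTION_IN);
 */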

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend some arbitrary header from a linear buffer.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_hdr(caddr_t arg, const void *data, u_int dlen, const struct mbuf *m,
    u_int direction)
{
	struct m_hdr mh;
	const struct mbuf *m0;

	if (dlen > 0) {
		mh.mh_flags = 0;
		mh.mh_next = (struct mbuf *)m;
		mh.mh_len = dlen;
		mh.mh_data = (void *)data;
		m0 = (struct mbuf *)&mh;
	} else
		m0 = m;

	return bpf_mtap(arg, m0, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend the address family.
 *
 * Con up a minimal dummy header to pacify bpf.  We allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
{
	u_int32_t    afh;

	afh = htonl(af);

	return bpf_mtap_hdr(arg, &afh, sizeof(afh), m, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend a VLAN encapsulation header.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
{
#if NVLAN > 0
	struct ether_vlan_header evh;
	struct m_hdr mh;
	uint8_t prio;

	if ((m->m_flags & M_VLANTAG) == 0)
#endif
	{
		return bpf_mtap(arg, m, direction);
	}

#if NVLAN > 0
	KASSERT(m->m_len >= ETHER_HDR_LEN);

	prio = m->m_pkthdr.pf.prio;
	if (prio <= 1)
		prio = !prio;

	memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN);
	evh.evl_proto = evh.evl_encap_proto;
	evh.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evh.evl_tag = htons(m->m_pkthdr.ether_vtag |
	    (prio << EVL_PRIO_BITS));

	mh.mh_flags = 0;
	mh.mh_data = m->m_data + ETHER_HDR_LEN;
	mh.mh_len = m->m_len - ETHER_HDR_LEN;
	mh.mh_next = m->m_next;

	return bpf_mtap_hdr(arg, &evh, sizeof(evh),
	    (struct mbuf *)&mh, direction);
#endif
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Wake up listeners if needed.
 * pkt is really an mbuf chain; bpf_mcopy is used to do the
 * actual data transfer.
 */
void
bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
    struct timeval *tv)
{
	struct bpf_hdr *hp;
	int totlen, curlen;
	int hdrlen, do_wakeup = 0;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif == NULL)
		return;

	hdrlen = d->bd_bif->bif_hdrlen;

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	}

	/*
	 * Append the bpf header.
	 */
	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	hp->bh_tstamp.tv_sec = tv->tv_sec;
	hp->bh_tstamp.tv_usec = tv->tv_usec;
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = hdrlen;

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	bpf_mcopy(pkt, (u_char *)hp + hdrlen,
	    (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;

	if (d->bd_immediate) {
		/*
		 * Immediate mode is set.  A packet arrived so any
		 * reads should be woken up.
		 */
		do_wakeup = 1;
	}

	if (d->bd_rdStart && d->bd_rdStart <= ULONG_MAX - d->bd_rtout &&
	    d->bd_rdStart + d->bd_rtout < ticks) {
		/*
		 * we could be selecting on the bpf, and we
		 * may have timeouts set.  We got here by getting
		 * a packet, so wake up the reader.
		 */
		if (d->bd_fbuf != NULL) {
			d->bd_rdStart = 0;
			ROTATE_BUFFERS(d);
			do_wakeup = 1;
		}
	}

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Allocate the packet buffers of a descriptor.
 */
int
bpf_allocbufs(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_fbuf == NULL)
		return (ENOMEM);

	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_sbuf == NULL) {
		free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
		return (ENOMEM);
	}

	d->bd_slen = 0;
	d->bd_hlen = 0;

	return (0);
}

void
bpf_prog_smr(void *bps_arg)
{
	struct bpf_program_smr *bps = bps_arg;

	free(bps->bps_bf.bf_insns, M_DEVBUF,
	    bps->bps_bf.bf_len * sizeof(struct bpf_insn));
	free(bps, M_DEVBUF, sizeof(struct bpf_program_smr));
}

void
bpf_d_smr(void *smr)
{
	struct bpf_d	*bd = smr;

	sigio_free(&bd->bd_sigio);
	free(bd->bd_sbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_hbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_fbuf, M_DEVBUF, bd->bd_bufsize);

	if (bd->bd_rfilter != NULL)
		bpf_prog_smr(bd->bd_rfilter);
	if (bd->bd_wfilter != NULL)
		bpf_prog_smr(bd->bd_wfilter);

	free(bd, M_DEVBUF, sizeof(*bd));
}

void
bpf_get(struct bpf_d *bd)
{
	atomic_inc_int(&bd->bd_ref);
}

/*
 * Free buffers currently in use by a descriptor
 * when the reference count drops to zero.
 */
void
bpf_put(struct bpf_d *bd)
{
	if (atomic_dec_int_nv(&bd->bd_ref) > 0)
		return;

	smr_call(&bd->bd_smr, bpf_d_smr, bd);
}

void *
bpfsattach(caddr_t *bpfp, const char *name, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL)
		panic("bpfattach");
	SMR_SLIST_INIT(&bp->bif_dlist);
	bp->bif_driverp = (struct bpf_if **)bpfp;
	bp->bif_name = name;
	bp->bif_ifp = NULL;
	bp->bif_dlt = dlt;

	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = NULL;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
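
	/*
	 * Worked example (a sketch, assuming the 18-byte SIZEOF_BPF_HDR
	 * that the 32-bit bpf timeval yields): for DLT_EN10MB, hdrlen
	 * is ETHER_HDR_LEN (14), BPF_WORDALIGN(14 + 18) is 32, so
	 * bif_hdrlen is 32 - 14 = 18 and the network layer header lands
	 * at byte 18 + 14 = 32 of each record, a longword boundary.
	 */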

	return (bp);
}

void
bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	bp = bpfsattach(driverp, ifp->if_xname, dlt, hdrlen);
	bp->bif_ifp = ifp;
}

/* Detach an interface from its attached bpf device.  */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *nbp;

	KERNEL_ASSERT_LOCKED();

	for (bp = bpf_iflist; bp; bp = nbp) {
		nbp = bp->bif_next;
		if (bp->bif_ifp == ifp)
			bpfsdetach(bp);
	}
	ifp->if_bpf = NULL;
}

void
bpfsdetach(void *p)
{
	struct bpf_if *bp = p, *tbp;
	struct bpf_d *bd;
	int maj;

	KERNEL_ASSERT_LOCKED();

	/* Locate the major number. */
	for (maj = 0; maj < nchrdev; maj++)
		if (cdevsw[maj].d_open == bpfopen)
			break;

	while ((bd = SMR_SLIST_FIRST_LOCKED(&bp->bif_dlist)))
		vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR);

	for (tbp = bpf_iflist; tbp; tbp = tbp->bif_next) {
		if (tbp->bif_next == bp) {
			tbp->bif_next = bp->bif_next;
			break;
		}
	}

	if (bpf_iflist == bp)
		bpf_iflist = bp->bif_next;

	free(bp, M_DEVBUF, sizeof(*bp));
}

int
bpf_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int newval;
	int error;

	switch (name[0]) {
	case NET_BPF_BUFSIZE:
		newval = bpf_bufsize;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &newval);
		if (error)
			return (error);
		if (newval < BPF_MINBUFSIZE || newval > bpf_maxbufsize)
			return (EINVAL);
		bpf_bufsize = newval;
		break;
	case NET_BPF_MAXBUFSIZE:
		newval = bpf_maxbufsize;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &newval);
		if (error)
			return (error);
		if (newval < BPF_MINBUFSIZE)
			return (EINVAL);
		bpf_maxbufsize = newval;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}

int
bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int flags = RW_INTR;
	int error;

	if (namelen != 1)
		return (ENOTDIR);

	flags |= (newp == NULL) ? RW_READ : RW_WRITE;

	error = rw_enter(&bpf_sysctl_lk, flags);
	if (error != 0)
		return (error);

	error = bpf_sysctl_locked(name, namelen, oldp, oldlenp, newp, newlen);

	rw_exit(&bpf_sysctl_lk);

	return (error);
}

struct bpf_d *
bpfilter_lookup(int unit)
{
	struct bpf_d *bd;

	KERNEL_ASSERT_LOCKED();

	LIST_FOREACH(bd, &bpf_d_list, bd_list)
		if (bd->bd_unit == unit)
			return (bd);
	return (NULL);
}

/*
 * Get a list of the available data link types of the interface.
 */
int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct bpf_if *bp;
	const char *name;

	name = d->bd_bif->bif_name;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return (ENOMEM);
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
			if (error)
				break;
		}
		n++;
	}

	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	const char *name;
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	name = d->bd_bif->bif_name;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return (EINVAL);
	bpf_detachd(d);
	bpf_attachd(d, bp);
	bpf_resetd(d);
	return (0);
}

u_int32_t	bpf_mbuf_ldw(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldh(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldb(const void *, u_int32_t, int *);

int		bpf_mbuf_copy(const struct mbuf *, u_int32_t,
		    void *, u_int32_t);

const struct bpf_ops bpf_mbuf_ops = {
	bpf_mbuf_ldw,
	bpf_mbuf_ldh,
	bpf_mbuf_ldb,
};

int
bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len)
{
	u_int8_t *cp = buf;
	u_int32_t count;

	while (off >= m->m_len) {
		off -= m->m_len;

		m = m->m_next;
		if (m == NULL)
			return (-1);
	}

	for (;;) {
		count = min(m->m_len - off, len);

		memcpy(cp, m->m_data + off, count);
		len -= count;

		if (len == 0)
			return (0);

		m = m->m_next;
		if (m == NULL)
			break;

		cp += count;
		off = 0;
	}

	return (-1);
}

u_int32_t
bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err)
{
	u_int32_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohl(v);
}

u_int32_t
bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err)
{
	u_int16_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohs(v);
}

u_int32_t
bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err)
{
	const struct mbuf *m = m0;
	u_int8_t v;

	while (k >= m->m_len) {
		k -= m->m_len;

		m = m->m_next;
		if (m == NULL) {
			*err = 1;
			return (0);
		}
	}
	v = m->m_data[k];

	*err = 0;
	return v;
}

u_int
bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen)
{
	return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen);
}