/*	$OpenBSD: bpf.c,v 1.219 2022/07/09 12:48:21 visa Exp $	*/
/*	$NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org>
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/rwlock.h>
#include <sys/atomic.h>
#include <sys/event.h>
#include <sys/mutex.h>
#include <sys/refcnt.h>
#include <sys/smr.h>
#include <sys/specdev.h>
#include <sys/sigio.h>
#include <sys/task.h>
#include <sys/time.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include "vlan.h"
#if NVLAN > 0
#include <net/if_vlan_var.h>
#endif

#define BPF_BUFSIZE 32768

#define PRINET  26			/* interruptible */

/*
 * The default read buffer size is patchable.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_MAXBUFSIZE;
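
/*
 * Both defaults are runtime-tunable through the net.bpf sysctl tree,
 * served by bpf_sysctl() below: net.bpf.bufsize (NET_BPF_BUFSIZE) and
 * net.bpf.maxbufsize (NET_BPF_MAXBUFSIZE).
 */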

/*
 *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
 *  bpf_d_list is the list of descriptors
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_d_list;

int	bpf_allocbufs(struct bpf_d *);
void	bpf_ifname(struct bpf_if*, struct ifreq *);
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, struct bpf_d *, struct mbuf **,
	    struct sockaddr *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_wakeup_cb(void *);
int	_bpf_mtap(caddr_t, const struct mbuf *, const struct mbuf *, u_int);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    const struct bpf_hdr *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);
int	filt_bpfreadmodify(struct kevent *, struct knote *);
int	filt_bpfreadprocess(struct knote *, struct kevent *);

int	bpf_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t);

struct bpf_d *bpfilter_lookup(int);

/*
 * Called holding ``bd_mtx''.
 */
void	bpf_attachd(struct bpf_d *, struct bpf_if *);
void	bpf_detachd(struct bpf_d *);
void	bpf_resetd(struct bpf_d *);

void	bpf_prog_smr(void *);
void	bpf_d_smr(void *);
/*
 * Reference counting of descriptors
 */
void	bpf_get(struct bpf_d *);
void	bpf_put(struct bpf_d *);


struct rwlock bpf_sysctl_lk = RWLOCK_INITIALIZER("bpfsz");

int
bpf_movein(struct uio *uio, struct bpf_d *d, struct mbuf **mp,
    struct sockaddr *sockp)
{
	struct bpf_program_smr *bps;
	struct bpf_insn *fcode = NULL;
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen, alen, mlen;
	u_int len;
	u_int linktype;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	linktype = d->bd_bif->bif_dlt;
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_LOOP:
		sockp->sa_family = AF_UNSPEC;
		hlen = sizeof(u_int32_t);
		break;

	default:
		return (EIO);
	}

	if (uio->uio_resid > MAXMCLBYTES)
		return (EMSGSIZE);
	len = uio->uio_resid;
	if (len < hlen)
		return (EINVAL);

	/*
	 * Get the length of the payload so we can align it properly.
	 */
	alen = len - hlen;

	/*
	 * Allocate enough space for headers and the aligned payload.
	 */
	mlen = max(max_linkhdr, hlen) + roundup(alen, sizeof(long));
	if (mlen > MAXMCLBYTES)
		return (EMSGSIZE);

	MGETHDR(m, M_WAIT, MT_DATA);
	if (mlen > MHLEN) {
		MCLGETL(m, M_WAIT, mlen);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}

	m_align(m, alen); /* Align the payload. */
	m->m_data -= hlen;

	m->m_pkthdr.ph_ifidx = 0;
	m->m_pkthdr.len = len;
	m->m_len = len;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	smr_read_enter();
	bps = SMR_PTR_GET(&d->bd_wfilter);
	if (bps != NULL)
		fcode = bps->bps_bf.bf_insns;
	slen = bpf_filter(fcode, mtod(m, u_char *), len, len);
	smr_read_leave();

	if (slen < len) {
		error = EPERM;
		goto bad;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (linktype == DLT_LOOP) {
			u_int32_t af;

			/* the link header indicates the address family */
			KASSERT(hlen == sizeof(u_int32_t));
			memcpy(&af, m->m_data, hlen);
			sockp->sa_family = ntohl(af);
		} else
			memcpy(sockp->sa_data, m->m_data, hlen);

		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
		m->m_data += hlen;
	}

	/*
	 * Prepend the data link type as a mbuf tag
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	*mp = m;
	return (0);
 bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */

	d->bd_bif = bp;

	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_INSERT_HEAD_LOCKED(&bp->bif_dlist, d, bd_next);

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	bp = d->bd_bif;
	/* Not attached. */
	if (bp == NULL)
		return;

	/* Remove ``d'' from the interface's descriptor list. */
	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_REMOVE_LOCKED(&bp->bif_dlist, d, bpf_d, bd_next);

	if (SMR_SLIST_EMPTY_LOCKED(&bp->bif_dlist)) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*bp->bif_driverp = NULL;
	}

	d->bd_bif = NULL;

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		KASSERT(bp->bif_ifp != NULL);

		d->bd_promisc = 0;

		bpf_get(d);
		mtx_leave(&d->bd_mtx);
		NET_LOCK();
		error = ifpromisc(bp->bif_ifp, 0);
		NET_UNLOCK();
		mtx_enter(&d->bd_mtx);
		bpf_put(d);

		if (error && !(error == EINVAL || error == ENODEV ||
		    error == ENXIO))
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
	}
}

void
bpfilterattach(int n)
{
	LIST_INIT(&bpf_d_list);
}

/*
 * Open the bpf device.  Returns ENXIO for an illegal minor device number,
 * EBUSY if no memory is available for a new descriptor.
 */
int
bpfopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *bd;
	int unit = minor(dev);

	if (unit & ((1 << CLONE_SHIFT) - 1))
		return (ENXIO);

	KASSERT(bpfilter_lookup(unit) == NULL);

	/* create on demand */
	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (EBUSY);

	/* Do the bulk of the initialization. */
	bd->bd_unit = unit;
	bd->bd_bufsize = bpf_bufsize;
	bd->bd_sig = SIGIO;
	mtx_init(&bd->bd_mtx, IPL_NET);
	task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
	smr_init(&bd->bd_smr);
	sigio_init(&bd->bd_sigio);
	klist_init_mutex(&bd->bd_klist, &bd->bd_mtx);

	bd->bd_rtout = 0;	/* no timeout by default */

	refcnt_init(&bd->bd_refcnt);
	LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface and
 * releasing its reference; the buffers are freed via SMR once the
 * reference count drops to zero.
 */
int
bpfclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *d;

	d = bpfilter_lookup(minor(dev));
	mtx_enter(&d->bd_mtx);
	bpf_detachd(d);
	bpf_wakeup(d);
	LIST_REMOVE(d, bd_list);
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	KASSERT(d->bd_in_uiomove == 0); \
	MUTEX_ASSERT_LOCKED(&d->bd_mtx); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;

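/*
 * Illustrative note: each descriptor cycles through three equally sized
 * buffers.  bpf_catchpacket() appends packets to the store buffer
 * (bd_sbuf); when a packet does not fit, the store buffer is rotated
 * into the hold slot (bd_hbuf) for read(2) to drain and the free buffer
 * (bd_fbuf) becomes the new store buffer.  If the hold slot is still
 * occupied at rotation time, the packet is dropped and bd_dcount is
 * incremented.
 */
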
/*
 *  bpfread - read next chunk of packets from buffers
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	uint64_t end, now;
	struct bpf_d *d;
	caddr_t hbuf;
	int error, hlen;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	mtx_enter(&d->bd_mtx);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize) {
		error = EINVAL;
		goto out;
	}

	/*
	 * If there's a timeout, mark when the read should end.
	 */
	if (d->bd_rtout != 0) {
		now = nsecuptime();
		end = now + d->bd_rtout;
		if (end < now)
			end = UINT64_MAX;
	}

	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_bif == NULL) {
			/* interface is gone */
			if (d->bd_slen == 0) {
				error = EIO;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_immediate && d->bd_slen != 0) {
			/*
			 * One or more packets arrived since the previous
			 * read or while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		if (ISSET(ioflag, IO_NDELAY)) {
			/* User requested non-blocking I/O */
			error = EWOULDBLOCK;
		} else if (d->bd_rtout == 0) {
			/* No read timeout set. */
			d->bd_nreaders++;
			error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
			    "bpf", INFSLP);
			d->bd_nreaders--;
		} else if ((now = nsecuptime()) < end) {
			/* Read timeout has not expired yet. */
			d->bd_nreaders++;
			error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
			    "bpf", end - now);
			d->bd_nreaders--;
		} else {
			/* Read timeout has expired. */
			error = EWOULDBLOCK;
		}
		if (error == EINTR || error == ERESTART)
			goto out;
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf != NULL)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				error = 0;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	hbuf = d->bd_hbuf;
	hlen = d->bd_hlen;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_fbuf = NULL;
	d->bd_in_uiomove = 1;

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	mtx_leave(&d->bd_mtx);
	error = uiomove(hbuf, hlen, uio);
	mtx_enter(&d->bd_mtx);

	/* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */
	KASSERT(d->bd_fbuf == NULL);
	KASSERT(d->bd_hbuf == NULL);
	d->bd_fbuf = hbuf;
	d->bd_in_uiomove = 0;
out:
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (error);
}
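
/*
 * Example (userland sketch, not compiled here): a reader must supply a
 * buffer of exactly the size reported by BIOCGBLEN and may get several
 * packets per read(2); records are laid out as written by
 * bpf_catchpacket() above:
 *
 *	char *buf = malloc(blen), *p;		// blen from BIOCGBLEN
 *	ssize_t n = read(fd, buf, blen);
 *
 *	for (p = buf; p < buf + n;) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)p;
 *		// packet bytes: p + bh->bh_hdrlen, bh->bh_caplen long
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */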

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
void
bpf_wakeup(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	if (d->bd_nreaders)
		wakeup(d);

	KNOTE(&d->bd_klist, 0);

	/*
	 * As long as pgsigio() needs to be protected
	 * by the KERNEL_LOCK() we have to delay the wakeup to
	 * another context to keep the hot path KERNEL_LOCK()-free.
	 */
	if (d->bd_async && d->bd_sig) {
		bpf_get(d);
		if (!task_add(systq, &d->bd_wake_task))
			bpf_put(d);
	}
}

void
bpf_wakeup_cb(void *xd)
{
	struct bpf_d *d = xd;

	if (d->bd_async && d->bd_sig)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	bpf_put(d);
}

int
bpfwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m;
	int error;
	struct sockaddr_storage dst;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	ifp = d->bd_bif->bif_ifp;

	if (ifp == NULL || (ifp->if_flags & IFF_UP) == 0) {
		error = ENETDOWN;
		goto out;
	}

	if (uio->uio_resid == 0) {
		error = 0;
		goto out;
	}

	error = bpf_movein(uio, d, &m, sstosa(&dst));
	if (error)
		goto out;

	if (m->m_pkthdr.len > ifp->if_mtu) {
		m_freem(m);
		error = EMSGSIZE;
		goto out;
	}

	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
	m->m_pkthdr.pf.prio = ifp->if_llprio;

	if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	NET_LOCK();
	error = ifp->if_output(ifp, m, sstosa(&dst), NULL);
	NET_UNLOCK();

out:
	bpf_put(d);
	return (error);
}
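
/*
 * Example (userland sketch): a frame written to the descriptor must
 * carry a complete link layer header and fit in the interface MTU,
 * e.g. on a DLT_EN10MB descriptor:
 *
 *	struct ether_header eh;		// filled in by the caller
 *	struct iovec iov[2] = {
 *		{ &eh, sizeof(eh) },
 *		{ payload, paylen },	// payload/paylen: caller's data
 *	};
 *	writev(fd, iov, 2);
 *
 * With BIOCSHDRCMPLT set, the link layer source address is taken from
 * the header as written instead of being filled in by the interface
 * output routine (see the pseudo_AF_HDRCMPLT handling above).
 */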

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
void
bpf_resetd(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	KASSERT(d->bd_in_uiomove == 0);

	if (d->bd_hbuf != NULL) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set link layer read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLTLIST	Get supported link layer types.
 *  BIOCGDLT		Get link layer type.
 *  BIOCSDLT		Set link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag.
 *  BIOCSHDRCMPLT	Set "header already complete" flag.
 */
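
/*
 * Example (userland sketch; names are placeholders, error handling
 * omitted): a typical consumer binds a descriptor to an interface and
 * tunes it before reading:
 *
 *	struct ifreq ifr;
 *	u_int yes = 1;
 *	int fd = open("/dev/bpf0", O_RDWR);
 *
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);		// attach to em0
 *	ioctl(fd, BIOCIMMEDIATE, &yes);		// deliver as packets arrive
 *	ioctl(fd, BIOCPROMISC, NULL);		// promiscuous mode
 */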
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct bpf_d *d;
	int error = 0;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_locked && suser(p) != 0) {
		/* list of allowed ioctls when locked and not root */
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCGDIRFILT:
			break;
		default:
			return (EPERM);
		}
	}

	bpf_get(d);

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			mtx_enter(&d->bd_mtx);
			n = d->bd_slen;
			if (d->bd_hbuf != NULL)
				n += d->bd_hlen;
			mtx_leave(&d->bd_mtx);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			mtx_enter(&d->bd_mtx);
			d->bd_bufsize = size;
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr, 0);
		break;

	/*
	 * Set link layer write filter.
	 */
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, 1);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
		} else if (d->bd_bif->bif_ifp != NULL) {
			if (d->bd_promisc == 0) {
				MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
				NET_LOCK();
				error = ifpromisc(d->bd_bif->bif_ifp, 1);
				NET_UNLOCK();
				if (error == 0)
					d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get a list of supported link layer types.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Get link layer type.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Set link layer type.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			mtx_enter(&d->bd_mtx);
			error = bpf_setdlt(d, *(u_int *)addr);
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif, (struct ifreq *)addr);
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;
			uint64_t rtout;

			if (tv->tv_sec < 0 || !timerisvalid(tv)) {
				error = EINVAL;
				break;
			}
			rtout = TIMEVAL_TO_NSEC(tv);
			if (rtout > MAXTSLP) {
				error = EOVERFLOW;
				break;
			}
			mtx_enter(&d->bd_mtx);
			d->bd_rtout = rtout;
			mtx_leave(&d->bd_mtx);
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			memset(tv, 0, sizeof(*tv));
			mtx_enter(&d->bd_mtx);
			NSEC_TO_TIMEVAL(d->bd_rtout, tv);
			mtx_leave(&d->bd_mtx);
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCLOCK:		/* set "locked" flag (no reset) */
		d->bd_locked = 1;
		break;

	case BIOCGFILDROP:	/* get "filter-drop" flag */
		*(u_int *)addr = d->bd_fildrop;
		break;

	case BIOCSFILDROP: {	/* set "filter-drop" flag */
		unsigned int fildrop = *(u_int *)addr;
		switch (fildrop) {
		case BPF_FILDROP_PASS:
		case BPF_FILDROP_CAPTURE:
		case BPF_FILDROP_DROP:
			d->bd_fildrop = fildrop;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	}

	case BIOCGDIRFILT:	/* get direction filter */
		*(u_int *)addr = d->bd_dirfilt;
		break;

	case BIOCSDIRFILT:	/* set direction filter */
		d->bd_dirfilt = (*(u_int *)addr) &
		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		/* let vfs keep track of this */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:		/* Process or group to send signals to */
	case TIOCSPGRP:
		error = sigio_setown(&d->bd_sigio, cmd, addr);
		break;

	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&d->bd_sigio, cmd, addr);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}

	bpf_put(d);
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, int wf)
{
	struct bpf_program_smr *bps, *old_bps;
	struct bpf_insn *fcode;
	u_int flen, size;

	KERNEL_ASSERT_LOCKED();

	if (fp->bf_insns == NULL) {
		if (fp->bf_len != 0)
			return (EINVAL);
		bps = NULL;
	} else {
		flen = fp->bf_len;
		if (flen > BPF_MAXINSNS)
			return (EINVAL);

		fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
		    M_WAITOK | M_CANFAIL);
		if (fcode == NULL)
			return (ENOMEM);

		size = flen * sizeof(*fp->bf_insns);
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    bpf_validate(fcode, (int)flen) == 0) {
			free(fcode, M_DEVBUF, size);
			return (EINVAL);
		}

		bps = malloc(sizeof(*bps), M_DEVBUF, M_WAITOK);
		smr_init(&bps->bps_smr);
		bps->bps_bf.bf_len = flen;
		bps->bps_bf.bf_insns = fcode;
	}

	if (wf == 0) {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_rfilter);
		SMR_PTR_SET_LOCKED(&d->bd_rfilter, bps);
	} else {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_wfilter);
		SMR_PTR_SET_LOCKED(&d->bd_wfilter, bps);
	}

	mtx_enter(&d->bd_mtx);
	bpf_resetd(d);
	mtx_leave(&d->bd_mtx);
	if (old_bps != NULL)
		smr_call(&old_bps->bps_smr, bpf_prog_smr, old_bps);

	return (0);
}
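
/*
 * Example (userland sketch): the program handed to BIOCSETF/BIOCSETWF
 * is copied in and checked by bpf_validate() above.  A minimal filter
 * that accepts every packet with a 96 byte snapshot:
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET+BPF_K, 96),
 *	};
 *	struct bpf_program prog = {
 *		.bf_len = 1,
 *		.bf_insns = insns,
 *	};
 *	ioctl(fd, BIOCSETF, &prog);
 *
 * A filter return value of 0 excludes the packet; a nonzero value is
 * the snapshot length passed to bpf_catchpacket().
 */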

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp, *candidate = NULL;
	int error = 0;

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(bp->bif_name, ifr->ifr_name) != 0)
			continue;

		if (candidate == NULL || candidate->bif_dlt > bp->bif_dlt)
			candidate = bp;
	}

	/* Not found. */
	if (candidate == NULL)
		return (ENXIO);

	/*
	 * Allocate the packet buffers if we need to.
	 * If we're already attached to the requested interface,
	 * just flush the buffer.
	 */
	mtx_enter(&d->bd_mtx);
	if (d->bd_sbuf == NULL) {
		if ((error = bpf_allocbufs(d)))
			goto out;
	}
	if (candidate != d->bd_bif) {
		/*
		 * Detach if attached to something else.
		 */
		bpf_detachd(d);
		bpf_attachd(d, candidate);
	}
	bpf_resetd(d);
out:
	mtx_leave(&d->bd_mtx);
	return (error);
}

/*
 * Copy the interface name to the ifreq.
 */
void
bpf_ifname(struct bpf_if *bif, struct ifreq *ifr)
{
	bcopy(bif->bif_name, ifr->ifr_name, sizeof(ifr->ifr_name));
}

const struct filterops bpfread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_bpfrdetach,
	.f_event	= filt_bpfread,
	.f_modify	= filt_bpfreadmodify,
	.f_process	= filt_bpfreadprocess,
};

int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	struct klist *klist;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d == NULL)
		return (ENXIO);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_klist;
		kn->kn_fop = &bpfread_filtops;
		break;
	default:
		return (EINVAL);
	}

	bpf_get(d);
	kn->kn_hook = d;
	klist_insert(klist, kn);

	return (0);
}

void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;

	klist_remove(&d->bd_klist, kn);
	bpf_put(d);
}

int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;

	return (kn->kn_data > 0);
}

int
filt_bpfreadmodify(struct kevent *kev, struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;
	int active;

	mtx_enter(&d->bd_mtx);
	active = knote_modify_fn(kev, kn, filt_bpfread);
	mtx_leave(&d->bd_mtx);

	return (active);
}

int
filt_bpfreadprocess(struct knote *kn, struct kevent *kev)
{
	struct bpf_d *d = kn->kn_hook;
	int active;

	mtx_enter(&d->bd_mtx);
	active = knote_process_fn(kn, kev, filt_bpfread);
	mtx_leave(&d->bd_mtx);

	return (active);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

int
bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
{
	return _bpf_mtap(arg, m, m, direction);
}

int
_bpf_mtap(caddr_t arg, const struct mbuf *mp, const struct mbuf *m,
    u_int direction)
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct bpf_d *d;
	size_t pktlen, slen;
	const struct mbuf *m0;
	struct bpf_hdr tbh;
	int gothdr = 0;
	int drop = 0;

	if (m == NULL)
		return (0);

	if (bp == NULL)
		return (0);

	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;

	smr_read_enter();
	SMR_SLIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		struct bpf_program_smr *bps;
		struct bpf_insn *fcode = NULL;

		atomic_inc_long(&d->bd_rcount);

		if (ISSET(d->bd_dirfilt, direction))
			continue;

		bps = SMR_PTR_GET(&d->bd_rfilter);
		if (bps != NULL)
			fcode = bps->bps_bf.bf_insns;
		slen = bpf_mfilter(fcode, m, pktlen);

		if (slen == 0)
			continue;
		if (d->bd_fildrop != BPF_FILDROP_PASS)
			drop = 1;
		if (d->bd_fildrop != BPF_FILDROP_DROP) {
			if (!gothdr) {
				struct timeval tv;
				memset(&tbh, 0, sizeof(tbh));

				if (ISSET(mp->m_flags, M_PKTHDR)) {
					tbh.bh_ifidx = mp->m_pkthdr.ph_ifidx;
					tbh.bh_flowid = mp->m_pkthdr.ph_flowid;
					tbh.bh_flags = mp->m_pkthdr.pf.prio;
					if (ISSET(mp->m_pkthdr.csum_flags,
					    M_FLOWID))
						SET(tbh.bh_flags, BPF_F_FLOWID);

					m_microtime(mp, &tv);
				} else
					microtime(&tv);

				tbh.bh_tstamp.tv_sec = tv.tv_sec;
				tbh.bh_tstamp.tv_usec = tv.tv_usec;
				SET(tbh.bh_flags, direction << BPF_F_DIR_SHIFT);

				gothdr = 1;
			}

			mtx_enter(&d->bd_mtx);
			bpf_catchpacket(d, (u_char *)m, pktlen, slen, &tbh);
			mtx_leave(&d->bd_mtx);
		}
	}
	smr_read_leave();

	return (drop);
}

/*
 * Incoming linkage from device drivers, where a data buffer should be
 * prepended with an arbitrary header.  In this situation we already have
 * a way of representing a chain of memory buffers, i.e., mbufs, so reuse
 * the existing functionality by attaching the buffers to mbufs.
 *
 * Con up a minimal mbuf chain to pacify bpf by allocating (only) a
 * struct m_hdr each for the header and data on the stack.
 */
int
bpf_tap_hdr(caddr_t arg, const void *hdr, unsigned int hdrlen,
    const void *buf, unsigned int buflen, u_int direction)
{
	struct m_hdr mh, md;
	struct mbuf *m0 = NULL;
	struct mbuf **mp = &m0;

	if (hdr != NULL) {
		mh.mh_flags = 0;
		mh.mh_next = NULL;
		mh.mh_len = hdrlen;
		mh.mh_data = (void *)hdr;

		*mp = (struct mbuf *)&mh;
		mp = &mh.mh_next;
	}

	if (buf != NULL) {
		md.mh_flags = 0;
		md.mh_next = NULL;
		md.mh_len = buflen;
		md.mh_data = (void *)buf;

		*mp = (struct mbuf *)&md;
	}

	return bpf_mtap(arg, m0, direction);
}
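
/*
 * Example (driver-side sketch; hdr/buf are placeholders for driver
 * state): a driver holding a separate hardware header and payload in
 * flat buffers can tap them without building a real mbuf chain:
 *
 *	if (ifp->if_bpf)
 *		bpf_tap_hdr(ifp->if_bpf, hdr, hdrlen, buf, buflen,
 *		    BPF_DIRECTION_IN);
 */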

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend some arbitrary header from a linear buffer.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_hdr(caddr_t arg, const void *data, u_int dlen, const struct mbuf *m,
    u_int direction)
{
	struct m_hdr mh;
	const struct mbuf *m0;

	if (dlen > 0) {
		mh.mh_flags = 0;
		mh.mh_next = (struct mbuf *)m;
		mh.mh_len = dlen;
		mh.mh_data = (void *)data;
		m0 = (struct mbuf *)&mh;
	} else
		m0 = m;

	return _bpf_mtap(arg, m, m0, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend the address family.
 *
 * Con up a minimal dummy header to pacify bpf.  We allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
{
	u_int32_t    afh;

	afh = htonl(af);

	return bpf_mtap_hdr(arg, &afh, sizeof(afh), m, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend a VLAN encapsulation header.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
{
#if NVLAN > 0
	struct ether_vlan_header evh;
	struct m_hdr mh, md;

	if ((m->m_flags & M_VLANTAG) == 0)
#endif
	{
		return _bpf_mtap(arg, m, m, direction);
	}

#if NVLAN > 0
	KASSERT(m->m_len >= ETHER_HDR_LEN);

	memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN);
	evh.evl_proto = evh.evl_encap_proto;
	evh.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evh.evl_tag = htons(m->m_pkthdr.ether_vtag);

	mh.mh_flags = 0;
	mh.mh_data = (caddr_t)&evh;
	mh.mh_len = sizeof(evh);
	mh.mh_next = (struct mbuf *)&md;

	md.mh_flags = 0;
	md.mh_data = m->m_data + ETHER_HDR_LEN;
	md.mh_len = m->m_len - ETHER_HDR_LEN;
	md.mh_next = m->m_next;

	return _bpf_mtap(arg, m, (struct mbuf *)&mh, direction);
#endif
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Wake up listeners if needed.
 * "pkt" is really an mbuf chain; bpf_mcopy() does the actual
 * data transfer.
 */
void
bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
    const struct bpf_hdr *tbh)
{
	struct bpf_hdr *bh;
	int totlen, curlen;
	int hdrlen, do_wakeup = 0;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif == NULL)
		return;

	hdrlen = d->bd_bif->bif_hdrlen;

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	}

	/*
	 * Append the bpf header.
	 */
	bh = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	*bh = *tbh;
	bh->bh_datalen = pktlen;
	bh->bh_hdrlen = hdrlen;
	bh->bh_caplen = totlen - hdrlen;

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	bpf_mcopy(pkt, (u_char *)bh + hdrlen, bh->bh_caplen);
	d->bd_slen = curlen + totlen;

	if (d->bd_immediate) {
		/*
		 * Immediate mode is set.  A packet arrived so any
		 * reads should be woken up.
		 */
		do_wakeup = 1;
	}

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Allocate the packet buffers for a descriptor.
 */
int
bpf_allocbufs(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_fbuf == NULL)
		return (ENOMEM);

	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_sbuf == NULL) {
		free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
		d->bd_fbuf = NULL;
		return (ENOMEM);
	}

	d->bd_slen = 0;
	d->bd_hlen = 0;

	return (0);
}

void
bpf_prog_smr(void *bps_arg)
{
	struct bpf_program_smr *bps = bps_arg;

	free(bps->bps_bf.bf_insns, M_DEVBUF,
	    bps->bps_bf.bf_len * sizeof(struct bpf_insn));
	free(bps, M_DEVBUF, sizeof(struct bpf_program_smr));
}

void
bpf_d_smr(void *smr)
{
	struct bpf_d	*bd = smr;

	sigio_free(&bd->bd_sigio);
	free(bd->bd_sbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_hbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_fbuf, M_DEVBUF, bd->bd_bufsize);

	if (bd->bd_rfilter != NULL)
		bpf_prog_smr(bd->bd_rfilter);
	if (bd->bd_wfilter != NULL)
		bpf_prog_smr(bd->bd_wfilter);

	klist_free(&bd->bd_klist);
	free(bd, M_DEVBUF, sizeof(*bd));
}

void
bpf_get(struct bpf_d *bd)
{
	refcnt_take(&bd->bd_refcnt);
}

/*
 * Free buffers currently in use by a descriptor
 * when the reference count drops to zero.
 */
void
bpf_put(struct bpf_d *bd)
{
	if (refcnt_rele(&bd->bd_refcnt) == 0)
		return;

	smr_call(&bd->bd_smr, bpf_d_smr, bd);
}

void *
bpfsattach(caddr_t *bpfp, const char *name, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL)
		panic("bpfattach");
	SMR_SLIST_INIT(&bp->bif_dlist);
	bp->bif_driverp = (struct bpf_if **)bpfp;
	bp->bif_name = name;
	bp->bif_ifp = NULL;
	bp->bif_dlt = dlt;

	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = NULL;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	return (bp);
}
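
/*
 * Worked example with assumed sizes (illustration only): if
 * BPF_WORDALIGN() rounds to 8 bytes, the link header is 14 bytes
 * (Ethernet) and SIZEOF_BPF_HDR is 24, then bif_hdrlen becomes
 * BPF_WORDALIGN(14 + 24) - 14 = 40 - 14 = 26.  A capture record is
 * then 26 bytes of bpf header followed by the 14 byte link header,
 * so the network layer header starts at offset 40, a longword
 * boundary.
 */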

void
bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	bp = bpfsattach(driverp, ifp->if_xname, dlt, hdrlen);
	bp->bif_ifp = ifp;
}

/* Detach an interface from its attached bpf device.  */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *nbp;

	KERNEL_ASSERT_LOCKED();

	for (bp = bpf_iflist; bp; bp = nbp) {
		nbp = bp->bif_next;
		if (bp->bif_ifp == ifp)
			bpfsdetach(bp);
	}
	ifp->if_bpf = NULL;
}

void
bpfsdetach(void *p)
{
	struct bpf_if *bp = p, *tbp;
	struct bpf_d *bd;
	int maj;

	KERNEL_ASSERT_LOCKED();

	/* Locate the major number. */
	for (maj = 0; maj < nchrdev; maj++)
		if (cdevsw[maj].d_open == bpfopen)
			break;

	while ((bd = SMR_SLIST_FIRST_LOCKED(&bp->bif_dlist))) {
		vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR);
		klist_invalidate(&bd->bd_klist);
	}

	for (tbp = bpf_iflist; tbp; tbp = tbp->bif_next) {
		if (tbp->bif_next == bp) {
			tbp->bif_next = bp->bif_next;
			break;
		}
	}

	if (bpf_iflist == bp)
		bpf_iflist = bp->bif_next;

	free(bp, M_DEVBUF, sizeof(*bp));
}

int
bpf_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	switch (name[0]) {
	case NET_BPF_BUFSIZE:
		return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &bpf_bufsize, BPF_MINBUFSIZE, bpf_maxbufsize);
	case NET_BPF_MAXBUFSIZE:
		return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &bpf_maxbufsize, BPF_MINBUFSIZE, INT_MAX);
	default:
		return (EOPNOTSUPP);
	}
}

int
bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int flags = RW_INTR;
	int error;

	if (namelen != 1)
		return (ENOTDIR);

	flags |= (newp == NULL) ? RW_READ : RW_WRITE;

	error = rw_enter(&bpf_sysctl_lk, flags);
	if (error != 0)
		return (error);

	error = bpf_sysctl_locked(name, namelen, oldp, oldlenp, newp, newlen);

	rw_exit(&bpf_sysctl_lk);

	return (error);
}

struct bpf_d *
bpfilter_lookup(int unit)
{
	struct bpf_d *bd;

	KERNEL_ASSERT_LOCKED();

	LIST_FOREACH(bd, &bpf_d_list, bd_list)
		if (bd->bd_unit == unit)
			return (bd);
	return (NULL);
}

/*
 * Get the list of available data link types for the interface.
 */
int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct bpf_if *bp;
	const char *name;

	name = d->bd_bif->bif_name;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return (ENOMEM);
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
			if (error)
				break;
		}
		n++;
	}

	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	const char *name;
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	name = d->bd_bif->bif_name;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return (EINVAL);
	bpf_detachd(d);
	bpf_attachd(d, bp);
	bpf_resetd(d);
	return (0);
}

u_int32_t	bpf_mbuf_ldw(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldh(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldb(const void *, u_int32_t, int *);

int		bpf_mbuf_copy(const struct mbuf *, u_int32_t,
		    void *, u_int32_t);

const struct bpf_ops bpf_mbuf_ops = {
	bpf_mbuf_ldw,
	bpf_mbuf_ldh,
	bpf_mbuf_ldb,
};

int
bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len)
{
	u_int8_t *cp = buf;
	u_int32_t count;

	while (off >= m->m_len) {
		off -= m->m_len;

		m = m->m_next;
		if (m == NULL)
			return (-1);
	}

	for (;;) {
		count = min(m->m_len - off, len);

		memcpy(cp, m->m_data + off, count);
		len -= count;

		if (len == 0)
			return (0);

		m = m->m_next;
		if (m == NULL)
			break;

		cp += count;
		off = 0;
	}

	return (-1);
}

u_int32_t
bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err)
{
	u_int32_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohl(v);
}

u_int32_t
bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err)
{
	u_int16_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohs(v);
}

u_int32_t
bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err)
{
	const struct mbuf *m = m0;
	u_int8_t v;

	while (k >= m->m_len) {
		k -= m->m_len;

		m = m->m_next;
		if (m == NULL) {
			*err = 1;
			return (0);
		}
	}
	v = m->m_data[k];

	*err = 0;
	return v;
}

u_int
bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen)
{
	return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen);
}