1 /*	$OpenBSD: bpf.c,v 1.169 2018/03/02 16:57:41 bluhm Exp $	*/
2 /*	$NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $	*/
3 
4 /*
5  * Copyright (c) 1990, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org>
8  *
9  * This code is derived from the Stanford/CMU enet packet filter,
10  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
11  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
12  * Berkeley Laboratory.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
39  */
40 
41 #include "bpfilter.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/mbuf.h>
46 #include <sys/proc.h>
47 #include <sys/signalvar.h>
48 #include <sys/ioctl.h>
49 #include <sys/conf.h>
50 #include <sys/vnode.h>
51 #include <sys/fcntl.h>
52 #include <sys/socket.h>
53 #include <sys/poll.h>
54 #include <sys/kernel.h>
55 #include <sys/sysctl.h>
56 #include <sys/rwlock.h>
57 #include <sys/atomic.h>
58 #include <sys/srp.h>
59 #include <sys/specdev.h>
60 #include <sys/selinfo.h>
61 #include <sys/task.h>
62 
63 #include <net/if.h>
64 #include <net/bpf.h>
65 #include <net/bpfdesc.h>
66 
67 #include <netinet/in.h>
68 #include <netinet/if_ether.h>
69 
70 #include "vlan.h"
71 #if NVLAN > 0
72 #include <net/if_vlan_var.h>
73 #endif
74 
75 #define BPF_BUFSIZE 32768
76 
77 #define PRINET  26			/* interruptible */
78 
79 /* from kern/kern_clock.c; incremented each clock tick. */
80 extern int ticks;
81 
82 /*
83  * The default read buffer size is patchable.
84  */
85 int bpf_bufsize = BPF_BUFSIZE;
86 int bpf_maxbufsize = BPF_MAXBUFSIZE;
87 
88 /*
89  *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
90  *  bpf_d_list is the list of descriptors
91  */
92 struct bpf_if	*bpf_iflist;
93 LIST_HEAD(, bpf_d) bpf_d_list;
94 
95 int	bpf_allocbufs(struct bpf_d *);
96 void	bpf_ifname(struct bpf_if*, struct ifreq *);
97 int	_bpf_mtap(caddr_t, const struct mbuf *, u_int,
98 	    void (*)(const void *, void *, size_t));
99 void	bpf_mcopy(const void *, void *, size_t);
100 int	bpf_movein(struct uio *, u_int, struct mbuf **,
101 	    struct sockaddr *, struct bpf_insn *);
102 int	bpf_setif(struct bpf_d *, struct ifreq *);
103 int	bpfpoll(dev_t, int, struct proc *);
104 int	bpfkqfilter(dev_t, struct knote *);
105 void	bpf_wakeup(struct bpf_d *);
106 void	bpf_wakeup_cb(void *);
107 void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
108 	    void (*)(const void *, void *, size_t), struct timeval *);
109 int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
110 int	bpf_setdlt(struct bpf_d *, u_int);
111 
112 void	filt_bpfrdetach(struct knote *);
113 int	filt_bpfread(struct knote *, long);
114 
115 int	bpf_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t);
116 
117 struct bpf_d *bpfilter_lookup(int);
118 
119 /*
120  * Called holding ``bd_mtx''.
121  */
122 void	bpf_attachd(struct bpf_d *, struct bpf_if *);
123 void	bpf_detachd(struct bpf_d *);
124 void	bpf_resetd(struct bpf_d *);
125 
126 /*
127  * Reference count access to descriptor buffers
128  */
129 void	bpf_get(struct bpf_d *);
130 void	bpf_put(struct bpf_d *);
131 
132 /*
133  * garbage collector srps
134  */
135 
136 void bpf_d_ref(void *, void *);
137 void bpf_d_unref(void *, void *);
138 struct srpl_rc bpf_d_rc = SRPL_RC_INITIALIZER(bpf_d_ref, bpf_d_unref, NULL);
139 
140 void bpf_insn_dtor(void *, void *);
141 struct srp_gc bpf_insn_gc = SRP_GC_INITIALIZER(bpf_insn_dtor, NULL);
142 
143 struct rwlock bpf_sysctl_lk = RWLOCK_INITIALIZER("bpfsz");
144 
145 int
146 bpf_movein(struct uio *uio, u_int linktype, struct mbuf **mp,
147     struct sockaddr *sockp, struct bpf_insn *filter)
148 {
149 	struct mbuf *m;
150 	struct m_tag *mtag;
151 	int error;
152 	u_int hlen;
153 	u_int len;
154 	u_int slen;
155 
156 	/*
157 	 * Build a sockaddr based on the data link layer type.
158 	 * We do this at this level because the ethernet header
159 	 * is copied directly into the data field of the sockaddr.
160 	 * In the case of SLIP, there is no header and the packet
161 	 * is forwarded as is.
162 	 * Also, we are careful to leave room at the front of the mbuf
163 	 * for the link level header.
164 	 */
165 	switch (linktype) {
166 
167 	case DLT_SLIP:
168 		sockp->sa_family = AF_INET;
169 		hlen = 0;
170 		break;
171 
172 	case DLT_PPP:
173 		sockp->sa_family = AF_UNSPEC;
174 		hlen = 0;
175 		break;
176 
177 	case DLT_EN10MB:
178 		sockp->sa_family = AF_UNSPEC;
179 		/* XXX Would MAXLINKHDR be better? */
180 		hlen = ETHER_HDR_LEN;
181 		break;
182 
183 	case DLT_IEEE802_11:
184 	case DLT_IEEE802_11_RADIO:
185 		sockp->sa_family = AF_UNSPEC;
186 		hlen = 0;
187 		break;
188 
189 	case DLT_RAW:
190 	case DLT_NULL:
191 		sockp->sa_family = AF_UNSPEC;
192 		hlen = 0;
193 		break;
194 
195 	case DLT_LOOP:
196 		sockp->sa_family = AF_UNSPEC;
197 		hlen = sizeof(u_int32_t);
198 		break;
199 
200 	default:
201 		return (EIO);
202 	}
203 
204 	if (uio->uio_resid > MAXMCLBYTES)
205 		return (EIO);
206 	len = uio->uio_resid;
207 
208 	MGETHDR(m, M_WAIT, MT_DATA);
209 	m->m_pkthdr.ph_ifidx = 0;
210 	m->m_pkthdr.len = len - hlen;
211 
212 	if (len > MHLEN) {
213 		MCLGETI(m, M_WAIT, NULL, len);
214 		if ((m->m_flags & M_EXT) == 0) {
215 			error = ENOBUFS;
216 			goto bad;
217 		}
218 	}
219 	m->m_len = len;
220 	*mp = m;
221 
222 	error = uiomove(mtod(m, caddr_t), len, uio);
223 	if (error)
224 		goto bad;
225 
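	/*
	 * If a write filter is installed it must accept the whole
	 * packet, otherwise the write is rejected below.
	 */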
226 	slen = bpf_filter(filter, mtod(m, u_char *), len, len);
227 	if (slen < len) {
228 		error = EPERM;
229 		goto bad;
230 	}
231 
232 	if (m->m_len < hlen) {
233 		error = EPERM;
234 		goto bad;
235 	}
236 	/*
237 	 * Make room for link header, and copy it to sockaddr
238 	 */
239 	if (hlen != 0) {
240 		if (linktype == DLT_LOOP) {
241 			u_int32_t af;
242 
243 			/* the link header indicates the address family */
244 			KASSERT(hlen == sizeof(u_int32_t));
245 			memcpy(&af, m->m_data, hlen);
246 			sockp->sa_family = ntohl(af);
247 		} else
248 			memcpy(sockp->sa_data, m->m_data, hlen);
249 		m->m_len -= hlen;
250 		m->m_data += hlen; /* XXX */
251 	}
252 
253 	/*
254 	 * Prepend the data link type as a mbuf tag
255 	 */
256 	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
257 	*(u_int *)(mtag + 1) = linktype;
258 	m_tag_prepend(m, mtag);
259 
260 	return (0);
261  bad:
262 	m_freem(m);
263 	return (error);
264 }
265 
266 /*
267  * Attach file to the bpf interface, i.e. make d listen on bp.
268  */
269 void
270 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
271 {
272 	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
273 
274 	/*
275 	 * Point d at bp, and add d to the interface's list of listeners.
276 	 * Finally, point the driver's bpf cookie at the interface so
277 	 * it will divert packets to bpf.
278 	 */
279 
280 	d->bd_bif = bp;
281 
282 	KERNEL_ASSERT_LOCKED();
283 	SRPL_INSERT_HEAD_LOCKED(&bpf_d_rc, &bp->bif_dlist, d, bd_next);
284 
285 	*bp->bif_driverp = bp;
286 }
287 
288 /*
289  * Detach a file from its interface.
290  */
291 void
292 bpf_detachd(struct bpf_d *d)
293 {
294 	struct bpf_if *bp;
295 
296 	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
297 
298 	bp = d->bd_bif;
299 	/* Not attached. */
300 	if (bp == NULL)
301 		return;
302 
303 	/* Remove ``d'' from the interface's descriptor list. */
304 	KERNEL_ASSERT_LOCKED();
305 	SRPL_REMOVE_LOCKED(&bpf_d_rc, &bp->bif_dlist, d, bpf_d, bd_next);
306 
307 	if (SRPL_EMPTY_LOCKED(&bp->bif_dlist)) {
308 		/*
309 		 * Let the driver know that there are no more listeners.
310 		 */
311 		*bp->bif_driverp = NULL;
312 	}
313 
314 	d->bd_bif = NULL;
315 
316 	/*
317 	 * Check if this descriptor had requested promiscuous mode.
318 	 * If so, turn it off.
319 	 */
320 	if (d->bd_promisc) {
321 		int error;
322 
323 		KASSERT(bp->bif_ifp != NULL);
324 
325 		d->bd_promisc = 0;
326 
327 		bpf_get(d);
328 		mtx_leave(&d->bd_mtx);
329 		NET_LOCK();
330 		error = ifpromisc(bp->bif_ifp, 0);
331 		NET_UNLOCK();
332 		mtx_enter(&d->bd_mtx);
333 		bpf_put(d);
334 
335 		if (error && !(error == EINVAL || error == ENODEV))
336 			/*
337 			 * Something is really wrong if we were able to put
338 			 * the driver into promiscuous mode, but can't
339 			 * take it out.
340 			 */
341 			panic("bpf: ifpromisc failed");
342 	}
343 }
344 
345 void
346 bpfilterattach(int n)
347 {
348 	LIST_INIT(&bpf_d_list);
349 }
350 
351 /*
352  * Open the bpf device.  Returns ENXIO for an illegal minor device number,
353  * EBUSY if a descriptor cannot be allocated.
354  */
355 int
356 bpfopen(dev_t dev, int flag, int mode, struct proc *p)
357 {
358 	struct bpf_d *bd;
359 	int unit = minor(dev);
360 
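	/*
	 * Only minor numbers with the low CLONE_SHIFT bits clear are
	 * valid bpf units.
	 */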
361 	if (unit & ((1 << CLONE_SHIFT) - 1))
362 		return (ENXIO);
363 
364 	KASSERT(bpfilter_lookup(unit) == NULL);
365 
366 	/* create on demand */
367 	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
368 		return (EBUSY);
369 
370 	/* Mark "free" and do most initialization. */
371 	bd->bd_unit = unit;
372 	bd->bd_bufsize = bpf_bufsize;
373 	bd->bd_sig = SIGIO;
374 	mtx_init(&bd->bd_mtx, IPL_NET);
375 	task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
376 
377 	if (flag & FNONBLOCK)
378 		bd->bd_rtout = -1;
379 
380 	bpf_get(bd);
381 	LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);
382 
383 	return (0);
384 }
385 
386 /*
387  * Close the descriptor by detaching it from its interface,
388  * deallocating its buffers, and marking it free.
389  */
390 int
391 bpfclose(dev_t dev, int flag, int mode, struct proc *p)
392 {
393 	struct bpf_d *d;
394 
395 	d = bpfilter_lookup(minor(dev));
396 	mtx_enter(&d->bd_mtx);
397 	bpf_detachd(d);
398 	bpf_wakeup(d);
399 	LIST_REMOVE(d, bd_list);
400 	mtx_leave(&d->bd_mtx);
401 	bpf_put(d);
402 
403 	return (0);
404 }
405 
406 /*
407  * Rotate the packet buffers in descriptor d.  Move the store buffer
408  * into the hold slot, and the free buffer into the store slot.
409  * Zero the length of the new store buffer.
410  */
411 #define ROTATE_BUFFERS(d) \
412 	KASSERT(d->bd_in_uiomove == 0); \
413 	MUTEX_ASSERT_LOCKED(&d->bd_mtx); \
414 	(d)->bd_hbuf = (d)->bd_sbuf; \
415 	(d)->bd_hlen = (d)->bd_slen; \
416 	(d)->bd_sbuf = (d)->bd_fbuf; \
417 	(d)->bd_slen = 0; \
418 	(d)->bd_fbuf = NULL;
419 /*
420  *  bpfread - read next chunk of packets from buffers
421  */
422 int
423 bpfread(dev_t dev, struct uio *uio, int ioflag)
424 {
425 	struct bpf_d *d;
426 	caddr_t hbuf;
427 	int hlen, error;
428 
429 	KERNEL_ASSERT_LOCKED();
430 
431 	d = bpfilter_lookup(minor(dev));
432 	if (d->bd_bif == NULL)
433 		return (ENXIO);
434 
435 	bpf_get(d);
436 	mtx_enter(&d->bd_mtx);
437 
438 	/*
439 	 * Restrict the application to use a buffer the same size
440 	 * as the kernel buffers.
441 	 */
442 	if (uio->uio_resid != d->bd_bufsize) {
443 		error = EINVAL;
444 		goto out;
445 	}
446 
447 	/*
448 	 * If there's a timeout, bd_rdStart is tagged when we start the read.
449 	 * We can then figure out when we're done reading.
450 	 */
451 	if (d->bd_rtout != -1 && d->bd_rdStart == 0)
452 		d->bd_rdStart = ticks;
453 	else
454 		d->bd_rdStart = 0;
455 
456 	/*
457 	 * If the hold buffer is empty, then do a timed sleep, which
458 	 * ends when the timeout expires or when enough packets
459 	 * have arrived to fill the store buffer.
460 	 */
461 	while (d->bd_hbuf == NULL) {
462 		if (d->bd_bif == NULL) {
463 			/* interface is gone */
464 			if (d->bd_slen == 0) {
465 				error = EIO;
466 				goto out;
467 			}
468 			ROTATE_BUFFERS(d);
469 			break;
470 		}
471 		if (d->bd_immediate && d->bd_slen != 0) {
472 			/*
473 			 * A packet(s) either arrived since the previous
474 			 * read or arrived while we were asleep.
475 			 * Rotate the buffers and return what's here.
476 			 */
477 			ROTATE_BUFFERS(d);
478 			break;
479 		}
480 		if (d->bd_rtout == -1) {
481 			/* User requested non-blocking I/O */
482 			error = EWOULDBLOCK;
483 		} else {
484 			if ((d->bd_rdStart + d->bd_rtout) < ticks) {
485 				error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
486 				    "bpf", d->bd_rtout);
487 			} else
488 				error = EWOULDBLOCK;
489 		}
490 		if (error == EINTR || error == ERESTART)
491 			goto out;
492 		if (error == EWOULDBLOCK) {
493 			/*
494 			 * On a timeout, return what's in the buffer,
495 			 * which may be nothing.  If there is something
496 			 * in the store buffer, we can rotate the buffers.
497 			 */
498 			if (d->bd_hbuf != NULL)
499 				/*
500 				 * We filled up the buffer in between
501 				 * getting the timeout and arriving
502 				 * here, so we don't need to rotate.
503 				 */
504 				break;
505 
506 			if (d->bd_slen == 0) {
507 				error = 0;
508 				goto out;
509 			}
510 			ROTATE_BUFFERS(d);
511 			break;
512 		}
513 	}
514 	/*
515 	 * At this point, we know we have something in the hold slot.
516 	 */
517 	hbuf = d->bd_hbuf;
518 	hlen = d->bd_hlen;
519 	d->bd_hbuf = NULL;
520 	d->bd_hlen = 0;
521 	d->bd_fbuf = NULL;
522 	d->bd_in_uiomove = 1;
523 
524 	/*
525 	 * Move data from hold buffer into user space.
526 	 * We know the entire buffer is transferred since
527 	 * we checked above that the read buffer is bpf_bufsize bytes.
528 	 */
529 	mtx_leave(&d->bd_mtx);
530 	error = uiomove(hbuf, hlen, uio);
531 	mtx_enter(&d->bd_mtx);
532 
533 	/* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */
534 	KASSERT(d->bd_fbuf == NULL);
535 	KASSERT(d->bd_hbuf == NULL);
536 	d->bd_fbuf = hbuf;
537 	d->bd_in_uiomove = 0;
538 out:
539 	mtx_leave(&d->bd_mtx);
540 	bpf_put(d);
541 
542 	return (error);
543 }
544 
545 
546 /*
547  * If there are processes sleeping on this descriptor, wake them up.
548  */
549 void
550 bpf_wakeup(struct bpf_d *d)
551 {
552 	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
553 
554 	/*
555 	 * As long as csignal() and selwakeup() need to be protected
556 	 * by the KERNEL_LOCK() we have to delay the wakeup to
557 	 * another context to keep the hot path KERNEL_LOCK()-free.
558 	 */
559 	bpf_get(d);
560 	if (!task_add(systq, &d->bd_wake_task))
561 		bpf_put(d);
562 }
563 
564 void
565 bpf_wakeup_cb(void *xd)
566 {
567 	struct bpf_d *d = xd;
568 
569 	KERNEL_ASSERT_LOCKED();
570 
571 	wakeup(d);
572 	if (d->bd_async && d->bd_sig)
573 		csignal(d->bd_pgid, d->bd_sig, d->bd_siguid, d->bd_sigeuid);
574 
575 	selwakeup(&d->bd_sel);
576 	bpf_put(d);
577 }
578 
579 int
580 bpfwrite(dev_t dev, struct uio *uio, int ioflag)
581 {
582 	struct bpf_d *d;
583 	struct ifnet *ifp;
584 	struct mbuf *m;
585 	struct bpf_program *bf;
586 	struct bpf_insn *fcode = NULL;
587 	int error;
588 	struct sockaddr_storage dst;
589 	u_int dlt;
590 
591 	KERNEL_ASSERT_LOCKED();
592 
593 	d = bpfilter_lookup(minor(dev));
594 	if (d->bd_bif == NULL)
595 		return (ENXIO);
596 
597 	bpf_get(d);
598 	ifp = d->bd_bif->bif_ifp;
599 
600 	if (ifp == NULL || (ifp->if_flags & IFF_UP) == 0) {
601 		error = ENETDOWN;
602 		goto out;
603 	}
604 
605 	if (uio->uio_resid == 0) {
606 		error = 0;
607 		goto out;
608 	}
609 
610 	KERNEL_ASSERT_LOCKED(); /* for accessing bd_wfilter */
611 	bf = srp_get_locked(&d->bd_wfilter);
612 	if (bf != NULL)
613 		fcode = bf->bf_insns;
614 
615 	dlt = d->bd_bif->bif_dlt;
616 
617 	error = bpf_movein(uio, dlt, &m, sstosa(&dst), fcode);
618 	if (error)
619 		goto out;
620 
621 	if (m->m_pkthdr.len > ifp->if_mtu) {
622 		m_freem(m);
623 		error = EMSGSIZE;
624 		goto out;
625 	}
626 
627 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
628 	m->m_pkthdr.pf.prio = ifp->if_llprio;
629 
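	/*
	 * With the "header already complete" flag set, the supplied
	 * link level header is passed to the interface as is.
	 */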
630 	if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
631 		dst.ss_family = pseudo_AF_HDRCMPLT;
632 
633 	NET_LOCK();
634 	error = ifp->if_output(ifp, m, sstosa(&dst), NULL);
635 	NET_UNLOCK();
636 
637 out:
638 	bpf_put(d);
639 	return (error);
640 }
641 
642 /*
643  * Reset a descriptor by flushing its packet buffer and clearing the
644  * receive and drop counts.
645  */
646 void
647 bpf_resetd(struct bpf_d *d)
648 {
649 	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
650 	KASSERT(d->bd_in_uiomove == 0);
651 
652 	if (d->bd_hbuf != NULL) {
653 		/* Free the hold buffer. */
654 		d->bd_fbuf = d->bd_hbuf;
655 		d->bd_hbuf = NULL;
656 	}
657 	d->bd_slen = 0;
658 	d->bd_hlen = 0;
659 	d->bd_rcount = 0;
660 	d->bd_dcount = 0;
661 }
662 
663 /*
664  *  FIONREAD		Check for read packet available.
665  *  BIOCGBLEN		Get buffer len [for read()].
666  *  BIOCSETF		Set link layer read filter.
667  *  BIOCFLUSH		Flush read packet buffer.
668  *  BIOCPROMISC		Put interface into promiscuous mode.
669  *  BIOCGDLTLIST	Get supported link layer types.
670  *  BIOCGDLT		Get link layer type.
671  *  BIOCSDLT		Set link layer type.
672  *  BIOCGETIF		Get interface name.
673  *  BIOCSETIF		Set interface.
674  *  BIOCSRTIMEOUT	Set read timeout.
675  *  BIOCGRTIMEOUT	Get read timeout.
676  *  BIOCGSTATS		Get packet stats.
677  *  BIOCIMMEDIATE	Set immediate mode.
678  *  BIOCVERSION		Get filter language version.
679  *  BIOCGHDRCMPLT	Get "header already complete" flag
680  *  BIOCSHDRCMPLT	Set "header already complete" flag
681  */
682 int
683 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
684 {
685 	struct bpf_d *d;
686 	int error = 0;
687 
688 	d = bpfilter_lookup(minor(dev));
689 	if (d->bd_locked && suser(p) != 0) {
690 		/* list of allowed ioctls when locked and not root */
691 		switch (cmd) {
692 		case BIOCGBLEN:
693 		case BIOCFLUSH:
694 		case BIOCGDLT:
695 		case BIOCGDLTLIST:
696 		case BIOCGETIF:
697 		case BIOCGRTIMEOUT:
698 		case BIOCGSTATS:
699 		case BIOCVERSION:
700 		case BIOCGRSIG:
701 		case BIOCGHDRCMPLT:
702 		case FIONREAD:
703 		case BIOCLOCK:
704 		case BIOCSRTIMEOUT:
705 		case BIOCIMMEDIATE:
706 		case TIOCGPGRP:
707 		case BIOCGDIRFILT:
708 			break;
709 		default:
710 			return (EPERM);
711 		}
712 	}
713 
714 	bpf_get(d);
715 
716 	switch (cmd) {
717 	default:
718 		error = EINVAL;
719 		break;
720 
721 	/*
722 	 * Check for read packet available.
723 	 */
724 	case FIONREAD:
725 		{
726 			int n;
727 
728 			mtx_enter(&d->bd_mtx);
729 			n = d->bd_slen;
730 			if (d->bd_hbuf != NULL)
731 				n += d->bd_hlen;
732 			mtx_leave(&d->bd_mtx);
733 
734 			*(int *)addr = n;
735 			break;
736 		}
737 
738 	/*
739 	 * Get buffer len [for read()].
740 	 */
741 	case BIOCGBLEN:
742 		*(u_int *)addr = d->bd_bufsize;
743 		break;
744 
745 	/*
746 	 * Set buffer length.
747 	 */
748 	case BIOCSBLEN:
749 		if (d->bd_bif != NULL)
750 			error = EINVAL;
751 		else {
752 			u_int size = *(u_int *)addr;
753 
754 			if (size > bpf_maxbufsize)
755 				*(u_int *)addr = size = bpf_maxbufsize;
756 			else if (size < BPF_MINBUFSIZE)
757 				*(u_int *)addr = size = BPF_MINBUFSIZE;
758 			mtx_enter(&d->bd_mtx);
759 			d->bd_bufsize = size;
760 			mtx_leave(&d->bd_mtx);
761 		}
762 		break;
763 
764 	/*
765 	 * Set link layer read filter.
766 	 */
767 	case BIOCSETF:
768 		error = bpf_setf(d, (struct bpf_program *)addr, 0);
769 		break;
770 
771 	/*
772 	 * Set link layer write filter.
773 	 */
774 	case BIOCSETWF:
775 		error = bpf_setf(d, (struct bpf_program *)addr, 1);
776 		break;
777 
778 	/*
779 	 * Flush read packet buffer.
780 	 */
781 	case BIOCFLUSH:
782 		mtx_enter(&d->bd_mtx);
783 		bpf_resetd(d);
784 		mtx_leave(&d->bd_mtx);
785 		break;
786 
787 	/*
788 	 * Put interface into promiscuous mode.
789 	 */
790 	case BIOCPROMISC:
791 		if (d->bd_bif == NULL) {
792 			/*
793 			 * No interface attached yet.
794 			 */
795 			error = EINVAL;
796 		} else if (d->bd_bif->bif_ifp != NULL) {
797 			if (d->bd_promisc == 0) {
798 				MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
799 				NET_LOCK();
800 				error = ifpromisc(d->bd_bif->bif_ifp, 1);
801 				NET_UNLOCK();
802 				if (error == 0)
803 					d->bd_promisc = 1;
804 			}
805 		}
806 		break;
807 
808 	/*
809 	 * Get a list of supported data link types.
810 	 */
811 	case BIOCGDLTLIST:
812 		if (d->bd_bif == NULL)
813 			error = EINVAL;
814 		else
815 			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
816 		break;
817 
818 	/*
819 	 * Get the data link type.
820 	 */
821 	case BIOCGDLT:
822 		if (d->bd_bif == NULL)
823 			error = EINVAL;
824 		else
825 			*(u_int *)addr = d->bd_bif->bif_dlt;
826 		break;
827 
828 	/*
829 	 * Set the data link type.
830 	 */
831 	case BIOCSDLT:
832 		if (d->bd_bif == NULL)
833 			error = EINVAL;
834 		else {
835 			mtx_enter(&d->bd_mtx);
836 			error = bpf_setdlt(d, *(u_int *)addr);
837 			mtx_leave(&d->bd_mtx);
838 		}
839 		break;
840 
841 	/*
842 	 * Get interface name.
843 	 */
844 	case BIOCGETIF:
845 		if (d->bd_bif == NULL)
846 			error = EINVAL;
847 		else
848 			bpf_ifname(d->bd_bif, (struct ifreq *)addr);
849 		break;
850 
851 	/*
852 	 * Set interface.
853 	 */
854 	case BIOCSETIF:
855 		error = bpf_setif(d, (struct ifreq *)addr);
856 		break;
857 
858 	/*
859 	 * Set read timeout.
860 	 */
861 	case BIOCSRTIMEOUT:
862 		{
863 			struct timeval *tv = (struct timeval *)addr;
864 
865 			/* Compute number of ticks. */
866 			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
867 			if (d->bd_rtout == 0 && tv->tv_usec != 0)
868 				d->bd_rtout = 1;
869 			break;
870 		}
871 
872 	/*
873 	 * Get read timeout.
874 	 */
875 	case BIOCGRTIMEOUT:
876 		{
877 			struct timeval *tv = (struct timeval *)addr;
878 
879 			tv->tv_sec = d->bd_rtout / hz;
880 			tv->tv_usec = (d->bd_rtout % hz) * tick;
881 			break;
882 		}
883 
884 	/*
885 	 * Get packet stats.
886 	 */
887 	case BIOCGSTATS:
888 		{
889 			struct bpf_stat *bs = (struct bpf_stat *)addr;
890 
891 			bs->bs_recv = d->bd_rcount;
892 			bs->bs_drop = d->bd_dcount;
893 			break;
894 		}
895 
896 	/*
897 	 * Set immediate mode.
898 	 */
899 	case BIOCIMMEDIATE:
900 		d->bd_immediate = *(u_int *)addr;
901 		break;
902 
903 	case BIOCVERSION:
904 		{
905 			struct bpf_version *bv = (struct bpf_version *)addr;
906 
907 			bv->bv_major = BPF_MAJOR_VERSION;
908 			bv->bv_minor = BPF_MINOR_VERSION;
909 			break;
910 		}
911 
912 	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
913 		*(u_int *)addr = d->bd_hdrcmplt;
914 		break;
915 
916 	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
917 		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
918 		break;
919 
920 	case BIOCLOCK:		/* set "locked" flag (no reset) */
921 		d->bd_locked = 1;
922 		break;
923 
924 	case BIOCGFILDROP:	/* get "filter-drop" flag */
925 		*(u_int *)addr = d->bd_fildrop;
926 		break;
927 
928 	case BIOCSFILDROP:	/* set "filter-drop" flag */
929 		d->bd_fildrop = *(u_int *)addr ? 1 : 0;
930 		break;
931 
932 	case BIOCGDIRFILT:	/* get direction filter */
933 		*(u_int *)addr = d->bd_dirfilt;
934 		break;
935 
936 	case BIOCSDIRFILT:	/* set direction filter */
937 		d->bd_dirfilt = (*(u_int *)addr) &
938 		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
939 		break;
940 
941 	case FIONBIO:		/* Non-blocking I/O */
942 		if (*(int *)addr)
943 			d->bd_rtout = -1;
944 		else
945 			d->bd_rtout = 0;
946 		break;
947 
948 	case FIOASYNC:		/* Send signal on receive packets */
949 		d->bd_async = *(int *)addr;
950 		break;
951 
952 	/*
953 	 * N.B.  ioctl (FIOSETOWN) and fcntl (F_SETOWN) both end up doing
954 	 * the equivalent of a TIOCSPGRP and hence end up here.  *However*
955 	 * TIOCSPGRP's arg is a process group if it's positive and a process
956 	 * id if it's negative.  This is exactly the opposite of what the
957 	 * other two functions want!  Therefore there is code in ioctl and
958 	 * fcntl to negate the arg before calling here.
959 	 */
960 	case TIOCSPGRP:		/* Process or group to send signals to */
961 		d->bd_pgid = *(int *)addr;
962 		d->bd_siguid = p->p_ucred->cr_ruid;
963 		d->bd_sigeuid = p->p_ucred->cr_uid;
964 		break;
965 
966 	case TIOCGPGRP:
967 		*(int *)addr = d->bd_pgid;
968 		break;
969 
970 	case BIOCSRSIG:		/* Set receive signal */
971 		{
972 			u_int sig;
973 
974 			sig = *(u_int *)addr;
975 
976 			if (sig >= NSIG)
977 				error = EINVAL;
978 			else
979 				d->bd_sig = sig;
980 			break;
981 		}
982 	case BIOCGRSIG:
983 		*(u_int *)addr = d->bd_sig;
984 		break;
985 	}
986 
987 	bpf_put(d);
988 	return (error);
989 }
990 
991 /*
992  * Set d's packet filter program to fp.  If this file already has a filter,
993  * free it and replace it.  Returns EINVAL for bogus requests.
994  */
995 int
996 bpf_setf(struct bpf_d *d, struct bpf_program *fp, int wf)
997 {
998 	struct bpf_program *bf;
999 	struct srp *filter;
1000 	struct bpf_insn *fcode;
1001 	u_int flen, size;
1002 
1003 	KERNEL_ASSERT_LOCKED();
1004 	filter = wf ? &d->bd_wfilter : &d->bd_rfilter;
1005 
1006 	if (fp->bf_insns == 0) {
1007 		if (fp->bf_len != 0)
1008 			return (EINVAL);
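		/*
		 * Clearing the srp lets bpf_insn_dtor free any old filter.
		 */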
1009 		srp_update_locked(&bpf_insn_gc, filter, NULL);
1010 		mtx_enter(&d->bd_mtx);
1011 		bpf_resetd(d);
1012 		mtx_leave(&d->bd_mtx);
1013 		return (0);
1014 	}
1015 	flen = fp->bf_len;
1016 	if (flen > BPF_MAXINSNS)
1017 		return (EINVAL);
1018 
1019 	fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
1020 	    M_WAITOK | M_CANFAIL);
1021 	if (fcode == NULL)
1022 		return (ENOMEM);
1023 
1024 	size = flen * sizeof(*fp->bf_insns);
1025 	if (copyin(fp->bf_insns, fcode, size) != 0 ||
1026 	    bpf_validate(fcode, (int)flen) == 0) {
1027 		free(fcode, M_DEVBUF, size);
1028 		return (EINVAL);
1029 	}
1030 
1031 	bf = malloc(sizeof(*bf), M_DEVBUF, M_WAITOK);
1032 	bf->bf_len = flen;
1033 	bf->bf_insns = fcode;
1034 
1035 	srp_update_locked(&bpf_insn_gc, filter, bf);
1036 
1037 	mtx_enter(&d->bd_mtx);
1038 	bpf_resetd(d);
1039 	mtx_leave(&d->bd_mtx);
1040 	return (0);
1041 }
1042 
1043 /*
1044  * Detach a file from its current interface (if attached at all) and attach
1045  * to the interface indicated by the name stored in ifr.
1046  * Return an errno or 0.
1047  */
1048 int
1049 bpf_setif(struct bpf_d *d, struct ifreq *ifr)
1050 {
1051 	struct bpf_if *bp, *candidate = NULL;
1052 	int error = 0;
1053 
1054 	/*
1055 	 * Look through attached interfaces for the named one.
1056 	 */
1057 	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1058 		if (strcmp(bp->bif_name, ifr->ifr_name) != 0)
1059 			continue;
1060 
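		/* among entries with this name, prefer the lowest DLT */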
1061 		if (candidate == NULL || candidate->bif_dlt > bp->bif_dlt)
1062 			candidate = bp;
1063 	}
1064 
1065 	/* Not found. */
1066 	if (candidate == NULL)
1067 		return (ENXIO);
1068 
1069 	/*
1070 	 * Allocate the packet buffers if we need to.
1071 	 * If we're already attached to the requested interface,
1072 	 * just flush the buffer.
1073 	 */
1074 	mtx_enter(&d->bd_mtx);
1075 	if (d->bd_sbuf == NULL) {
1076 		if ((error = bpf_allocbufs(d)))
1077 			goto out;
1078 	}
1079 	if (candidate != d->bd_bif) {
1080 		/*
1081 		 * Detach if attached to something else.
1082 		 */
1083 		bpf_detachd(d);
1084 		bpf_attachd(d, candidate);
1085 	}
1086 	bpf_resetd(d);
1087 out:
1088 	mtx_leave(&d->bd_mtx);
1089 	return (error);
1090 }
1091 
1092 /*
1093  * Copy the interface name to the ifreq.
1094  */
1095 void
1096 bpf_ifname(struct bpf_if *bif, struct ifreq *ifr)
1097 {
1098 	bcopy(bif->bif_name, ifr->ifr_name, sizeof(ifr->ifr_name));
1099 }
1100 
1101 /*
1102  * Support for poll() system call
1103  */
1104 int
1105 bpfpoll(dev_t dev, int events, struct proc *p)
1106 {
1107 	struct bpf_d *d;
1108 	int revents;
1109 
1110 	KERNEL_ASSERT_LOCKED();
1111 
1112 	/*
1113 	 * An imitation of the FIONREAD ioctl code.
1114 	 */
1115 	d = bpfilter_lookup(minor(dev));
1116 
1117 	/*
1118 	 * XXX The USB stack manages to trigger a race condition which
1119 	 * causes bpfilter_lookup to return NULL when a USB device gets
1120 	 * detached while it is up and has an open bpf handler (e.g.
1121 	 * dhclient).  We should still check whether we can fix the root
1122 	 * cause of this issue.
1123 	 */
1124 	if (d == NULL)
1125 		return (POLLERR);
1126 
1127 	/* Always ready to write data */
1128 	revents = events & (POLLOUT | POLLWRNORM);
1129 
1130 	if (events & (POLLIN | POLLRDNORM)) {
1131 		mtx_enter(&d->bd_mtx);
1132 		if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
1133 			revents |= events & (POLLIN | POLLRDNORM);
1134 		else {
1135 			/*
1136 			 * if there's a timeout, mark the time we
1137 			 * started waiting.
1138 			 */
1139 			if (d->bd_rtout != -1 && d->bd_rdStart == 0)
1140 				d->bd_rdStart = ticks;
1141 			selrecord(p, &d->bd_sel);
1142 		}
1143 		mtx_leave(&d->bd_mtx);
1144 	}
1145 	return (revents);
1146 }
1147 
1148 struct filterops bpfread_filtops =
1149 	{ 1, NULL, filt_bpfrdetach, filt_bpfread };
1150 
1151 int
1152 bpfkqfilter(dev_t dev, struct knote *kn)
1153 {
1154 	struct bpf_d *d;
1155 	struct klist *klist;
1156 
1157 	KERNEL_ASSERT_LOCKED();
1158 
1159 	d = bpfilter_lookup(minor(dev));
1160 
1161 	switch (kn->kn_filter) {
1162 	case EVFILT_READ:
1163 		klist = &d->bd_sel.si_note;
1164 		kn->kn_fop = &bpfread_filtops;
1165 		break;
1166 	default:
1167 		return (EINVAL);
1168 	}
1169 
1170 	bpf_get(d);
1171 	kn->kn_hook = d;
1172 	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
1173 
1174 	mtx_enter(&d->bd_mtx);
1175 	if (d->bd_rtout != -1 && d->bd_rdStart == 0)
1176 		d->bd_rdStart = ticks;
1177 	mtx_leave(&d->bd_mtx);
1178 
1179 	return (0);
1180 }
1181 
1182 void
1183 filt_bpfrdetach(struct knote *kn)
1184 {
1185 	struct bpf_d *d = kn->kn_hook;
1186 
1187 	KERNEL_ASSERT_LOCKED();
1188 
1189 	SLIST_REMOVE(&d->bd_sel.si_note, kn, knote, kn_selnext);
1190 	bpf_put(d);
1191 }
1192 
1193 int
1194 filt_bpfread(struct knote *kn, long hint)
1195 {
1196 	struct bpf_d *d = kn->kn_hook;
1197 
1198 	KERNEL_ASSERT_LOCKED();
1199 
1200 	mtx_enter(&d->bd_mtx);
1201 	kn->kn_data = d->bd_hlen;
1202 	if (d->bd_immediate)
1203 		kn->kn_data += d->bd_slen;
1204 	mtx_leave(&d->bd_mtx);
1205 
1206 	return (kn->kn_data > 0);
1207 }
1208 
1209 /*
1210  * Copy data from an mbuf chain into a buffer.  This code is derived
1211  * from m_copydata in sys/uipc_mbuf.c.
1212  */
1213 void
1214 bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
1215 {
1216 	const struct mbuf *m;
1217 	u_int count;
1218 	u_char *dst;
1219 
1220 	m = src_arg;
1221 	dst = dst_arg;
1222 	while (len > 0) {
1223 		if (m == NULL)
1224 			panic("bpf_mcopy");
1225 		count = min(m->m_len, len);
1226 		bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
1227 		m = m->m_next;
1228 		dst += count;
1229 		len -= count;
1230 	}
1231 }
1232 
1233 /*
1234  * Like bpf_mtap(), but the copy function can be given.  Used by bpf_mtap*().
1235  */
1236 int
1237 _bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction,
1238     void (*cpfn)(const void *, void *, size_t))
1239 {
1240 	struct bpf_if *bp = (struct bpf_if *)arg;
1241 	struct srp_ref sr;
1242 	struct bpf_d *d;
1243 	size_t pktlen, slen;
1244 	const struct mbuf *m0;
1245 	struct timeval tv;
1246 	int gottime = 0;
1247 	int drop = 0;
1248 
1249 	if (m == NULL)
1250 		return (0);
1251 
1252 	if (cpfn == NULL)
1253 		cpfn = bpf_mcopy;
1254 
1255 	if (bp == NULL)
1256 		return (0);
1257 
1258 	pktlen = 0;
1259 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
1260 		pktlen += m0->m_len;
1261 
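	/* hand the packet to every descriptor listening on this interface */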
1262 	SRPL_FOREACH(d, &sr, &bp->bif_dlist, bd_next) {
1263 		atomic_inc_long(&d->bd_rcount);
1264 
1265 		if ((direction & d->bd_dirfilt) != 0)
1266 			slen = 0;
1267 		else {
1268 			struct srp_ref bsr;
1269 			struct bpf_program *bf;
1270 			struct bpf_insn *fcode = NULL;
1271 
1272 			bf = srp_enter(&bsr, &d->bd_rfilter);
1273 			if (bf != NULL)
1274 				fcode = bf->bf_insns;
1275 			slen = bpf_mfilter(fcode, m, pktlen);
1276 			srp_leave(&bsr);
1277 		}
1278 
1279 		if (slen > 0) {
1280 			if (!gottime++)
1281 				microtime(&tv);
1282 
1283 			mtx_enter(&d->bd_mtx);
1284 			bpf_catchpacket(d, (u_char *)m, pktlen, slen, cpfn,
1285 			    &tv);
1286 			mtx_leave(&d->bd_mtx);
1287 
1288 			if (d->bd_fildrop)
1289 				drop = 1;
1290 		}
1291 	}
1292 	SRPL_LEAVE(&sr);
1293 
1294 	return (drop);
1295 }
1296 
1297 /*
1298  * Incoming linkage from device drivers, where a data buffer should be
1299  * prepended by an arbitrary header. In this situation we already have a
1300  * way of representing a chain of memory buffers, i.e. mbufs, so reuse
1301  * the existing functionality by attaching the buffers to mbufs.
1302  *
1303  * Con up a minimal mbuf chain to pacify bpf by allocating (only) a
1304  * struct m_hdr each for the header and data on the stack.
1305  */
1306 int
1307 bpf_tap_hdr(caddr_t arg, const void *hdr, unsigned int hdrlen,
1308     const void *buf, unsigned int buflen, u_int direction)
1309 {
1310 	struct m_hdr mh, md;
1311 	struct mbuf *m0 = NULL;
1312 	struct mbuf **mp = &m0;
1313 
1314 	if (hdr != NULL) {
1315 		mh.mh_flags = 0;
1316 		mh.mh_next = NULL;
1317 		mh.mh_len = hdrlen;
1318 		mh.mh_data = (void *)hdr;
1319 
1320 		*mp = (struct mbuf *)&mh;
1321 		mp = &mh.mh_next;
1322 	}
1323 
1324 	if (buf != NULL) {
1325 		md.mh_flags = 0;
1326 		md.mh_next = NULL;
1327 		md.mh_len = buflen;
1328 		md.mh_data = (void *)buf;
1329 
1330 		*mp = (struct mbuf *)&md;
1331 	}
1332 
1333 	return _bpf_mtap(arg, m0, direction, bpf_mcopy);
1334 }
1335 
1336 /*
1337  * Incoming linkage from device drivers, when packet is in an mbuf chain.
1338  */
1339 int
1340 bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
1341 {
1342 	return _bpf_mtap(arg, m, direction, NULL);
1343 }
1344 
1345 /*
1346  * Incoming linkage from device drivers, where we have a mbuf chain
1347  * but need to prepend some arbitrary header from a linear buffer.
1348  *
1349  * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
1350  * struct m_hdr on the stack.  This is safe as bpf only reads from the
1351  * fields in this header that we initialize, and will not try to free
1352  * it or keep a pointer to it.
1353  */
1354 int
1355 bpf_mtap_hdr(caddr_t arg, caddr_t data, u_int dlen, const struct mbuf *m,
1356     u_int direction, void (*cpfn)(const void *, void *, size_t))
1357 {
1358 	struct m_hdr mh;
1359 	const struct mbuf *m0;
1360 
1361 	if (dlen > 0) {
1362 		mh.mh_flags = 0;
1363 		mh.mh_next = (struct mbuf *)m;
1364 		mh.mh_len = dlen;
1365 		mh.mh_data = data;
1366 		m0 = (struct mbuf *)&mh;
1367 	} else
1368 		m0 = m;
1369 
1370 	return _bpf_mtap(arg, m0, direction, cpfn);
1371 }
1372 
1373 /*
1374  * Incoming linkage from device drivers, where we have a mbuf chain
1375  * but need to prepend the address family.
1376  *
1377  * Con up a minimal dummy header to pacify bpf.  We allocate (only) a
1378  * struct m_hdr on the stack.  This is safe as bpf only reads from the
1379  * fields in this header that we initialize, and will not try to free
1380  * it or keep a pointer to it.
1381  */
1382 int
1383 bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
1384 {
1385 	u_int32_t    afh;
1386 
1387 	afh = htonl(af);
1388 
1389 	return bpf_mtap_hdr(arg, (caddr_t)&afh, sizeof(afh),
1390 	    m, direction, NULL);
1391 }
1392 
1393 /*
1394  * Incoming linkage from device drivers, where we have a mbuf chain
1395  * but need to prepend a VLAN encapsulation header.
1396  *
1397  * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
1398  * struct m_hdr on the stack.  This is safe as bpf only reads from the
1399  * fields in this header that we initialize, and will not try to free
1400  * it or keep a pointer to it.
1401  */
1402 int
1403 bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
1404 {
1405 #if NVLAN > 0
1406 	struct ether_vlan_header evh;
1407 	struct m_hdr mh;
1408 	uint8_t prio;
1409 
1410 	if ((m->m_flags & M_VLANTAG) == 0)
1411 #endif
1412 	{
1413 		return bpf_mtap(arg, m, direction);
1414 	}
1415 
1416 #if NVLAN > 0
1417 	KASSERT(m->m_len >= ETHER_HDR_LEN);
1418 
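	/* priorities 0 and 1 are stored swapped; restore the on-wire 802.1p order */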
1419 	prio = m->m_pkthdr.pf.prio;
1420 	if (prio <= 1)
1421 		prio = !prio;
1422 
1423 	memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN);
1424 	evh.evl_proto = evh.evl_encap_proto;
1425 	evh.evl_encap_proto = htons(ETHERTYPE_VLAN);
1426 	evh.evl_tag = htons(m->m_pkthdr.ether_vtag |
1427 	    (prio << EVL_PRIO_BITS));
1428 
1429 	mh.mh_flags = 0;
1430 	mh.mh_data = m->m_data + ETHER_HDR_LEN;
1431 	mh.mh_len = m->m_len - ETHER_HDR_LEN;
1432 	mh.mh_next = m->m_next;
1433 
1434 	return bpf_mtap_hdr(arg, (caddr_t)&evh, sizeof(evh),
1435 	    (struct mbuf *)&mh, direction, NULL);
1436 #endif
1437 }
1438 
1439 /*
1440  * Move the packet data from interface memory (pkt) into the
1441  * store buffer.  Wake up listeners if needed.
1442  * "copy" is the routine called to do the actual data
1443  * transfer.  bcopy is passed in to copy contiguous chunks, while
1444  * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
1445  * pkt is really an mbuf.
1446  */
1447 void
1448 bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
1449     void (*cpfn)(const void *, void *, size_t), struct timeval *tv)
1450 {
1451 	struct bpf_hdr *hp;
1452 	int totlen, curlen;
1453 	int hdrlen, do_wakeup = 0;
1454 
1455 	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
1456 	if (d->bd_bif == NULL)
1457 		return;
1458 
1459 	hdrlen = d->bd_bif->bif_hdrlen;
1460 
1461 	/*
1462 	 * Figure out how many bytes to move.  If the packet is
1463 	 * greater than or equal to the snapshot length, transfer that
1464 	 * much.  Otherwise, transfer the whole packet (unless
1465 	 * we hit the buffer size limit).
1466 	 */
1467 	totlen = hdrlen + min(snaplen, pktlen);
1468 	if (totlen > d->bd_bufsize)
1469 		totlen = d->bd_bufsize;
1470 
1471 	/*
1472 	 * Round up the end of the previous packet to the next longword.
1473 	 */
1474 	curlen = BPF_WORDALIGN(d->bd_slen);
1475 	if (curlen + totlen > d->bd_bufsize) {
1476 		/*
1477 		 * This packet will overflow the storage buffer.
1478 		 * Rotate the buffers if we can, then wakeup any
1479 		 * pending reads.
1480 		 */
1481 		if (d->bd_fbuf == NULL) {
1482 			/*
1483 			 * We haven't completed the previous read yet,
1484 			 * so drop the packet.
1485 			 */
1486 			++d->bd_dcount;
1487 			return;
1488 		}
1489 		ROTATE_BUFFERS(d);
1490 		do_wakeup = 1;
1491 		curlen = 0;
1492 	}
1493 
1494 	/*
1495 	 * Append the bpf header.
1496 	 */
1497 	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
1498 	hp->bh_tstamp.tv_sec = tv->tv_sec;
1499 	hp->bh_tstamp.tv_usec = tv->tv_usec;
1500 	hp->bh_datalen = pktlen;
1501 	hp->bh_hdrlen = hdrlen;
1502 	/*
1503 	 * Copy the packet data into the store buffer and update its length.
1504 	 */
1505 	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
1506 	d->bd_slen = curlen + totlen;
1507 
1508 	if (d->bd_immediate) {
1509 		/*
1510 		 * Immediate mode is set.  A packet arrived so any
1511 		 * reads should be woken up.
1512 		 */
1513 		do_wakeup = 1;
1514 	}
1515 
1516 	if (d->bd_rdStart && (d->bd_rtout + d->bd_rdStart < ticks)) {
1517 		/*
1518 		 * we could be selecting on the bpf, and we
1519 		 * may have timeouts set.  We got here by getting
1520 		 * a packet, so wake up the reader.
1521 		 */
1522 		if (d->bd_fbuf != NULL) {
1523 			d->bd_rdStart = 0;
1524 			ROTATE_BUFFERS(d);
1525 			do_wakeup = 1;
1526 		}
1527 	}
1528 
1529 	if (do_wakeup)
1530 		bpf_wakeup(d);
1531 }
1532 
1533 /*
1534  * Allocate the packet buffers for a descriptor.
1535  */
1536 int
1537 bpf_allocbufs(struct bpf_d *d)
1538 {
1539 	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
1540 
1541 	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
1542 	if (d->bd_fbuf == NULL)
1543 		return (ENOMEM);
1544 
1545 	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
1546 	if (d->bd_sbuf == NULL) {
1547 		free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
1548 		return (ENOMEM);
1549 	}
1550 
1551 	d->bd_slen = 0;
1552 	d->bd_hlen = 0;
1553 
1554 	return (0);
1555 }
1556 
1557 void
1558 bpf_get(struct bpf_d *bd)
1559 {
1560 	atomic_inc_int(&bd->bd_ref);
1561 }
1562 
1563 /*
1564  * Free buffers currently in use by a descriptor
1565  * when the reference count drops to zero.
1566  */
1567 void
1568 bpf_put(struct bpf_d *bd)
1569 {
1570 	if (atomic_dec_int_nv(&bd->bd_ref) > 0)
1571 		return;
1572 
1573 	free(bd->bd_sbuf, M_DEVBUF, 0);
1574 	free(bd->bd_hbuf, M_DEVBUF, 0);
1575 	free(bd->bd_fbuf, M_DEVBUF, 0);
1576 	KERNEL_ASSERT_LOCKED();
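	/* dropping the srps lets bpf_insn_dtor free any installed filters */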
1577 	srp_update_locked(&bpf_insn_gc, &bd->bd_rfilter, NULL);
1578 	srp_update_locked(&bpf_insn_gc, &bd->bd_wfilter, NULL);
1579 
1580 	free(bd, M_DEVBUF, sizeof(*bd));
1581 }
1582 
1583 void *
1584 bpfsattach(caddr_t *bpfp, const char *name, u_int dlt, u_int hdrlen)
1585 {
1586 	struct bpf_if *bp;
1587 
1588 	if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL)
1589 		panic("bpfattach");
1590 	SRPL_INIT(&bp->bif_dlist);
1591 	bp->bif_driverp = (struct bpf_if **)bpfp;
1592 	bp->bif_name = name;
1593 	bp->bif_ifp = NULL;
1594 	bp->bif_dlt = dlt;
1595 
1596 	bp->bif_next = bpf_iflist;
1597 	bpf_iflist = bp;
1598 
1599 	*bp->bif_driverp = NULL;
1600 
1601 	/*
1602 	 * Compute the length of the bpf header.  This is not necessarily
1603 	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1604 	 * that the network layer header begins on a longword boundary (for
1605 	 * performance reasons and to alleviate alignment restrictions).
1606 	 */
1607 	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
1608 
1609 	return (bp);
1610 }
1611 
1612 void
1613 bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen)
1614 {
1615 	struct bpf_if *bp;
1616 
1617 	bp = bpfsattach(driverp, ifp->if_xname, dlt, hdrlen);
1618 	bp->bif_ifp = ifp;
1619 }
1620 
1621 /* Detach an interface from its attached bpf device.  */
1622 void
1623 bpfdetach(struct ifnet *ifp)
1624 {
1625 	struct bpf_if *bp, *nbp, **pbp = &bpf_iflist;
1626 
1627 	KERNEL_ASSERT_LOCKED();
1628 
1629 	for (bp = bpf_iflist; bp; bp = nbp) {
1630 		nbp = bp->bif_next;
1631 		if (bp->bif_ifp == ifp) {
1632 			*pbp = nbp;
1633 
1634 			bpfsdetach(bp);
1635 		} else
1636 			pbp = &bp->bif_next;
1637 	}
1638 	ifp->if_bpf = NULL;
1639 }
1640 
1641 void
1642 bpfsdetach(void *p)
1643 {
1644 	struct bpf_if *bp = p;
1645 	struct bpf_d *bd;
1646 	int maj;
1647 
1648 	/* Locate the major number. */
1649 	for (maj = 0; maj < nchrdev; maj++)
1650 		if (cdevsw[maj].d_open == bpfopen)
1651 			break;
1652 
1653 	while ((bd = SRPL_FIRST_LOCKED(&bp->bif_dlist)))
1654 		vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR);
1655 
1656 	free(bp, M_DEVBUF, sizeof *bp);
1657 }
1658 
1659 int
1660 bpf_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
1661     void *newp, size_t newlen)
1662 {
1663 	int newval;
1664 	int error;
1665 
1666 	switch (name[0]) {
1667 	case NET_BPF_BUFSIZE:
1668 		newval = bpf_bufsize;
1669 		error = sysctl_int(oldp, oldlenp, newp, newlen, &newval);
1670 		if (error)
1671 			return (error);
1672 		if (newval < BPF_MINBUFSIZE || newval > bpf_maxbufsize)
1673 			return (EINVAL);
1674 		bpf_bufsize = newval;
1675 		break;
1676 	case NET_BPF_MAXBUFSIZE:
1677 		newval = bpf_maxbufsize;
1678 		error = sysctl_int(oldp, oldlenp, newp, newlen, &newval);
1679 		if (error)
1680 			return (error);
1681 		if (newval < BPF_MINBUFSIZE)
1682 			return (EINVAL);
1683 		bpf_maxbufsize = newval;
1684 		break;
1685 	default:
1686 		return (EOPNOTSUPP);
1687 	}
1688 	return (0);
1689 }
1690 
1691 int
1692 bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
1693     size_t newlen)
1694 {
1695 	int flags = RW_INTR;
1696 	int error;
1697 
1698 	if (namelen != 1)
1699 		return (ENOTDIR);
1700 
1701 	flags |= (newp == NULL) ? RW_READ : RW_WRITE;
1702 
1703 	error = rw_enter(&bpf_sysctl_lk, flags);
1704 	if (error != 0)
1705 		return (error);
1706 
1707 	error = bpf_sysctl_locked(name, namelen, oldp, oldlenp, newp, newlen);
1708 
1709 	rw_exit(&bpf_sysctl_lk);
1710 
1711 	return (error);
1712 }
1713 
1714 struct bpf_d *
1715 bpfilter_lookup(int unit)
1716 {
1717 	struct bpf_d *bd;
1718 
1719 	KERNEL_ASSERT_LOCKED();
1720 
1721 	LIST_FOREACH(bd, &bpf_d_list, bd_list)
1722 		if (bd->bd_unit == unit)
1723 			return (bd);
1724 	return (NULL);
1725 }
1726 
1727 /*
1728  * Get the list of data link types available on the interface.
1729  */
1730 int
1731 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
1732 {
1733 	int n, error;
1734 	struct bpf_if *bp;
1735 	const char *name;
1736 
1737 	name = d->bd_bif->bif_name;
1738 	n = 0;
1739 	error = 0;
1740 	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1741 		if (strcmp(name, bp->bif_name) != 0)
1742 			continue;
1743 		if (bfl->bfl_list != NULL) {
1744 			if (n >= bfl->bfl_len)
1745 				return (ENOMEM);
1746 			error = copyout(&bp->bif_dlt,
1747 			    bfl->bfl_list + n, sizeof(u_int));
1748 			if (error)
1749 				break;
1750 		}
1751 		n++;
1752 	}
1753 
1754 	bfl->bfl_len = n;
1755 	return (error);
1756 }
1757 
1758 /*
1759  * Set the data link type of a BPF instance.
1760  */
1761 int
1762 bpf_setdlt(struct bpf_d *d, u_int dlt)
1763 {
1764 	const char *name;
1765 	struct bpf_if *bp;
1766 
1767 	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
1768 	if (d->bd_bif->bif_dlt == dlt)
1769 		return (0);
1770 	name = d->bd_bif->bif_name;
1771 	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1772 		if (strcmp(name, bp->bif_name) != 0)
1773 			continue;
1774 		if (bp->bif_dlt == dlt)
1775 			break;
1776 	}
1777 	if (bp == NULL)
1778 		return (EINVAL);
1779 	bpf_detachd(d);
1780 	bpf_attachd(d, bp);
1781 	bpf_resetd(d);
1782 	return (0);
1783 }
1784 
1785 void
1786 bpf_d_ref(void *null, void *d)
1787 {
1788 	bpf_get(d);
1789 }
1790 
1791 void
1792 bpf_d_unref(void *null, void *d)
1793 {
1794 	bpf_put(d);
1795 }
1796 
1797 void
1798 bpf_insn_dtor(void *null, void *f)
1799 {
1800 	struct bpf_program *bf = f;
1801 	struct bpf_insn *insns = bf->bf_insns;
1802 
1803 	free(insns, M_DEVBUF, bf->bf_len * sizeof(*insns));
1804 	free(bf, M_DEVBUF, sizeof(*bf));
1805 }
1806 
1807 u_int32_t	bpf_mbuf_ldw(const void *, u_int32_t, int *);
1808 u_int32_t	bpf_mbuf_ldh(const void *, u_int32_t, int *);
1809 u_int32_t	bpf_mbuf_ldb(const void *, u_int32_t, int *);
1810 
1811 int		bpf_mbuf_copy(const struct mbuf *, u_int32_t,
1812 		    void *, u_int32_t);
1813 
1814 const struct bpf_ops bpf_mbuf_ops = {
1815 	bpf_mbuf_ldw,
1816 	bpf_mbuf_ldh,
1817 	bpf_mbuf_ldb,
1818 };
1819 
1820 int
1821 bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len)
1822 {
1823 	u_int8_t *cp = buf;
1824 	u_int32_t count;
1825 
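	/* skip over leading mbufs until the one containing offset ``off'' */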
1826 	while (off >= m->m_len) {
1827 		off -= m->m_len;
1828 
1829 		m = m->m_next;
1830 		if (m == NULL)
1831 			return (-1);
1832 	}
1833 
1834 	for (;;) {
1835 		count = min(m->m_len - off, len);
1836 
1837 		memcpy(cp, m->m_data + off, count);
1838 		len -= count;
1839 
1840 		if (len == 0)
1841 			return (0);
1842 
1843 		m = m->m_next;
1844 		if (m == NULL)
1845 			break;
1846 
1847 		cp += count;
1848 		off = 0;
1849 	}
1850 
1851 	return (-1);
1852 }
1853 
1854 u_int32_t
1855 bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err)
1856 {
1857 	u_int32_t v;
1858 
1859 	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
1860 		*err = 1;
1861 		return (0);
1862 	}
1863 
1864 	*err = 0;
1865 	return ntohl(v);
1866 }
1867 
1868 u_int32_t
1869 bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err)
1870 {
1871 	u_int16_t v;
1872 
1873 	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
1874 		*err = 1;
1875 		return (0);
1876 	}
1877 
1878 	*err = 0;
1879 	return ntohs(v);
1880 }
1881 
1882 u_int32_t
1883 bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err)
1884 {
1885 	const struct mbuf *m = m0;
1886 	u_int8_t v;
1887 
1888 	while (k >= m->m_len) {
1889 		k -= m->m_len;
1890 
1891 		m = m->m_next;
1892 		if (m == NULL) {
1893 			*err = 1;
1894 			return (0);
1895 		}
1896 	}
1897 	v = m->m_data[k];
1898 
1899 	*err = 0;
1900 	return v;
1901 }
1902 
1903 u_int
1904 bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen)
1905 {
1906 	return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen);
1907 }
1908