/*	$OpenBSD: bpf.c,v 1.201 2021/01/02 07:25:42 dlg Exp $	*/
/*	$NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org>
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/poll.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/rwlock.h>
#include <sys/atomic.h>
#include <sys/smr.h>
#include <sys/specdev.h>
#include <sys/selinfo.h>
#include <sys/sigio.h>
#include <sys/task.h>
#include <sys/time.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include "vlan.h"
#if NVLAN > 0
#include <net/if_vlan_var.h>
#endif

#define BPF_BUFSIZE 32768

#define PRINET  26			/* interruptible */

/*
 * The default read buffer size is patchable.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_MAXBUFSIZE;
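/*
 * Both knobs above are adjustable at runtime through sysctl(2); see
 * bpf_sysctl() below, which handles NET_BPF_BUFSIZE and
 * NET_BPF_MAXBUFSIZE.  An illustrative (not normative) invocation
 * from userland:
 *
 *	# sysctl net.bpf.maxbufsize=2097152
 */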

/*
 *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
 *  bpf_d_list is the list of descriptors
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_d_list;

int	bpf_allocbufs(struct bpf_d *);
void	bpf_ifname(struct bpf_if*, struct ifreq *);
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, struct bpf_d *, struct mbuf **,
	    struct sockaddr *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfpoll(dev_t, int, struct proc *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_wakeup_cb(void *);
int	_bpf_mtap(caddr_t, const struct mbuf *, const struct mbuf *, u_int);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    const struct bpf_hdr *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);

int	bpf_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t);

struct bpf_d *bpfilter_lookup(int);

/*
 * Called holding ``bd_mtx''.
 */
void	bpf_attachd(struct bpf_d *, struct bpf_if *);
void	bpf_detachd(struct bpf_d *);
void	bpf_resetd(struct bpf_d *);

void	bpf_prog_smr(void *);
void	bpf_d_smr(void *);

/*
 * Reference count access to descriptor buffers
 */
void	bpf_get(struct bpf_d *);
void	bpf_put(struct bpf_d *);


struct rwlock bpf_sysctl_lk = RWLOCK_INITIALIZER("bpfsz");

int
bpf_movein(struct uio *uio, struct bpf_d *d, struct mbuf **mp,
    struct sockaddr *sockp)
{
	struct bpf_program_smr *bps;
	struct bpf_insn *fcode = NULL;
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen;
	u_int len;
	u_int linktype;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	linktype = d->bd_bif->bif_dlt;
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_LOOP:
		sockp->sa_family = AF_UNSPEC;
		hlen = sizeof(u_int32_t);
		break;

	default:
		return (EIO);
	}

	if (uio->uio_resid > MAXMCLBYTES)
		return (EIO);
	len = uio->uio_resid;

	MGETHDR(m, M_WAIT, MT_DATA);
	m->m_pkthdr.ph_ifidx = 0;
	m->m_pkthdr.len = len - hlen;

	if (len > MHLEN) {
		MCLGETL(m, M_WAIT, len);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_len = len;
	*mp = m;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	smr_read_enter();
	bps = SMR_PTR_GET(&d->bd_wfilter);
	if (bps != NULL)
		fcode = bps->bps_bf.bf_insns;
	slen = bpf_filter(fcode, mtod(m, u_char *), len, len);
	smr_read_leave();

	if (slen < len) {
		error = EPERM;
		goto bad;
	}

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}
	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (linktype == DLT_LOOP) {
			u_int32_t af;

			/* the link header indicates the address family */
			KASSERT(hlen == sizeof(u_int32_t));
			memcpy(&af, m->m_data, hlen);
			sockp->sa_family = ntohl(af);
		} else
			memcpy(sockp->sa_data, m->m_data, hlen);
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
	}

	/*
	 * Prepend the data link type as a mbuf tag
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	return (0);
 bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */

	d->bd_bif = bp;

	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_INSERT_HEAD_LOCKED(&bp->bif_dlist, d, bd_next);

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	bp = d->bd_bif;
	/* Not attached. */
	if (bp == NULL)
		return;

	/* Remove ``d'' from the interface's descriptor list. */
	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_REMOVE_LOCKED(&bp->bif_dlist, d, bpf_d, bd_next);

	if (SMR_SLIST_EMPTY_LOCKED(&bp->bif_dlist)) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*bp->bif_driverp = NULL;
	}

	d->bd_bif = NULL;

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		KASSERT(bp->bif_ifp != NULL);

		d->bd_promisc = 0;

		bpf_get(d);
		mtx_leave(&d->bd_mtx);
		NET_LOCK();
		error = ifpromisc(bp->bif_ifp, 0);
		NET_UNLOCK();
		mtx_enter(&d->bd_mtx);
		bpf_put(d);

		if (error && !(error == EINVAL || error == ENODEV ||
		    error == ENXIO))
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
	}
}

void
bpfilterattach(int n)
{
	LIST_INIT(&bpf_d_list);
}

/*
 * Open the bpf device.  Returns ENXIO for an illegal minor device number,
 * EBUSY if the descriptor cannot be allocated.
 */
int
bpfopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *bd;
	int unit = minor(dev);

	if (unit & ((1 << CLONE_SHIFT) - 1))
		return (ENXIO);

	KASSERT(bpfilter_lookup(unit) == NULL);

	/* create on demand */
	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (EBUSY);

	/* Mark "free" and do most initialization. */
	bd->bd_unit = unit;
	bd->bd_bufsize = bpf_bufsize;
	bd->bd_sig = SIGIO;
	mtx_init(&bd->bd_mtx, IPL_NET);
	task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
	smr_init(&bd->bd_smr);
	sigio_init(&bd->bd_sigio);

	bd->bd_rtout = 0;	/* no timeout by default */
	bd->bd_rnonblock = ISSET(flag, FNONBLOCK);

	bpf_get(bd);
	LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
int
bpfclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *d;

	d = bpfilter_lookup(minor(dev));
	mtx_enter(&d->bd_mtx);
	bpf_detachd(d);
	bpf_wakeup(d);
	LIST_REMOVE(d, bd_list);
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) do {						\
	KASSERT((d)->bd_in_uiomove == 0);				\
	MUTEX_ASSERT_LOCKED(&(d)->bd_mtx);				\
	(d)->bd_hbuf = (d)->bd_sbuf;					\
	(d)->bd_hlen = (d)->bd_slen;					\
	(d)->bd_sbuf = (d)->bd_fbuf;					\
	(d)->bd_slen = 0;						\
	(d)->bd_fbuf = NULL;						\
} while (0)
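
/*
 * A sketch of the three-buffer scheme the macro rotates (the names
 * follow the bd_sbuf/bd_hbuf/bd_fbuf fields used throughout):
 *
 *	before:	sbuf = filling store buffer, hbuf = NULL, fbuf = spare
 *	after:	hbuf = old store buffer, sbuf = old spare, fbuf = NULL
 *
 * The hold buffer is what bpfread() hands to userland; the free slot
 * is replenished when that read completes.
 */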

/*
 * TODO Move nsecuptime() into kern_tc.c and document it when we have
 * more users elsewhere in the kernel.
 */
static uint64_t
nsecuptime(void)
{
	struct timespec now;

	nanouptime(&now);
	return TIMESPEC_TO_NSEC(&now);
}

/*
 *  bpfread - read next chunk of packets from buffers
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	uint64_t end, now;
	struct bpf_d *d;
	caddr_t hbuf;
	int error, hlen;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	mtx_enter(&d->bd_mtx);

	/*
	 * Restrict applications to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize) {
		error = EINVAL;
		goto out;
	}

	/*
	 * If there's a timeout, mark when the read should end.
	 */
	if (d->bd_rtout != 0) {
		now = nsecuptime();
		end = now + d->bd_rtout;
		if (end < now)
			end = UINT64_MAX;
	}

	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_bif == NULL) {
			/* interface is gone */
			if (d->bd_slen == 0) {
				error = EIO;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_immediate && d->bd_slen != 0) {
			/*
			 * One or more packets arrived since the previous
			 * read or while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_rnonblock) {
			/* User requested non-blocking I/O */
			error = EWOULDBLOCK;
		} else if (d->bd_rtout == 0) {
			/* No read timeout set. */
			d->bd_nreaders++;
			error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
			    "bpf", INFSLP);
			d->bd_nreaders--;
		} else if ((now = nsecuptime()) < end) {
			/* Read timeout has not expired yet. */
			d->bd_nreaders++;
			error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
			    "bpf", end - now);
			d->bd_nreaders--;
		} else {
			/* Read timeout has expired. */
			error = EWOULDBLOCK;
		}
		if (error == EINTR || error == ERESTART)
			goto out;
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf != NULL)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				error = 0;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	hbuf = d->bd_hbuf;
	hlen = d->bd_hlen;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_fbuf = NULL;
	d->bd_in_uiomove = 1;

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	mtx_leave(&d->bd_mtx);
	error = uiomove(hbuf, hlen, uio);
	mtx_enter(&d->bd_mtx);

	/* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */
	KASSERT(d->bd_fbuf == NULL);
	KASSERT(d->bd_hbuf == NULL);
	d->bd_fbuf = hbuf;
	d->bd_in_uiomove = 0;
out:
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (error);
}
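
/*
 * The userland side of the contract enforced above, as an illustrative
 * sketch (handle_packet() is a hypothetical consumer): the buffer
 * handed to read(2) must be exactly BIOCGBLEN bytes, and one read
 * returns zero or more word-aligned bpf_hdr records.
 *
 *	u_int blen;
 *	ioctl(fd, BIOCGBLEN, &blen);
 *	char *buf = malloc(blen);
 *	ssize_t n = read(fd, buf, blen);
 *	char *cp = buf;
 *	while (cp < buf + n) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)cp;
 *		handle_packet(cp + bh->bh_hdrlen, bh->bh_caplen);
 *		cp += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */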

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
void
bpf_wakeup(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	if (d->bd_nreaders)
		wakeup(d);

	/*
	 * As long as pgsigio() and selwakeup() need to be protected
	 * by the KERNEL_LOCK() we have to delay the wakeup to
	 * another context to keep the hot path KERNEL_LOCK()-free.
	 */
	if ((d->bd_async && d->bd_sig) ||
	    (!klist_empty(&d->bd_sel.si_note) || d->bd_sel.si_seltid != 0)) {
		bpf_get(d);
		if (!task_add(systq, &d->bd_wake_task))
			bpf_put(d);
	}
}

void
bpf_wakeup_cb(void *xd)
{
	struct bpf_d *d = xd;

	if (d->bd_async && d->bd_sig)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeup(&d->bd_sel);
	bpf_put(d);
}

int
bpfwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m;
	int error;
	struct sockaddr_storage dst;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	ifp = d->bd_bif->bif_ifp;

	if (ifp == NULL || (ifp->if_flags & IFF_UP) == 0) {
		error = ENETDOWN;
		goto out;
	}

	if (uio->uio_resid == 0) {
		error = 0;
		goto out;
	}

	error = bpf_movein(uio, d, &m, sstosa(&dst));
	if (error)
		goto out;

	if (m->m_pkthdr.len > ifp->if_mtu) {
		m_freem(m);
		error = EMSGSIZE;
		goto out;
	}

	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
	m->m_pkthdr.pf.prio = ifp->if_llprio;

	if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	NET_LOCK();
	error = ifp->if_output(ifp, m, sstosa(&dst), NULL);
	NET_UNLOCK();

out:
	bpf_put(d);
	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
void
bpf_resetd(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	KASSERT(d->bd_in_uiomove == 0);

	if (d->bd_hbuf != NULL) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set link layer read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLTLIST	Get supported link layer types.
 *  BIOCGDLT		Get link layer type.
 *  BIOCSDLT		Set link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 */
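
/*
 * An illustrative userland setup for the common capture case ("em0"
 * and the use of immediate mode are example choices, not
 * requirements):
 *
 *	struct ifreq ifr;
 *	u_int on = 1;
 *	int fd = open("/dev/bpf0", O_RDWR);
 *
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	ioctl(fd, BIOCIMMEDIATE, &on);
 */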
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct bpf_d *d;
	int error = 0;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_locked && suser(p) != 0) {
		/* list of allowed ioctls when locked and not root */
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCGDIRFILT:
			break;
		default:
			return (EPERM);
		}
	}

	bpf_get(d);

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			mtx_enter(&d->bd_mtx);
			n = d->bd_slen;
			if (d->bd_hbuf != NULL)
				n += d->bd_hlen;
			mtx_leave(&d->bd_mtx);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			mtx_enter(&d->bd_mtx);
			d->bd_bufsize = size;
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr, 0);
		break;

	/*
	 * Set link layer write filter.
	 */
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, 1);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
		} else if (d->bd_bif->bif_ifp != NULL) {
			if (d->bd_promisc == 0) {
				MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
				NET_LOCK();
				error = ifpromisc(d->bd_bif->bif_ifp, 1);
				NET_UNLOCK();
				if (error == 0)
					d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			mtx_enter(&d->bd_mtx);
			error = bpf_setdlt(d, *(u_int *)addr);
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif, (struct ifreq *)addr);
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;
			uint64_t rtout;

			if (tv->tv_sec < 0 || !timerisvalid(tv)) {
				error = EINVAL;
				break;
			}
			rtout = TIMEVAL_TO_NSEC(tv);
			if (rtout > MAXTSLP) {
				error = EOVERFLOW;
				break;
			}
			mtx_enter(&d->bd_mtx);
			d->bd_rtout = rtout;
			mtx_leave(&d->bd_mtx);
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			memset(tv, 0, sizeof(*tv));
			mtx_enter(&d->bd_mtx);
			NSEC_TO_TIMEVAL(d->bd_rtout, tv);
			mtx_leave(&d->bd_mtx);
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCLOCK:		/* set "locked" flag (no reset) */
		d->bd_locked = 1;
		break;

	case BIOCGFILDROP:	/* get "filter-drop" flag */
		*(u_int *)addr = d->bd_fildrop;
		break;

	case BIOCSFILDROP: {	/* set "filter-drop" flag */
		unsigned int fildrop = *(u_int *)addr;
		switch (fildrop) {
		case BPF_FILDROP_PASS:
		case BPF_FILDROP_CAPTURE:
		case BPF_FILDROP_DROP:
			d->bd_fildrop = fildrop;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	}

	case BIOCGDIRFILT:	/* get direction filter */
		*(u_int *)addr = d->bd_dirfilt;
		break;

	case BIOCSDIRFILT:	/* set direction filter */
		d->bd_dirfilt = (*(u_int *)addr) &
		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		if (*(int *)addr)
			d->bd_rnonblock = 1;
		else
			d->bd_rnonblock = 0;
		break;

	case FIOASYNC:		/* Send signal on received packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:		/* Process or group to send signals to */
	case TIOCSPGRP:
		error = sigio_setown(&d->bd_sigio, cmd, addr);
		break;

	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&d->bd_sigio, cmd, addr);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}

	bpf_put(d);
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, int wf)
{
	struct bpf_program_smr *bps, *old_bps;
	struct bpf_insn *fcode;
	u_int flen, size;

	KERNEL_ASSERT_LOCKED();

	if (fp->bf_insns == NULL) {
		if (fp->bf_len != 0)
			return (EINVAL);
		bps = NULL;
	} else {
		flen = fp->bf_len;
		if (flen > BPF_MAXINSNS)
			return (EINVAL);

		fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
		    M_WAITOK | M_CANFAIL);
		if (fcode == NULL)
			return (ENOMEM);

		size = flen * sizeof(*fp->bf_insns);
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    bpf_validate(fcode, (int)flen) == 0) {
			free(fcode, M_DEVBUF, size);
			return (EINVAL);
		}

		bps = malloc(sizeof(*bps), M_DEVBUF, M_WAITOK);
		smr_init(&bps->bps_smr);
		bps->bps_bf.bf_len = flen;
		bps->bps_bf.bf_insns = fcode;
	}

	if (wf == 0) {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_rfilter);
		SMR_PTR_SET_LOCKED(&d->bd_rfilter, bps);
	} else {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_wfilter);
		SMR_PTR_SET_LOCKED(&d->bd_wfilter, bps);
	}

	mtx_enter(&d->bd_mtx);
	bpf_resetd(d);
	mtx_leave(&d->bd_mtx);
	if (old_bps != NULL)
		smr_call(&old_bps->bps_smr, bpf_prog_smr, old_bps);

	return (0);
}
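
/*
 * Shape of the argument bpf_setf() copies in, as a userland sketch
 * (illustrative; a one-instruction program that accepts every packet
 * with an unbounded snapshot length):
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET+BPF_K, (u_int)-1),
 *	};
 *	struct bpf_program prog = { 1, insns };
 *	ioctl(fd, BIOCSETF, &prog);
 *
 * A filter return value of 0 rejects the packet; a nonzero value is
 * the number of bytes to capture.
 */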

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp, *candidate = NULL;
	int error = 0;

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(bp->bif_name, ifr->ifr_name) != 0)
			continue;

		if (candidate == NULL || candidate->bif_dlt > bp->bif_dlt)
			candidate = bp;
	}

	/* Not found. */
	if (candidate == NULL)
		return (ENXIO);

	/*
	 * Allocate the packet buffers if we need to.
	 * If we're already attached to requested interface,
	 * just flush the buffer.
	 */
	mtx_enter(&d->bd_mtx);
	if (d->bd_sbuf == NULL) {
		if ((error = bpf_allocbufs(d)))
			goto out;
	}
	if (candidate != d->bd_bif) {
		/*
		 * Detach if attached to something else.
		 */
		bpf_detachd(d);
		bpf_attachd(d, candidate);
	}
	bpf_resetd(d);
out:
	mtx_leave(&d->bd_mtx);
	return (error);
}

/*
 * Copy the interface name to the ifreq.
 */
void
bpf_ifname(struct bpf_if *bif, struct ifreq *ifr)
{
	bcopy(bif->bif_name, ifr->ifr_name, sizeof(ifr->ifr_name));
}

/*
 * Support for poll() system call
 */
int
bpfpoll(dev_t dev, int events, struct proc *p)
{
	struct bpf_d *d;
	int revents;

	KERNEL_ASSERT_LOCKED();

	/*
	 * An imitation of the FIONREAD ioctl code.
	 */
	d = bpfilter_lookup(minor(dev));

	/*
	 * XXX The USB stack manages to trigger a race condition
	 * that causes bpfilter_lookup to return NULL when a USB device
	 * gets detached while it is up and has an open bpf handler (e.g.
	 * dhclient).  We should still check whether we can fix the root
	 * cause of this issue.
	 */
	if (d == NULL)
		return (POLLERR);

	/* Always ready to write data */
	revents = events & (POLLOUT | POLLWRNORM);

	if (events & (POLLIN | POLLRDNORM)) {
		mtx_enter(&d->bd_mtx);
		if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
			revents |= events & (POLLIN | POLLRDNORM);
		else
			selrecord(p, &d->bd_sel);
		mtx_leave(&d->bd_mtx);
	}
	return (revents);
}

const struct filterops bpfread_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_bpfrdetach,
	.f_event	= filt_bpfread,
};

int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	struct klist *klist;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_sel.si_note;
		kn->kn_fop = &bpfread_filtops;
		break;
	default:
		return (EINVAL);
	}

	bpf_get(d);
	kn->kn_hook = d;
	klist_insert_locked(klist, kn);

	return (0);
}

void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;

	KERNEL_ASSERT_LOCKED();

	klist_remove_locked(&d->bd_sel.si_note, kn);
	bpf_put(d);
}

int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;

	KERNEL_ASSERT_LOCKED();

	mtx_enter(&d->bd_mtx);
	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;
	mtx_leave(&d->bd_mtx);

	return (kn->kn_data > 0);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

int
bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
{
	return _bpf_mtap(arg, m, m, direction);
}

int
_bpf_mtap(caddr_t arg, const struct mbuf *mp, const struct mbuf *m,
    u_int direction)
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct bpf_d *d;
	size_t pktlen, slen;
	const struct mbuf *m0;
	struct bpf_hdr tbh;
	int gothdr = 0;
	int drop = 0;

	if (m == NULL)
		return (0);

	if (bp == NULL)
		return (0);

	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;

	smr_read_enter();
	SMR_SLIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		struct bpf_program_smr *bps;
		struct bpf_insn *fcode = NULL;

		atomic_inc_long(&d->bd_rcount);

		if (ISSET(d->bd_dirfilt, direction))
			continue;

		bps = SMR_PTR_GET(&d->bd_rfilter);
		if (bps != NULL)
			fcode = bps->bps_bf.bf_insns;
		slen = bpf_mfilter(fcode, m, pktlen);

		if (slen == 0)
			continue;
		if (d->bd_fildrop != BPF_FILDROP_PASS)
			drop = 1;
		if (d->bd_fildrop != BPF_FILDROP_DROP) {
			if (!gothdr) {
				struct timeval tv;
				memset(&tbh, 0, sizeof(tbh));

				if (ISSET(mp->m_flags, M_PKTHDR)) {
					tbh.bh_ifidx = mp->m_pkthdr.ph_ifidx;
					tbh.bh_flowid = mp->m_pkthdr.ph_flowid;
					tbh.bh_flags = mp->m_pkthdr.pf.prio;
					if (ISSET(mp->m_pkthdr.csum_flags,
					    M_FLOWID))
						SET(tbh.bh_flags, BPF_F_FLOWID);

					m_microtime(mp, &tv);
				} else
					microtime(&tv);

				tbh.bh_tstamp.tv_sec = tv.tv_sec;
				tbh.bh_tstamp.tv_usec = tv.tv_usec;
				SET(tbh.bh_flags, direction << BPF_F_DIR_SHIFT);

				gothdr = 1;
			}

			mtx_enter(&d->bd_mtx);
			bpf_catchpacket(d, (u_char *)m, pktlen, slen, &tbh);
			mtx_leave(&d->bd_mtx);
		}
	}
	smr_read_leave();

	return (drop);
}
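
/*
 * How driver receive/transmit paths typically feed packets in here
 * (an illustrative sketch of the usual guard, not a required form):
 *
 *	#if NBPFILTER > 0
 *		if (ifp->if_bpf)
 *			bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_IN);
 *	#endif
 *
 * A nonzero return value tells the caller that a matching descriptor
 * has BIOCSFILDROP set and the packet should be dropped.
 */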

/*
 * Incoming linkage from device drivers, where a data buffer should be
 * prepended by an arbitrary header.  In this situation we already have a
 * way of representing a chain of memory buffers, i.e., mbufs, so reuse
 * the existing functionality by attaching the buffers to mbufs.
 *
 * Con up a minimal mbuf chain to pacify bpf by allocating (only) a
 * struct m_hdr each for the header and data on the stack.
 */
int
bpf_tap_hdr(caddr_t arg, const void *hdr, unsigned int hdrlen,
    const void *buf, unsigned int buflen, u_int direction)
{
	struct m_hdr mh, md;
	struct mbuf *m0 = NULL;
	struct mbuf **mp = &m0;

	if (hdr != NULL) {
		mh.mh_flags = 0;
		mh.mh_next = NULL;
		mh.mh_len = hdrlen;
		mh.mh_data = (void *)hdr;

		*mp = (struct mbuf *)&mh;
		mp = &mh.mh_next;
	}

	if (buf != NULL) {
		md.mh_flags = 0;
		md.mh_next = NULL;
		md.mh_len = buflen;
		md.mh_data = (void *)buf;

		*mp = (struct mbuf *)&md;
	}

	return bpf_mtap(arg, m0, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend some arbitrary header from a linear buffer.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_hdr(caddr_t arg, const void *data, u_int dlen, const struct mbuf *m,
    u_int direction)
{
	struct m_hdr mh;
	const struct mbuf *m0;

	if (dlen > 0) {
		mh.mh_flags = 0;
		mh.mh_next = (struct mbuf *)m;
		mh.mh_len = dlen;
		mh.mh_data = (void *)data;
		m0 = (struct mbuf *)&mh;
	} else
		m0 = m;

	return _bpf_mtap(arg, m, m0, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend the address family.
 *
 * Con up a minimal dummy header to pacify bpf.  We allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
{
	u_int32_t    afh;

	afh = htonl(af);

	return bpf_mtap_hdr(arg, &afh, sizeof(afh), m, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend a VLAN encapsulation header.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
{
#if NVLAN > 0
	struct ether_vlan_header evh;
	struct m_hdr mh, md;
	uint8_t prio;

	if ((m->m_flags & M_VLANTAG) == 0)
#endif
	{
		return _bpf_mtap(arg, m, m, direction);
	}

#if NVLAN > 0
	KASSERT(m->m_len >= ETHER_HDR_LEN);

	prio = m->m_pkthdr.pf.prio;
	if (prio <= 1)
		prio = !prio;

	memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN);
	evh.evl_proto = evh.evl_encap_proto;
	evh.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evh.evl_tag = htons(m->m_pkthdr.ether_vtag |
	    (prio << EVL_PRIO_BITS));

	mh.mh_flags = 0;
	mh.mh_data = (caddr_t)&evh;
	mh.mh_len = sizeof(evh);
	mh.mh_next = (struct mbuf *)&md;

	md.mh_flags = 0;
	md.mh_data = m->m_data + ETHER_HDR_LEN;
	md.mh_len = m->m_len - ETHER_HDR_LEN;
	md.mh_next = m->m_next;

	return _bpf_mtap(arg, m, (struct mbuf *)&mh, direction);
#endif
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Wake up listeners if needed.
 * "pkt" is really an mbuf chain; bpf_mcopy() does the actual
 * data transfer.
 */
void
bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
    const struct bpf_hdr *tbh)
{
	struct bpf_hdr *bh;
	int totlen, curlen;
	int hdrlen, do_wakeup = 0;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif == NULL)
		return;

	hdrlen = d->bd_bif->bif_hdrlen;

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	}

	/*
	 * Append the bpf header.
	 */
	bh = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	*bh = *tbh;
	bh->bh_datalen = pktlen;
	bh->bh_hdrlen = hdrlen;
	bh->bh_caplen = totlen - hdrlen;

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	bpf_mcopy(pkt, (u_char *)bh + hdrlen, bh->bh_caplen);
	d->bd_slen = curlen + totlen;

	if (d->bd_immediate) {
		/*
		 * Immediate mode is set.  A packet arrived so any
		 * reads should be woken up.
		 */
		do_wakeup = 1;
	}

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Allocate a descriptor's packet buffers and initialize
 * the associated lengths.
 */
int
bpf_allocbufs(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_fbuf == NULL)
		return (ENOMEM);

	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_sbuf == NULL) {
		free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
		return (ENOMEM);
	}

	d->bd_slen = 0;
	d->bd_hlen = 0;

	return (0);
}

void
bpf_prog_smr(void *bps_arg)
{
	struct bpf_program_smr *bps = bps_arg;

	free(bps->bps_bf.bf_insns, M_DEVBUF,
	    bps->bps_bf.bf_len * sizeof(struct bpf_insn));
	free(bps, M_DEVBUF, sizeof(struct bpf_program_smr));
}

void
bpf_d_smr(void *smr)
{
	struct bpf_d	*bd = smr;

	sigio_free(&bd->bd_sigio);
	free(bd->bd_sbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_hbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_fbuf, M_DEVBUF, bd->bd_bufsize);

	if (bd->bd_rfilter != NULL)
		bpf_prog_smr(bd->bd_rfilter);
	if (bd->bd_wfilter != NULL)
		bpf_prog_smr(bd->bd_wfilter);

	free(bd, M_DEVBUF, sizeof(*bd));
}

void
bpf_get(struct bpf_d *bd)
{
	atomic_inc_int(&bd->bd_ref);
}

/*
 * Free buffers currently in use by a descriptor
 * when the reference count drops to zero.
 */
void
bpf_put(struct bpf_d *bd)
{
	if (atomic_dec_int_nv(&bd->bd_ref) > 0)
		return;

	smr_call(&bd->bd_smr, bpf_d_smr, bd);
}

void *
bpfsattach(caddr_t *bpfp, const char *name, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL)
		panic("bpfattach");
	SMR_SLIST_INIT(&bp->bif_dlist);
	bp->bif_driverp = (struct bpf_if **)bpfp;
	bp->bif_name = name;
	bp->bif_ifp = NULL;
	bp->bif_dlt = dlt;

	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = NULL;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	return (bp);
}

void
bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	bp = bpfsattach(driverp, ifp->if_xname, dlt, hdrlen);
	bp->bif_ifp = ifp;
}

/* Detach an interface from its attached bpf device.  */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *nbp;

	KERNEL_ASSERT_LOCKED();

	for (bp = bpf_iflist; bp; bp = nbp) {
		nbp = bp->bif_next;
		if (bp->bif_ifp == ifp)
			bpfsdetach(bp);
	}
	ifp->if_bpf = NULL;
}

void
bpfsdetach(void *p)
{
	struct bpf_if *bp = p, *tbp;
	struct bpf_d *bd;
	int maj;

	KERNEL_ASSERT_LOCKED();

	/* Locate the major number. */
	for (maj = 0; maj < nchrdev; maj++)
		if (cdevsw[maj].d_open == bpfopen)
			break;

	while ((bd = SMR_SLIST_FIRST_LOCKED(&bp->bif_dlist)))
		vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR);

	for (tbp = bpf_iflist; tbp; tbp = tbp->bif_next) {
		if (tbp->bif_next == bp) {
			tbp->bif_next = bp->bif_next;
			break;
		}
	}

	if (bpf_iflist == bp)
		bpf_iflist = bp->bif_next;

	free(bp, M_DEVBUF, sizeof(*bp));
}

int
bpf_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	switch (name[0]) {
	case NET_BPF_BUFSIZE:
		return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &bpf_bufsize, BPF_MINBUFSIZE, bpf_maxbufsize);
	case NET_BPF_MAXBUFSIZE:
		return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &bpf_maxbufsize, BPF_MINBUFSIZE, INT_MAX);
	default:
		return (EOPNOTSUPP);
	}
}

int
bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int flags = RW_INTR;
	int error;

	if (namelen != 1)
		return (ENOTDIR);

	flags |= (newp == NULL) ? RW_READ : RW_WRITE;

	error = rw_enter(&bpf_sysctl_lk, flags);
	if (error != 0)
		return (error);

	error = bpf_sysctl_locked(name, namelen, oldp, oldlenp, newp, newlen);

	rw_exit(&bpf_sysctl_lk);

	return (error);
}

struct bpf_d *
bpfilter_lookup(int unit)
{
	struct bpf_d *bd;

	KERNEL_ASSERT_LOCKED();

	LIST_FOREACH(bd, &bpf_d_list, bd_list)
		if (bd->bd_unit == unit)
			return (bd);
	return (NULL);
}

/*
 * Get the list of available data link types for the interface.
 */
int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct bpf_if *bp;
	const char *name;

	name = d->bd_bif->bif_name;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return (ENOMEM);
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
			if (error)
				break;
		}
		n++;
	}

	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	const char *name;
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	name = d->bd_bif->bif_name;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return (EINVAL);
	bpf_detachd(d);
	bpf_attachd(d, bp);
	bpf_resetd(d);
	return (0);
}

u_int32_t	bpf_mbuf_ldw(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldh(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldb(const void *, u_int32_t, int *);

int		bpf_mbuf_copy(const struct mbuf *, u_int32_t,
		    void *, u_int32_t);

const struct bpf_ops bpf_mbuf_ops = {
	bpf_mbuf_ldw,
	bpf_mbuf_ldh,
	bpf_mbuf_ldb,
};

int
bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len)
{
	u_int8_t *cp = buf;
	u_int32_t count;

	while (off >= m->m_len) {
		off -= m->m_len;

		m = m->m_next;
		if (m == NULL)
			return (-1);
	}

	for (;;) {
		count = min(m->m_len - off, len);

		memcpy(cp, m->m_data + off, count);
		len -= count;

		if (len == 0)
			return (0);

		m = m->m_next;
		if (m == NULL)
			break;

		cp += count;
		off = 0;
	}

	return (-1);
}

u_int32_t
bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err)
{
	u_int32_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohl(v);
}

u_int32_t
bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err)
{
	u_int16_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohs(v);
}

u_int32_t
bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err)
{
	const struct mbuf *m = m0;
	u_int8_t v;

	while (k >= m->m_len) {
		k -= m->m_len;

		m = m->m_next;
		if (m == NULL) {
			*err = 1;
			return (0);
		}
	}
	v = m->m_data[k];

	*err = 0;
	return v;
}

u_int
bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen)
{
	return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen);
}