/*	$NetBSD: bpf.c,v 1.155 2010/01/26 01:06:23 pooka Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.4 (Berkeley) 1/9/95
 * static char rcsid[] =
 * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp ";
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.155 2010/01/26 01:06:23 pooka Exp $");

#if defined(_KERNEL_OPT)
#include "opt_bpf.h"
#include "sl.h"
#include "strip.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/buf.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/queue.h>
#include <sys/stat.h>
#include <sys/module.h>
#include <sys/once.h>
#include <sys/atomic.h>

#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/tty.h>
#include <sys/uio.h>

#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/poll.h>
#include <sys/sysctl.h>
#include <sys/kauth.h>

#include <net/if.h>
#include <net/slip.h>

#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <net/if_arc.h>
#include <net/if_ether.h>

#include <netinet/in.h>
#include <netinet/if_inarp.h>


#include <compat/sys/sockio.h>

#ifndef BPF_BUFSIZE
/*
 * 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet
 * jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k).
 */
# define BPF_BUFSIZE 32768
#endif

#define PRINET  26			/* interruptible */

/*
 * The default read buffer size, and limit for BIOCSBLEN, is sysctl'able.
 * XXX the default values should be computed dynamically based
 * on available memory size and available mbuf clusters.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_DFLTBUFSIZE;	/* XXX set dynamically, see above */


/*
 * Global BPF statistics returned by net.bpf.stats sysctl.
 */
struct bpf_stat	bpf_gstats;

/*
 * Use a mutex to avoid a race condition between gathering the stats/peers
 * and opening/closing the device.
 */
static kmutex_t bpf_mtx;

/*
 *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
 *  bpf_dtab holds the descriptors, indexed by minor device #
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_list;

static int	bpf_allocbufs(struct bpf_d *);
static void	bpf_deliver(struct bpf_if *,
		            void *(*cpfn)(void *, const void *, size_t),
			    void *, u_int, u_int, struct ifnet *);
static void	bpf_freed(struct bpf_d *);
static void	bpf_ifname(struct ifnet *, struct ifreq *);
static void	*bpf_mcpy(void *, const void *, size_t);
static int	bpf_movein(struct uio *, int, int,
			        struct mbuf **, struct sockaddr *);
static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
static void	bpf_detachd(struct bpf_d *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static inline void
		bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
    void *(*)(void *, const void *, size_t), struct timespec *);
static void	reset_d(struct bpf_d *);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);

static int	bpf_read(struct file *, off_t *, struct uio *, kauth_cred_t,
    int);
static int	bpf_write(struct file *, off_t *, struct uio *, kauth_cred_t,
    int);
static int	bpf_ioctl(struct file *, u_long, void *);
static int	bpf_poll(struct file *, int);
static int	bpf_stat(struct file *, struct stat *);
static int	bpf_close(struct file *);
static int	bpf_kqfilter(struct file *, struct knote *);
static void	bpf_softintr(void *);

static const struct fileops bpf_fileops = {
	.fo_read = bpf_read,
	.fo_write = bpf_write,
	.fo_ioctl = bpf_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = bpf_poll,
	.fo_stat = bpf_stat,
	.fo_close = bpf_close,
	.fo_kqfilter = bpf_kqfilter,
	.fo_restart = fnullop_restart,
};

dev_type_open(bpfopen);

const struct cdevsw bpf_cdevsw = {
	bpfopen, noclose, noread, nowrite, noioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_OTHER
};

static int
bpf_movein(struct uio *uio, int linktype, int mtu, struct mbuf **mp,
	   struct sockaddr *sockp)
{
	struct mbuf *m;
	int error;
	int len;
	int hlen;
	int align;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		align = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		align = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		/* 6(dst)+6(src)+2(type) */
		hlen = sizeof(struct ether_header);
		align = 2;
		break;

	case DLT_ARCNET:
		sockp->sa_family = AF_UNSPEC;
		hlen = ARC_HDRLEN;
		align = 5;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_LINK;
		/* XXX 4(FORMAC)+6(dst)+6(src) */
		hlen = 16;
		align = 0;
		break;

	case DLT_ECONET:
		sockp->sa_family = AF_UNSPEC;
		hlen = 6;
		align = 2;
		break;

	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		align = 0;
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;
	/*
	 * If there aren't enough bytes for a link level header or the
	 * packet length exceeds the interface mtu, return an error.
	 */
	if (len < hlen || len - hlen > mtu)
		return (EMSGSIZE);

	/*
	 * XXX Avoid complicated buffer chaining ---
	 * bail if it won't fit in a single mbuf.
	 * (Take into account possible alignment bytes)
	 */
	if ((unsigned)len > MCLBYTES - align)
		return (EIO);

	m = m_gethdr(M_WAIT, MT_DATA);
	m->m_pkthdr.rcvif = 0;
	m->m_pkthdr.len = len - hlen;
	if (len > MHLEN - align) {
		m_clget(m, M_WAIT);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}

	/* Ensure the data is properly aligned */
	if (align > 0) {
		m->m_data += align;
		m->m_len -= align;
	}

	error = uiomove(mtod(m, void *), len, uio);
	if (error)
		goto bad;
	if (hlen != 0) {
		memcpy(sockp->sa_data, mtod(m, void *), hlen);
		m->m_data += hlen; /* XXX */
		len -= hlen;
	}
	m->m_len = len;
	*mp = m;
	return (0);

bad:
	m_freem(m);
	return (error);
}
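
/*
 * Worked example (editorial note): for DLT_EN10MB, hlen is 14 and
 * align is 2.  A 1514-byte write() on an interface with a 1500-byte
 * MTU passes the length check (1514 - 14 <= 1500); the 14-byte
 * Ethernet header is copied into sockp->sa_data, and the mbuf is left
 * holding the 1500-byte payload, with m_data advanced past the align
 * and header bytes so the network header lands on a longword boundary.
 */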

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 * Must be called at splnet.
 */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	d->bd_bif = bp;
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_d **p;
	struct bpf_if *bp;

	bp = d->bd_bif;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		d->bd_promisc = 0;
		/*
		 * Take device out of promiscuous mode.  Since we were
		 * able to enter promiscuous mode, we should be able
		 * to turn it off.  But we can get an error if
		 * the interface was configured down, so only panic
		 * if the error is unexpected.
		 */
		error = ifpromisc(bp->bif_ifp, 0);
		if (error && error != EINVAL)
			panic("%s: ifpromisc failed: %d", __func__, error);
	}
	/* Remove d from the interface's descriptor list. */
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == 0)
			panic("%s: descriptor not in list", __func__);
	}
	*p = (*p)->bd_next;
	if (bp->bif_dlist == 0)
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*d->bd_bif->bif_driverp = 0;
	d->bd_bif = 0;
}

static int
doinit(void)
{

	mutex_init(&bpf_mtx, MUTEX_DEFAULT, IPL_NONE);

	LIST_INIT(&bpf_list);

	bpf_gstats.bs_recv = 0;
	bpf_gstats.bs_drop = 0;
	bpf_gstats.bs_capt = 0;

	return 0;
}

/*
 * bpfilterattach() is called at boot time.
 */
/* ARGSUSED */
void
bpfilterattach(int n)
{
	static ONCE_DECL(control);

	RUN_ONCE(&control, doinit);
}

/*
 * Open the bpf device.  The device is cloned on each open.
 */
/* ARGSUSED */
int
bpfopen(dev_t dev, int flag, int mode, struct lwp *l)
{
	struct bpf_d *d;
	struct file *fp;
	int error, fd;

	/* falloc() will use the descriptor for us. */
	if ((error = fd_allocfile(&fp, &fd)) != 0)
		return error;

	d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK|M_ZERO);
	d->bd_bufsize = bpf_bufsize;
	d->bd_seesent = 1;
	d->bd_pid = l->l_proc->p_pid;
	getnanotime(&d->bd_btime);
	d->bd_atime = d->bd_mtime = d->bd_btime;
	callout_init(&d->bd_callout, 0);
	selinit(&d->bd_sel);
	d->bd_sih = softint_establish(SOFTINT_CLOCK, bpf_softintr, d);

	mutex_enter(&bpf_mtx);
	LIST_INSERT_HEAD(&bpf_list, d, bd_list);
	mutex_exit(&bpf_mtx);

	return fd_clone(fp, fd, flag, &bpf_fileops, d);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
static int
bpf_close(struct file *fp)
{
	struct bpf_d *d = fp->f_data;
	int s;

	KERNEL_LOCK(1, NULL);

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	d->bd_pid = curproc->p_pid;

	s = splnet();
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	if (d->bd_bif)
		bpf_detachd(d);
	splx(s);
	bpf_freed(d);
	mutex_enter(&bpf_mtx);
	LIST_REMOVE(d, bd_list);
	mutex_exit(&bpf_mtx);
	callout_destroy(&d->bd_callout);
	seldestroy(&d->bd_sel);
	softint_disestablish(d->bd_sih);
	free(d, M_DEVBUF);
	fp->f_data = NULL;

	KERNEL_UNLOCK_ONE(NULL);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = 0;
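
/*
 * Illustrative sketch (editorial note): each descriptor cycles three
 * equally sized buffers.  Packets are appended to the store buffer
 * (bd_sbuf/bd_slen); a full buffer moves to the hold slot
 * (bd_hbuf/bd_hlen), where read() drains it; the free slot (bd_fbuf)
 * supplies the replacement store buffer.  After ROTATE_BUFFERS():
 *
 *	hold  <- old store buffer (full, waiting for a read)
 *	store <- old free buffer  (empty, bd_slen reset to 0)
 *	free  <- NULL		  (bpf_read() returns the drained
 *				   hold buffer to this slot)
 */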
/*
 *  bpfread - read next chunk of packets from buffers
 */
static int
bpf_read(struct file *fp, off_t *offp, struct uio *uio,
    kauth_cred_t cred, int flags)
{
	struct bpf_d *d = fp->f_data;
	int timed_out;
	int error;
	int s;

	getnanotime(&d->bd_atime);
	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	KERNEL_LOCK(1, NULL);
	s = splnet();
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == 0) {
		if (fp->f_flag & FNONBLOCK) {
			if (d->bd_slen == 0) {
				splx(s);
				KERNEL_UNLOCK_ONE(NULL);
				return (EWOULDBLOCK);
			}
			ROTATE_BUFFERS(d);
			break;
		}

		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * One or more packets arrived since the previous
			 * read, or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		error = tsleep(d, PRINET|PCATCH, "bpf",
				d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			splx(s);
			KERNEL_UNLOCK_ONE(NULL);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				splx(s);
				KERNEL_UNLOCK_ONE(NULL);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (error != 0)
			goto done;
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	splx(s);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);

	s = splnet();
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = 0;
	d->bd_hlen = 0;
done:
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);
	return (error);
}


/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static inline void
bpf_wakeup(struct bpf_d *d)
{
	wakeup(d);
	if (d->bd_async)
		softint_schedule(d->bd_sih);
	selnotify(&d->bd_sel, 0, 0);
}

static void
bpf_softintr(void *cookie)
{
	struct bpf_d *d;

	d = cookie;
	if (d->bd_async)
		fownsignal(d->bd_pgid, SIGIO, 0, 0, NULL);
}

static void
bpf_timed_out(void *arg)
{
	struct bpf_d *d = arg;
	int s;

	s = splnet();
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
	splx(s);
}


static int
bpf_write(struct file *fp, off_t *offp, struct uio *uio,
    kauth_cred_t cred, int flags)
{
	struct bpf_d *d = fp->f_data;
	struct ifnet *ifp;
	struct mbuf *m;
	int error, s;
	static struct sockaddr_storage dst;

	m = NULL;	/* XXX gcc */

	KERNEL_LOCK(1, NULL);

	if (d->bd_bif == 0) {
		KERNEL_UNLOCK_ONE(NULL);
		return (ENXIO);
	}
	getnanotime(&d->bd_mtime);

	ifp = d->bd_bif->bif_ifp;

	if (uio->uio_resid == 0) {
		KERNEL_UNLOCK_ONE(NULL);
		return (0);
	}

	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp->if_mtu, &m,
		(struct sockaddr *) &dst);
	if (error) {
		KERNEL_UNLOCK_ONE(NULL);
		return (error);
	}

	if (m->m_pkthdr.len > ifp->if_mtu) {
		KERNEL_UNLOCK_ONE(NULL);
		m_freem(m);
		return (EMSGSIZE);
	}

	if (d->bd_hdrcmplt)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	s = splsoftnet();
	error = (*ifp->if_output)(ifp, m, (struct sockaddr *) &dst, NULL);
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);
	/*
	 * The driver frees the mbuf.
	 */
	return (error);
}
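
/*
 * Hedged userland sketch (editorial addition, not part of the driver):
 * injecting one frame through bpf_write() above.  The device path, the
 * function name and the raw Ethernet frame are illustrative assumptions;
 * with BIOCSHDRCMPLT set, the caller-supplied link-level header is
 * passed through unmodified.
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <net/bpf.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int
inject_frame(const char *ifname, const void *frame, size_t len)
{
	struct ifreq ifr;
	u_int complete = 1;
	int fd;

	if ((fd = open("/dev/bpf", O_WRONLY)) == -1)	/* cloning device */
		return -1;
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
	if (ioctl(fd, BIOCSETIF, &ifr) == -1 ||	/* attach to interface */
	    ioctl(fd, BIOCSHDRCMPLT, &complete) == -1) {
		close(fd);
		return -1;
	}
	/* One write() per frame; the kernel copies it via bpf_movein(). */
	if (write(fd, frame, len) != (ssize_t)len) {
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}
#endif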

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.  Should be called at splnet.
 */
static void
reset_d(struct bpf_d *d)
{
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = 0;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
	d->bd_ccount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set ethernet read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag.
 *  BIOCSHDRCMPLT	Set "header already complete" flag.
 */
/* ARGSUSED */
static int
bpf_ioctl(struct file *fp, u_long cmd, void *addr)
{
	struct bpf_d *d = fp->f_data;
	int s, error = 0;

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	KERNEL_LOCK(1, NULL);
	d->bd_pid = curproc->p_pid;

	s = splnet();
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	splx(s);

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			s = splnet();
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			splx(s);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != 0)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, addr);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		s = splnet();
		reset_d(d);
		splx(s);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == 0) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		s = splnet();
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		splx(s);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, addr);
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		break;

	/*
	 * Get interface name.
	 */
#ifdef OBIOCGETIF
	case OBIOCGETIF:
#endif
	case BIOCGETIF:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif->bif_ifp, addr);
		break;

	/*
	 * Set interface.
	 */
#ifdef OBIOCSETIF
	case OBIOCSETIF:
#endif
	case BIOCSETIF:
		error = bpf_setif(d, addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = addr;

			/* Compute number of ticks. */
			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
			if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
				d->bd_rtout = 1;
			break;
		}
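
	/*
	 * Worked example (editorial note, assuming hz = 100 and thus
	 * tick = 10000 microseconds): a timeout of { 2, 500000 }
	 * becomes 2 * 100 + 500000 / 10000 = 250 ticks, and a nonzero
	 * request that would round down to 0 ticks is bumped to 1 so
	 * that it still expires.
	 */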

#ifdef BIOCGORTIMEOUT
	/*
	 * Get read timeout.
	 */
	case BIOCGORTIMEOUT:
		{
			struct timeval50 *tv = addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}
#endif

#ifdef BIOCSORTIMEOUT
	/*
	 * Set read timeout.
	 */
	case BIOCSORTIMEOUT:
		{
			struct timeval50 *tv = addr;

			/* Compute number of ticks. */
			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
			if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
				d->bd_rtout = 1;
			break;
		}
#endif

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}
	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			bs->bs_capt = d->bd_ccount;
			break;
		}

	case BIOCGSTATSOLD:
		{
			struct bpf_stat_old *bs = addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag
	 */
	case BIOCGSEESENT:
		*(u_int *)addr = d->bd_seesent;
		break;

	/*
	 * Set "see sent" packets flag
	 */
	case BIOCSSEESENT:
		d->bd_seesent = *(u_int *)addr;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		/*
		 * No need to do anything special: bpf_read() checks the
		 * FNONBLOCK flag on the file to decide whether or not to
		 * block the read.
		 */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case TIOCSPGRP:		/* Process or group to send signals to */
	case FIOSETOWN:
		error = fsetown(&d->bd_pgid, cmd, addr);
		break;

	case TIOCGPGRP:
	case FIOGETOWN:
		error = fgetown(d->bd_pgid, cmd, addr);
		break;
	}
	KERNEL_UNLOCK_ONE(NULL);
	return (error);
}
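
/*
 * Hedged userland sketch (editorial addition): a typical capture setup
 * against the ioctl handler above.  The buffer size, interface name and
 * function name are illustrative; BIOCSBLEN must precede BIOCSETIF,
 * since bpf_ioctl() rejects BIOCSBLEN once an interface is attached,
 * and BIOCGBLEN reports the possibly clamped size afterwards.
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <net/bpf.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int
capture_setup(const char *ifname, u_int *blenp)
{
	struct ifreq ifr;
	u_int immediate = 1;
	int fd;

	if ((fd = open("/dev/bpf", O_RDONLY)) == -1)
		return -1;
	(void)ioctl(fd, BIOCSBLEN, blenp);	/* before BIOCSETIF */
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
	if (ioctl(fd, BIOCSETIF, &ifr) == -1 ||
	    ioctl(fd, BIOCIMMEDIATE, &immediate) == -1 ||
	    ioctl(fd, BIOCGBLEN, blenp) == -1) {
		close(fd);
		return -1;
	}
	return fd;	/* read() with a *blenp-sized buffer from here on */
}
#endif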

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp)
{
	struct bpf_insn *fcode, *old;
	u_int flen, size;
	int s;

	old = d->bd_filter;
	if (fp->bf_insns == 0) {
		if (fp->bf_len != 0)
			return (EINVAL);
		s = splnet();
		d->bd_filter = 0;
		reset_d(d);
		splx(s);
		if (old != 0)
			free(old, M_DEVBUF);
		return (0);
	}
	flen = fp->bf_len;
	if (flen > BPF_MAXINSNS)
		return (EINVAL);

	size = flen * sizeof(*fp->bf_insns);
	fcode = malloc(size, M_DEVBUF, M_WAITOK);
	if (copyin(fp->bf_insns, fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		s = splnet();
		d->bd_filter = fcode;
		reset_d(d);
		splx(s);
		if (old != 0)
			free(old, M_DEVBUF);

		return (0);
	}
	free(fcode, M_DEVBUF);
	return (EINVAL);
}
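
/*
 * Hedged sketch (editorial addition): a minimal filter program as a
 * consumer would hand it to BIOCSETF, which lands in bpf_setf() above.
 * The program accepts IPv4-over-Ethernet packets, truncating them to
 * 96 bytes, and rejects everything else (return 0).
 */
#if 0
#include <net/bpf.h>

static struct bpf_insn ip_insns[] = {
	/* A <- half-word at offset 12 (Ethernet type field) */
	BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 12),
	/* if (A == ETHERTYPE_IP) fall through, else skip one insn */
	BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x0800, 0, 1),
	BPF_STMT(BPF_RET + BPF_K, 96),	/* accept, snap 96 bytes */
	BPF_STMT(BPF_RET + BPF_K, 0),	/* reject */
};

static struct bpf_program ip_prog = {
	.bf_len = sizeof(ip_insns) / sizeof(ip_insns[0]),
	.bf_insns = ip_insns,
};
/* ioctl(fd, BIOCSETF, &ip_prog) copies in and validates the program. */
#endif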

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	char *cp;
	int unit_seen, i, s, error;

	/*
	 * Make sure the provided name has a unit number, and default
	 * it to '0' if not specified.
	 * XXX This is ugly ... do this differently?
	 */
	unit_seen = 0;
	cp = ifr->ifr_name;
	cp[sizeof(ifr->ifr_name) - 1] = '\0';	/* sanity */
	while (*cp++)
		if (*cp >= '0' && *cp <= '9')
			unit_seen = 1;
	if (!unit_seen) {
		/* Make sure to leave room for the '\0'. */
		for (i = 0; i < (IFNAMSIZ - 1); ++i) {
			if ((ifr->ifr_name[i] >= 'a' &&
			     ifr->ifr_name[i] <= 'z') ||
			    (ifr->ifr_name[i] >= 'A' &&
			     ifr->ifr_name[i] <= 'Z'))
				continue;
			ifr->ifr_name[i] = '0';
		}
	}

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == 0 ||
		    strcmp(ifp->if_xname, ifr->ifr_name) != 0)
			continue;
		/* skip additional entry */
		if (bp->bif_driverp != &ifp->if_bpf)
			continue;
		/*
		 * We found the requested interface.
		 * Allocate the packet buffers if we need to.
		 * If we're already attached to requested interface,
		 * just flush the buffer.
		 */
		if (d->bd_sbuf == 0) {
			error = bpf_allocbufs(d);
			if (error != 0)
				return (error);
		}
		s = splnet();
		if (bp != d->bd_bif) {
			if (d->bd_bif)
				/*
				 * Detach if attached to something else.
				 */
				bpf_detachd(d);

			bpf_attachd(d, bp);
		}
		reset_d(d);
		splx(s);
		return (0);
	}
	/* Not found. */
	return (ENXIO);
}

/*
 * Copy the interface name to the ifreq.
 */
static void
bpf_ifname(struct ifnet *ifp, struct ifreq *ifr)
{
	memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
}

static int
bpf_stat(struct file *fp, struct stat *st)
{
	struct bpf_d *d = fp->f_data;

	(void)memset(st, 0, sizeof(*st));
	KERNEL_LOCK(1, NULL);
	st->st_dev = makedev(cdevsw_lookup_major(&bpf_cdevsw), d->bd_pid);
	st->st_atimespec = d->bd_atime;
	st->st_mtimespec = d->bd_mtime;
	st->st_ctimespec = st->st_birthtimespec = d->bd_btime;
	st->st_uid = kauth_cred_geteuid(fp->f_cred);
	st->st_gid = kauth_cred_getegid(fp->f_cred);
	KERNEL_UNLOCK_ONE(NULL);
	return 0;
}

/*
 * Support for poll() system call
 *
 * Return true iff the specific operation will not block indefinitely - with
 * the assumption that it is safe to positively acknowledge a request for the
 * ability to write to the BPF device.
 * Otherwise, return false but make a note that a selnotify() must be done.
 */
static int
bpf_poll(struct file *fp, int events)
{
	struct bpf_d *d = fp->f_data;
	int s = splnet();
	int revents;

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	KERNEL_LOCK(1, NULL);
	d->bd_pid = curproc->p_pid;

	revents = events & (POLLOUT | POLLWRNORM);
	if (events & (POLLIN | POLLRDNORM)) {
		/*
		 * An imitation of the FIONREAD ioctl code.
		 */
		if (d->bd_hlen != 0 ||
		    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
		     d->bd_slen != 0)) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(curlwp, &d->bd_sel);
			/* Start the read timeout if necessary */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
					      bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}

	KERNEL_UNLOCK_ONE(NULL);
	splx(s);
	return (revents);
}

static void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;
	int s;

	KERNEL_LOCK(1, NULL);
	s = splnet();
	SLIST_REMOVE(&d->bd_sel.sel_klist, kn, knote, kn_selnext);
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);
}

static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;
	int rv;

	KERNEL_LOCK(1, NULL);
	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;
	rv = (kn->kn_data > 0);
	KERNEL_UNLOCK_ONE(NULL);
	return rv;
}

static const struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfrdetach, filt_bpfread };

static int
bpf_kqfilter(struct file *fp, struct knote *kn)
{
	struct bpf_d *d = fp->f_data;
	struct klist *klist;
	int s;

	KERNEL_LOCK(1, NULL);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_sel.sel_klist;
		kn->kn_fop = &bpfread_filtops;
		break;

	default:
		KERNEL_UNLOCK_ONE(NULL);
		return (EINVAL);
	}

	kn->kn_hook = d;

	s = splnet();
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);

	return (0);
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
static void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
	struct bpf_d *d;
	u_int slen;
	struct timespec ts;
	int gottime = 0;

	/*
	 * Note that the ipl does not have to be raised at this point.
	 * The only problem that could arise here is that if two different
	 * interfaces shared any data.  This is not the case.
	 */
	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
		++d->bd_rcount;
		++bpf_gstats.bs_recv;
		slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen);
		if (slen != 0) {
			if (!gottime) {
				nanotime(&ts);
				gottime = 1;
			}
			catchpacket(d, pkt, pktlen, slen, memcpy, &ts);
		}
	}
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
static void *
bpf_mcpy(void *dst_arg, const void *src_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcpy");
		count = min(m->m_len, len);
		memcpy(dst, mtod(m, const void *), count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
	return dst_arg;
}

/*
 * Dispatch a packet to all the listeners on interface bp.
 *
 * marg    pointer to the packet, either a data buffer or an mbuf chain
 * buflen  buffer length, if marg is a data buffer
 * cpfn    a function that can copy marg into the listener's buffer
 * pktlen  length of the packet
 * rcvif   either NULL or the interface the packet came in on.
 */
static inline void
bpf_deliver(struct bpf_if *bp, void *(*cpfn)(void *, const void *, size_t),
	    void *marg, u_int pktlen, u_int buflen, struct ifnet *rcvif)
{
	u_int slen;
	struct bpf_d *d;
	struct timespec ts;
	int gottime = 0;

	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
		if (!d->bd_seesent && (rcvif == NULL))
			continue;
		++d->bd_rcount;
		++bpf_gstats.bs_recv;
		slen = bpf_filter(d->bd_filter, marg, pktlen, buflen);
		if (slen != 0) {
			if (!gottime) {
				nanotime(&ts);
				gottime = 1;
			}
			catchpacket(d, marg, pktlen, slen, cpfn, &ts);
		}
	}
}

/*
 * Incoming linkage from device drivers, when the head of the packet is in
 * a buffer, and the tail is in an mbuf chain.
 */
static void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	u_int pktlen;
	struct mbuf mb;

	pktlen = m_length(m) + dlen;

	/*
	 * Craft on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only setup what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	(void)memset(&mb, 0, sizeof(mb));
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;

	bpf_deliver(bp, bpf_mcpy, &mb, pktlen, 0, m->m_pkthdr.rcvif);
}

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
static void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	void *(*cpfn)(void *, const void *, size_t);
	u_int pktlen, buflen;
	void *marg;

	pktlen = m_length(m);

	if (pktlen == m->m_len) {
		cpfn = (void *)memcpy;
		marg = mtod(m, void *);
		buflen = pktlen;
	} else {
		cpfn = bpf_mcpy;
		marg = m;
		buflen = 0;
	}

	bpf_deliver(bp, cpfn, marg, pktlen, buflen, m->m_pkthdr.rcvif);
}
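
/*
 * Hedged driver-side sketch (editorial note): receive paths typically
 * feed mbufs to the taps above through the if_bpf hook, which
 * bpf_attachd()/bpf_detachd() maintain, so the call is skipped when
 * nobody is listening.  The surrounding driver code is hypothetical.
 */
#if 0
	/* in a hypothetical driver's receive path, m freshly dequeued */
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m);
#endif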

/*
 * We need to prepend the address family as
 * a four byte field.  Cons up a dummy header
 * to pacify bpf.  This is safe because bpf
 * will only read from the mbuf (i.e., it won't
 * try to free it or keep a pointer to it).
 */
static void
bpf_mtap_af(struct bpf_if *bp, uint32_t af, struct mbuf *m)
{
	struct mbuf m0;

	m0.m_flags = 0;
	m0.m_next = m;
	m0.m_len = 4;
	m0.m_data = (char *)&af;

	bpf_mtap(bp, &m0);
}

static void
bpf_mtap_et(struct bpf_if *bp, uint16_t et, struct mbuf *m)
{
	struct mbuf m0;

	m0.m_flags = 0;
	m0.m_next = m;
	m0.m_len = 14;
	m0.m_data = m0.m_dat;

	((uint32_t *)m0.m_data)[0] = 0;
	((uint32_t *)m0.m_data)[1] = 0;
	((uint32_t *)m0.m_data)[2] = 0;
	((uint16_t *)m0.m_data)[6] = et;

	bpf_mtap(bp, &m0);
}

/*
 * Put the SLIP pseudo-"link header" in place.
 * Note this M_PREPEND() should never fail,
 * since we know we always have enough space
 * in the input buffer.
 */
static void
bpf_mtap_sl_in(struct bpf_if *bp, u_char *chdr, struct mbuf **m)
{
	int s;
	u_char *hp;

	M_PREPEND(*m, SLIP_HDRLEN, M_DONTWAIT);
	if (*m == NULL)
		return;

	hp = mtod(*m, u_char *);
	hp[SLX_DIR] = SLIPDIR_IN;
	(void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN);

	s = splnet();
	bpf_mtap(bp, *m);
	splx(s);

	m_adj(*m, SLIP_HDRLEN);
}

/*
 * Put the SLIP pseudo-"link header" in
 * place.  The compressed header is now
 * at the beginning of the mbuf.
 */
static void
bpf_mtap_sl_out(struct bpf_if *bp, u_char *chdr, struct mbuf *m)
{
	struct mbuf m0;
	u_char *hp;
	int s;

	m0.m_flags = 0;
	m0.m_next = m;
	m0.m_data = m0.m_dat;
	m0.m_len = SLIP_HDRLEN;

	hp = mtod(&m0, u_char *);

	hp[SLX_DIR] = SLIPDIR_OUT;
	(void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN);

	s = splnet();
	bpf_mtap(bp, &m0);
	splx(s);
	m_freem(m);
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Wake up pending reads when a buffer fills.
 * "cpfn" is the routine called to do the actual data
 * transfer.  memcpy is passed in to copy contiguous chunks, while
 * bpf_mcpy is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
    void *(*cpfn)(void *, const void *, size_t), struct timespec *ts)
{
	struct bpf_hdr *hp;
	int totlen, curlen;
	int hdrlen = d->bd_bif->bif_hdrlen;
	int do_wakeup = 0;

	++d->bd_ccount;
	++bpf_gstats.bs_capt;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == 0) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			++bpf_gstats.bs_drop;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;
	}

	/*
	 * Append the bpf header.
	 */
	hp = (struct bpf_hdr *)((char *)d->bd_sbuf + curlen);
	hp->bh_tstamp.tv_sec = ts->tv_sec;
	hp->bh_tstamp.tv_usec = ts->tv_nsec / 1000;
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)((u_char *)hp + hdrlen, pkt, (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;

	/*
	 * Call bpf_wakeup after bd_slen has been updated so that kevent(2)
	 * will cause filt_bpfread() to be called with it adjusted.
	 */
	if (do_wakeup)
		bpf_wakeup(d);
}
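
/*
 * Hedged userland sketch (editorial addition): walking the buffer that
 * catchpacket() above fills.  Each record is a struct bpf_hdr followed
 * by bh_caplen bytes of packet data, padded with BPF_WORDALIGN()
 * exactly as the kernel lays it out.  handle_packet() is hypothetical.
 */
#if 0
#include <net/bpf.h>

extern void handle_packet(const char *, unsigned int);	/* hypothetical */

static void
walk_buffer(const char *buf, size_t nread)
{
	const char *p = buf;

	while (p < buf + nread) {
		const struct bpf_hdr *bh = (const struct bpf_hdr *)p;

		handle_packet(p + bh->bh_hdrlen, bh->bh_caplen);
		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
	}
}
#endif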

/*
 * Initialize all nonzero fields of a descriptor.
 */
static int
bpf_allocbufs(struct bpf_d *d)
{

	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (!d->bd_fbuf)
		return (ENOBUFS);
	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (!d->bd_sbuf) {
		free(d->bd_fbuf, M_DEVBUF);
		return (ENOBUFS);
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	return (0);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it hasn't yet been marked
	 * free.
	 */
	if (d->bd_sbuf != 0) {
		free(d->bd_sbuf, M_DEVBUF);
		if (d->bd_hbuf != 0)
			free(d->bd_hbuf, M_DEVBUF);
		if (d->bd_fbuf != 0)
			free(d->bd_fbuf, M_DEVBUF);
	}
	if (d->bd_filter)
		free(d->bd_filter, M_DEVBUF);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type;
 * hdrlen is the fixed size of the link header for the specified dlt
 * (variable length headers not yet supported).
 */
static void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{
	struct bpf_if *bp;
	bp = malloc(sizeof(*bp), M_DEVBUF, M_DONTWAIT);
	if (bp == 0)
		panic("bpfattach");

	bp->bif_dlist = 0;
	bp->bif_driverp = driverp;
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;

	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = 0;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

#if 0
	printf("bpf: %s attached\n", ifp->if_xname);
#endif
}
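
/*
 * Worked example (editorial note): on Ethernet, hdrlen is 14; assuming
 * SIZEOF_BPF_HDR works out to 18 (its value when struct bpf_hdr carries
 * a 32-bit struct timeval), BPF_WORDALIGN(14 + 18) = 32 on a 32-bit
 * platform, so bif_hdrlen = 32 - 14 = 18: each record starts with 18
 * bytes of bpf header, and the network header then begins at offset 32,
 * a longword boundary.
 */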

/*
 * Remove an interface from bpf.
 */
static void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, **pbp;
	struct bpf_d *d;
	int s;

	/* Nuke the vnodes for any open instances */
	LIST_FOREACH(d, &bpf_list, bd_list) {
		if (d->bd_bif != NULL && d->bd_bif->bif_ifp == ifp) {
			/*
			 * Detach the descriptor from an interface now.
			 * It will be free'ed later by close routine.
			 */
			s = splnet();
			d->bd_promisc = 0;	/* we can't touch device. */
			bpf_detachd(d);
			splx(s);
		}
	}

  again:
	for (bp = bpf_iflist, pbp = &bpf_iflist;
	     bp != NULL; pbp = &bp->bif_next, bp = bp->bif_next) {
		if (bp->bif_ifp == ifp) {
			*pbp = bp->bif_next;
			free(bp, M_DEVBUF);
			goto again;
		}
	}
}

/*
 * Change the data link type of an interface.
 */
static void
bpf_change_type(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_driverp == &ifp->if_bpf)
			break;
	}
	if (bp == NULL)
		panic("bpf_change_type");

	bp->bif_dlt = dlt;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
}

/*
 * Get the list of data link types available on the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return ENOMEM;
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	bfl->bfl_len = n;
	return error;
}

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int s, error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	if (d->bd_bif->bif_dlt == dlt)
		return 0;
	ifp = d->bd_bif->bif_ifp;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return EINVAL;
	s = splnet();
	opromisc = d->bd_promisc;
	bpf_detachd(d);
	bpf_attachd(d, bp);
	reset_d(d);
	if (opromisc) {
		error = ifpromisc(bp->bif_ifp, 1);
		if (error)
			printf("%s: bpf_setdlt: ifpromisc failed (%d)\n",
			    bp->bif_ifp->if_xname, error);
		else
			d->bd_promisc = 1;
	}
	splx(s);
	return 0;
}

static int
sysctl_net_bpf_maxbufsize(SYSCTLFN_ARGS)
{
	int newsize, error;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newsize;
	newsize = bpf_maxbufsize;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return (error);

	if (newsize < BPF_MINBUFSIZE || newsize > BPF_MAXBUFSIZE)
		return (EINVAL);

	bpf_maxbufsize = newsize;

	return (0);
}

static int
sysctl_net_bpf_peers(SYSCTLFN_ARGS)
{
	int    error, elem_count;
	struct bpf_d	 *dp;
	struct bpf_d_ext  dpe;
	size_t len, needed, elem_size, out_size;
	char   *sp;

	if (namelen == 1 && name[0] == CTL_QUERY)
		return (sysctl_query(SYSCTLFN_CALL(rnode)));

	if (namelen != 2)
		return (EINVAL);

	/* BPF peers is privileged information. */
	error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
	    KAUTH_REQ_NETWORK_INTERFACE_GETPRIV, NULL, NULL, NULL);
	if (error)
		return (EPERM);

	len = (oldp != NULL) ? *oldlenp : 0;
	sp = oldp;
	elem_size = name[0];
	elem_count = name[1];
	out_size = MIN(sizeof(dpe), elem_size);
	needed = 0;

	if (elem_size < 1 || elem_count < 0)
		return (EINVAL);

	mutex_enter(&bpf_mtx);
	LIST_FOREACH(dp, &bpf_list, bd_list) {
		if (len >= elem_size && elem_count > 0) {
#define BPF_EXT(field)	dpe.bde_ ## field = dp->bd_ ## field
			BPF_EXT(bufsize);
			BPF_EXT(promisc);
			BPF_EXT(state);
			BPF_EXT(immediate);
			BPF_EXT(hdrcmplt);
			BPF_EXT(seesent);
			BPF_EXT(pid);
			BPF_EXT(rcount);
			BPF_EXT(dcount);
			BPF_EXT(ccount);
#undef BPF_EXT
			if (dp->bd_bif)
				(void)strlcpy(dpe.bde_ifname,
				    dp->bd_bif->bif_ifp->if_xname,
				    IFNAMSIZ - 1);
			else
				dpe.bde_ifname[0] = '\0';

			error = copyout(&dpe, sp, out_size);
			if (error)
				break;
			sp += elem_size;
			len -= elem_size;
		}
		needed += elem_size;
		if (elem_count > 0 && elem_count != INT_MAX)
			elem_count--;
	}
	mutex_exit(&bpf_mtx);

	*oldlenp = needed;

	return (error);
}

SYSCTL_SETUP(sysctl_net_bpf_setup, "sysctl net.bpf subtree setup")
{
	const struct sysctlnode *node;

	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "net", NULL,
		       NULL, 0, NULL, 0,
		       CTL_NET, CTL_EOL);

	node = NULL;
	sysctl_createv(clog, 0, NULL, &node,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "bpf",
		       SYSCTL_DESCR("BPF options"),
		       NULL, 0, NULL, 0,
		       CTL_NET, CTL_CREATE, CTL_EOL);
	if (node != NULL) {
		sysctl_createv(clog, 0, NULL, NULL,
			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
			CTLTYPE_INT, "maxbufsize",
			SYSCTL_DESCR("Maximum size for data capture buffer"),
			sysctl_net_bpf_maxbufsize, 0, &bpf_maxbufsize, 0,
			CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
		sysctl_createv(clog, 0, NULL, NULL,
			CTLFLAG_PERMANENT,
			CTLTYPE_STRUCT, "stats",
			SYSCTL_DESCR("BPF stats"),
			NULL, 0, &bpf_gstats, sizeof(bpf_gstats),
			CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
		sysctl_createv(clog, 0, NULL, NULL,
			CTLFLAG_PERMANENT,
			CTLTYPE_STRUCT, "peers",
			SYSCTL_DESCR("BPF peers"),
			sysctl_net_bpf_peers, 0, NULL, 0,
			CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
	}

}
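
/*
 * Hedged userland sketch (editorial addition): reading the net.bpf.stats
 * node created above with sysctlbyname(3).
 */
#if 0
#include <sys/sysctl.h>
#include <net/bpf.h>
#include <stdio.h>

static void
print_bpf_stats(void)
{
	struct bpf_stat bs;
	size_t len = sizeof(bs);

	if (sysctlbyname("net.bpf.stats", &bs, &len, NULL, 0) == 0)
		printf("recv %llu drop %llu capt %llu\n",
		    (unsigned long long)bs.bs_recv,
		    (unsigned long long)bs.bs_drop,
		    (unsigned long long)bs.bs_capt);
}
#endif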

struct bpf_ops bpf_ops_kernel = {
	.bpf_attach =		bpfattach,
	.bpf_detach =		bpfdetach,
	.bpf_change_type =	bpf_change_type,

	.bpf_tap =		bpf_tap,
	.bpf_mtap =		bpf_mtap,
	.bpf_mtap2 =		bpf_mtap2,
	.bpf_mtap_af =		bpf_mtap_af,
	.bpf_mtap_et =		bpf_mtap_et,
	.bpf_mtap_sl_in =	bpf_mtap_sl_in,
	.bpf_mtap_sl_out =	bpf_mtap_sl_out,
};

MODULE(MODULE_CLASS_DRIVER, bpf, NULL);

static int
bpf_modcmd(modcmd_t cmd, void *arg)
{
	devmajor_t bmajor, cmajor;
	int error;

	bmajor = cmajor = NODEVMAJOR;

	switch (cmd) {
	case MODULE_CMD_INIT:
		bpfilterattach(0);
		error = devsw_attach("bpf", NULL, &bmajor,
		    &bpf_cdevsw, &cmajor);
		if (error == EEXIST)
			error = 0; /* maybe built-in ... improve eventually */
		if (error)
			break;

		bpf_ops_handover_enter(&bpf_ops_kernel);
		atomic_swap_ptr(&bpf_ops, &bpf_ops_kernel);
		bpf_ops_handover_exit();
		break;

	case MODULE_CMD_FINI:
		/*
		 * bpf_ops is not (yet) referenced in the callers before
		 * attach.  maybe other issues too.  "safety first".
		 */
		error = EOPNOTSUPP;
		break;

	default:
		error = ENOTTY;
		break;
	}

	return error;
}
1965