xref: /netbsd-src/sys/net/bpf.c (revision bada23909e740596d0a3785a73bd3583a9807fb8)
1 /*	$NetBSD: bpf.c,v 1.46 1998/12/04 11:04:37 bouyer Exp $	*/
2 
3 /*
4  * Copyright (c) 1990, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from the Stanford/CMU enet packet filter,
8  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
9  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
10  * Berkeley Laboratory.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by the University of
23  *	California, Berkeley and its contributors.
24  * 4. Neither the name of the University nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  *
40  *	@(#)bpf.c	8.4 (Berkeley) 1/9/95
41  * static char rcsid[] =
42  * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp ";
43  */
44 
45 #include "bpfilter.h"
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/mbuf.h>
50 #include <sys/buf.h>
51 #include <sys/time.h>
52 #include <sys/proc.h>
53 #include <sys/user.h>
54 #include <sys/ioctl.h>
55 #include <sys/map.h>
56 #include <sys/conf.h>
57 
58 #include <sys/file.h>
59 #if defined(sparc) && BSD < 199103
60 #include <sys/stream.h>
61 #endif
62 #include <sys/tty.h>
63 #include <sys/uio.h>
64 
65 #include <sys/protosw.h>
66 #include <sys/socket.h>
67 #include <sys/errno.h>
68 #include <sys/kernel.h>
69 #include <sys/poll.h>
70 
71 #include <net/if.h>
72 
73 #include <net/bpf.h>
74 #include <net/bpfdesc.h>
75 
76 #include <net/if_arc.h>
77 #include <net/if_ether.h>
78 
79 #include <netinet/in.h>
80 #include <netinet/if_inarp.h>
81 
82 /*
83  * Older BSDs don't have kernel malloc.
84  */
85 #if BSD < 199103
86 extern bcopy();
87 static caddr_t bpf_alloc();
88 #include <net/bpf_compat.h>
89 #define BPF_BUFSIZE (MCLBYTES-8)
90 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, code, uio)
91 #else
92 #define BPF_BUFSIZE 8192		/* 4096 too small for FDDI frames */
93 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
94 #endif
95 
96 #define PRINET  26			/* interruptible */
97 
98 /*
99  * The default read buffer size is patchable.
100  */
101 int bpf_bufsize = BPF_BUFSIZE;
102 
103 /*
104  *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
105  *  bpf_dtab holds the descriptors, indexed by minor device #
106  */
107 struct bpf_if	*bpf_iflist;
108 struct bpf_d	bpf_dtab[NBPFILTER];
109 
110 static int	bpf_allocbufs __P((struct bpf_d *));
112 static void	bpf_freed __P((struct bpf_d *));
114 static void	bpf_ifname __P((struct ifnet *, struct ifreq *));
116 static void	*bpf_mcpy __P((void *, const void *, size_t));
117 static int	bpf_movein __P((struct uio *, int, int,
118 			        struct mbuf **, struct sockaddr *));
119 static void	bpf_attachd __P((struct bpf_d *, struct bpf_if *));
120 static void	bpf_detachd __P((struct bpf_d *));
121 static int	bpf_setif __P((struct bpf_d *, struct ifreq *));
122 int		bpfpoll __P((dev_t, int, struct proc *));
123 static __inline void
124 		bpf_wakeup __P((struct bpf_d *));
125 static void	catchpacket __P((struct bpf_d *, u_char *, u_int, u_int,
126 				 void *(*)(void *, const void *, size_t)));
127 static void	reset_d __P((struct bpf_d *));
128 
129 static int
130 bpf_movein(uio, linktype, mtu, mp, sockp)
131 	register struct uio *uio;
132 	int linktype;
133 	int mtu;
134 	register struct mbuf **mp;
135 	register struct sockaddr *sockp;
136 {
137 	struct mbuf *m;
138 	int error;
139 	int len;
140 	int hlen;
141 	int align;
142 
143 	/*
144 	 * Build a sockaddr based on the data link layer type.
145 	 * We do this at this level because the ethernet header
146 	 * is copied directly into the data field of the sockaddr.
147 	 * In the case of SLIP, there is no header and the packet
148 	 * is forwarded as is.
149 	 * Also, we are careful to leave room at the front of the mbuf
150 	 * for the link level header.
151 	 */
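	/*
	 * Illustrative example (a sketch, not kernel code): to transmit an
	 * Ethernet frame through bpf, userland passes the raw frame to
	 * write(2), e.g.
	 *
	 *	u_char frame[sizeof(struct ether_header) + payload_len];
	 *	(fill in destination/source address and type, then the
	 *	 payload; "payload_len" is just a placeholder)
	 *	write(bpf_fd, frame, sizeof(frame));
	 *
	 * For DLT_EN10MB the first 14 header bytes are copied into
	 * sockp->sa_data below and the remainder becomes the mbuf that is
	 * handed to the interface's output routine.
	 */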
152 	switch (linktype) {
153 
154 	case DLT_SLIP:
155 		sockp->sa_family = AF_INET;
156 		hlen = 0;
157 		align = 0;
158 		break;
159 
160 	case DLT_PPP:
161 		sockp->sa_family = AF_UNSPEC;
162 		hlen = 0;
163 		align = 0;
164 		break;
165 
166 	case DLT_EN10MB:
167 		sockp->sa_family = AF_UNSPEC;
168 		/* XXX Would MAXLINKHDR be better? */
169  		/* 6(dst)+6(src)+2(type) */
170 		hlen = sizeof(struct ether_header);
171 		align = 2;
172 		break;
173 
174 	case DLT_ARCNET:
175 		sockp->sa_family = AF_UNSPEC;
176 		hlen = ARC_HDRLEN;
177 		align = 5;
178 		break;
179 
180 	case DLT_FDDI:
181 		sockp->sa_family = AF_UNSPEC;
182 		/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
183 		hlen = 24;
184 		align = 0;
185 		break;
186 
187 	case DLT_NULL:
188 		sockp->sa_family = AF_UNSPEC;
189 		hlen = 0;
190 		align = 0;
191 		break;
192 
193 	default:
194 		return (EIO);
195 	}
196 
197 	len = uio->uio_resid;
198 	/*
199 	 * If there aren't enough bytes for a link level header or the
200 	 * packet length exceeds the interface mtu, return an error.
201 	 */
202 	if (len < hlen || len - hlen > mtu)
203 		return (EMSGSIZE);
204 
205 	/*
206 	 * XXX Avoid complicated buffer chaining ---
207 	 * bail if it won't fit in a single mbuf.
208 	 * (Take into account possible alignment bytes)
209 	 */
210 	if ((unsigned)len > MCLBYTES - align)
211 		return (EIO);
212 
213 	MGETHDR(m, M_WAIT, MT_DATA);
214 	if (m == 0)
215 		return (ENOBUFS);
216 	m->m_pkthdr.rcvif = 0;
217 	m->m_pkthdr.len = len - hlen;
218 	if (len > MHLEN - align) {
219 #if BSD >= 199103
220 		MCLGET(m, M_WAIT);
221 		if ((m->m_flags & M_EXT) == 0) {
222 #else
223 		MCLGET(m);
224 		if (m->m_len != MCLBYTES) {
225 #endif
226 			error = ENOBUFS;
227 			goto bad;
228 		}
229 	}
230 
231 	/* Ensure the data is properly aligned */
232 	if (align > 0) {
233 #if BSD >= 199103
234 		m->m_data += align;
235 #else
236 		m->m_off += align;
237 #endif
238 		m->m_len -= align;
239 	}
240 
241 	error = UIOMOVE(mtod(m, caddr_t), len, UIO_WRITE, uio);
242 	if (error)
243 		goto bad;
244 	if (hlen != 0) {
245 		memcpy(sockp->sa_data, mtod(m, caddr_t), hlen);
246 #if BSD >= 199103
247 		m->m_data += hlen; /* XXX */
248 #else
249 		m->m_off += hlen;
250 #endif
251 		len -= hlen;
252 	}
253 	m->m_len = len;
254 	*mp = m;
255 	return (0);
256 
257 bad:
258 	m_freem(m);
259 	return (error);
260 }
261 
262 /*
263  * Attach file to the bpf interface, i.e. make d listen on bp.
264  * Must be called at splimp.
265  */
266 static void
267 bpf_attachd(d, bp)
268 	struct bpf_d *d;
269 	struct bpf_if *bp;
270 {
271 	/*
272 	 * Point d at bp, and add d to the interface's list of listeners.
273 	 * Finally, point the driver's bpf cookie at the interface so
274 	 * it will divert packets to bpf.
275 	 */
276 	d->bd_bif = bp;
277 	d->bd_next = bp->bif_dlist;
278 	bp->bif_dlist = d;
279 
280 	*bp->bif_driverp = bp;
281 }
282 
283 /*
284  * Detach a file from its interface.
285  */
286 static void
287 bpf_detachd(d)
288 	struct bpf_d *d;
289 {
290 	struct bpf_d **p;
291 	struct bpf_if *bp;
292 
293 	bp = d->bd_bif;
294 	/*
295 	 * Check if this descriptor had requested promiscuous mode.
296 	 * If so, turn it off.
297 	 */
298 	if (d->bd_promisc) {
299 		int error;
300 
301 		d->bd_promisc = 0;
302 		/*
303 		 * Take device out of promiscuous mode.  Since we were
304 		 * able to enter promiscuous mode, we should be able
305 		 * to turn it off.  But we can get an error if
306 		 * the interface was configured down, so only panic
307 		 * if the error is unexpected.
308 		 */
309   		error = ifpromisc(bp->bif_ifp, 0);
310 		if (error && error != EINVAL)
311 			panic("bpf: ifpromisc failed");
312 	}
313 	/* Remove d from the interface's descriptor list. */
314 	p = &bp->bif_dlist;
315 	while (*p != d) {
316 		p = &(*p)->bd_next;
317 		if (*p == 0)
318 			panic("bpf_detachd: descriptor not in list");
319 	}
320 	*p = (*p)->bd_next;
321 	if (bp->bif_dlist == 0)
322 		/*
323 		 * Let the driver know that there are no more listeners.
324 		 */
325 		*d->bd_bif->bif_driverp = 0;
326 	d->bd_bif = 0;
327 }
328 
329 
330 /*
331  * Mark a descriptor free by making it point to itself.
332  * This is probably cheaper than marking with a constant since
333  * the address should be in a register anyway.
334  */
335 #define D_ISFREE(d) ((d) == (d)->bd_next)
336 #define D_MARKFREE(d) ((d)->bd_next = (d))
337 #define D_MARKUSED(d) ((d)->bd_next = 0)
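
/*
 * For example, bpfilterattach() below marks every slot free at boot, and
 * the memset() in bpfopen() implicitly does a D_MARKUSED() by zeroing
 * bd_next.
 */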
338 
339 /*
340  * bpfilterattach() is called at boot time.
341  */
342 /* ARGSUSED */
343 void
344 bpfilterattach(n)
345 	int n;
346 {
347 	int i;
348 	/*
349 	 * Mark all the descriptors free.
350 	 */
351 	for (i = 0; i < NBPFILTER; ++i)
352 		D_MARKFREE(&bpf_dtab[i]);
353 
354 }
355 
356 /*
357  * Open ethernet device.  Returns ENXIO for illegal minor device number,
358  * EBUSY if file is open by another process.
359  */
360 /* ARGSUSED */
361 int
362 bpfopen(dev, flag, mode, p)
363 	dev_t dev;
364 	int flag;
365 	int mode;
366 	struct proc *p;
367 {
368 	register struct bpf_d *d;
369 
370 	if (minor(dev) >= NBPFILTER)
371 		return (ENXIO);
372 	/*
373 	 * Each minor can be opened by only one process.  If the requested
374 	 * minor is in use, return EBUSY.
375 	 */
376 	d = &bpf_dtab[minor(dev)];
377 	if (!D_ISFREE(d))
378 		return (EBUSY);
379 
380 	/* Mark "in use" and do most initialization. */
381 	memset((char *)d, 0, sizeof(*d));
382 	d->bd_bufsize = bpf_bufsize;
383 
384 	return (0);
385 }
386 
387 /*
388  * Close the descriptor by detaching it from its interface,
389  * deallocating its buffers, and marking it free.
390  */
391 /* ARGSUSED */
392 int
393 bpfclose(dev, flag, mode, p)
394 	dev_t dev;
395 	int flag;
396 	int mode;
397 	struct proc *p;
398 {
399 	register struct bpf_d *d = &bpf_dtab[minor(dev)];
400 	register int s;
401 
402 	s = splimp();
403 	if (d->bd_bif)
404 		bpf_detachd(d);
405 	splx(s);
406 	bpf_freed(d);
407 
408 	return (0);
409 }
410 
411 /*
412  * Support for SunOS, which does not have tsleep.
413  */
414 #if BSD < 199103
415 static
416 bpf_timeout(arg)
417 	caddr_t arg;
418 {
419 	struct bpf_d *d = (struct bpf_d *)arg;
420 	d->bd_timedout = 1;
421 	wakeup(arg);
422 }
423 
424 #define BPF_SLEEP(chan, pri, s, t) bpf_sleep((struct bpf_d *)chan)
425 
426 int
427 bpf_sleep(d)
428 	register struct bpf_d *d;
429 {
430 	register int rto = d->bd_rtout;
431 	register int st;
432 
433 	if (rto != 0) {
434 		d->bd_timedout = 0;
435 		timeout(bpf_timeout, (caddr_t)d, rto);
436 	}
437 	st = sleep((caddr_t)d, PRINET|PCATCH);
438 	if (rto != 0) {
439 		if (d->bd_timedout == 0)
440 			untimeout(bpf_timeout, (caddr_t)d);
441 		else if (st == 0)
442 			return EWOULDBLOCK;
443 	}
444 	return (st != 0) ? EINTR : 0;
445 }
446 #else
447 #define BPF_SLEEP tsleep
448 #endif
449 
450 /*
451  * Rotate the packet buffers in descriptor d.  Move the store buffer
452  * into the hold slot, and the free buffer into the store slot.
453  * Zero the length of the new store buffer.
454  */
455 #define ROTATE_BUFFERS(d) \
456 	(d)->bd_hbuf = (d)->bd_sbuf; \
457 	(d)->bd_hlen = (d)->bd_slen; \
458 	(d)->bd_sbuf = (d)->bd_fbuf; \
459 	(d)->bd_slen = 0; \
460 	(d)->bd_fbuf = 0;
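
/*
 * Life cycle of the three buffers, by way of example: packets are
 * appended to the store buffer (bd_sbuf); when it fills, or a read
 * timeout expires with data pending, ROTATE_BUFFERS() moves it to the
 * hold slot (bd_hbuf) for the reader and installs the free buffer
 * (bd_fbuf) as the new store buffer.  Once bpfread() has copied the
 * hold buffer out, it becomes the free buffer again.  If no free buffer
 * is available when a rotation is needed, catchpacket() drops the
 * packet instead.
 */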
461 /*
462  *  bpfread - read next chunk of packets from buffers
463  */
464 int
465 bpfread(dev, uio, ioflag)
466 	dev_t dev;
467 	register struct uio *uio;
468 	int ioflag;
469 {
470 	register struct bpf_d *d = &bpf_dtab[minor(dev)];
471 	int error;
472 	int s;
473 
474 	/*
475 	 * Restrict application to use a buffer the same size as
476 	 * the kernel buffers.
477 	 */
478 	if (uio->uio_resid != d->bd_bufsize)
479 		return (EINVAL);
480 
481 	s = splimp();
482 	/*
483 	 * If the hold buffer is empty, then do a timed sleep, which
484 	 * ends when the timeout expires or when enough packets
485 	 * have arrived to fill the store buffer.
486 	 */
487 	while (d->bd_hbuf == 0) {
488 		if (d->bd_immediate) {
489 			if (d->bd_slen == 0) {
490 				splx(s);
491 				return (EWOULDBLOCK);
492 			}
493 			/*
494 			 * One or more packets arrived since the previous
495 			 * read, or arrived while we were asleep.
496 			 * Rotate the buffers and return what's here.
497 			 */
498 			ROTATE_BUFFERS(d);
499 			break;
500 		}
501 		if (d->bd_rtout != -1)
502 			error = BPF_SLEEP((caddr_t)d, PRINET|PCATCH, "bpf",
503 					  d->bd_rtout);
504 		else
505 			error = EWOULDBLOCK; /* User requested non-blocking I/O */
506 		if (error == EINTR || error == ERESTART) {
507 			splx(s);
508 			return (error);
509 		}
510 		if (error == EWOULDBLOCK) {
511 			/*
512 			 * On a timeout, return what's in the buffer,
513 			 * which may be nothing.  If there is something
514 			 * in the store buffer, we can rotate the buffers.
515 			 */
516 			if (d->bd_hbuf)
517 				/*
518 				 * We filled up the buffer in between
519 				 * getting the timeout and arriving
520 				 * here, so we don't need to rotate.
521 				 */
522 				break;
523 
524 			if (d->bd_slen == 0) {
525 				splx(s);
526 				return (0);
527 			}
528 			ROTATE_BUFFERS(d);
529 			break;
530 		}
531 		if (error != 0)
532 			goto done;
533 	}
534 	/*
535 	 * At this point, we know we have something in the hold slot.
536 	 */
537 	splx(s);
538 
539 	/*
540 	 * Move data from hold buffer into user space.
541 	 * We know the entire buffer is transferred since
542 	 * we checked above that the read buffer is d->bd_bufsize bytes.
543 	 */
544 	error = UIOMOVE(d->bd_hbuf, d->bd_hlen, UIO_READ, uio);
545 
546 	s = splimp();
547 	d->bd_fbuf = d->bd_hbuf;
548 	d->bd_hbuf = 0;
549 	d->bd_hlen = 0;
550 done:
551 	splx(s);
552 	return (error);
553 }
554 
555 
556 /*
557  * If there are processes sleeping on this descriptor, wake them up.
558  */
559 static __inline void
560 bpf_wakeup(d)
561 	register struct bpf_d *d;
562 {
563 	struct proc *p;
564 
565 	wakeup((caddr_t)d);
566 	if (d->bd_async) {
567 		if (d->bd_pgid > 0)
568 			gsignal (d->bd_pgid, SIGIO);
569 		else if ((p = pfind (-d->bd_pgid)) != NULL)
570 			psignal (p, SIGIO);
571 	}
572 
573 #if BSD >= 199103
574 	selwakeup(&d->bd_sel);
575 	/* XXX */
576 	d->bd_sel.si_pid = 0;
577 #else
578 	if (d->bd_selproc) {
579 		selwakeup(d->bd_selproc, (int)d->bd_selcoll);
580 		d->bd_selcoll = 0;
581 		d->bd_selproc = 0;
582 	}
583 #endif
584 }
585 
586 int
587 bpfwrite(dev, uio, ioflag)
588 	dev_t dev;
589 	struct uio *uio;
590 	int ioflag;
591 {
592 	register struct bpf_d *d = &bpf_dtab[minor(dev)];
593 	struct ifnet *ifp;
594 	struct mbuf *m;
595 	int error, s;
596 	static struct sockaddr dst;
597 
598 	if (d->bd_bif == 0)
599 		return (ENXIO);
600 
601 	ifp = d->bd_bif->bif_ifp;
602 
603 	if (uio->uio_resid == 0)
604 		return (0);
605 
606 	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp->if_mtu, &m, &dst);
607 	if (error)
608 		return (error);
609 
610 	if (m->m_pkthdr.len > ifp->if_mtu) {
		/* bpf_movein() allocated the mbuf; don't leak it on error. */
		m_freem(m);
611 		return (EMSGSIZE);
	}
612 
613 	if (d->bd_hdrcmplt)
614 		dst.sa_family = pseudo_AF_HDRCMPLT;
615 
616 	s = splsoftnet();
617 #if BSD >= 199103
618 	error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)0);
619 #else
620 	error = (*ifp->if_output)(ifp, m, &dst);
621 #endif
622 	splx(s);
623 	/*
624 	 * The driver frees the mbuf.
625 	 */
626 	return (error);
627 }
628 
629 /*
630  * Reset a descriptor by flushing its packet buffer and clearing the
631  * receive and drop counts.  Should be called at splimp.
632  */
633 static void
634 reset_d(d)
635 	struct bpf_d *d;
636 {
637 	if (d->bd_hbuf) {
638 		/* Free the hold buffer. */
639 		d->bd_fbuf = d->bd_hbuf;
640 		d->bd_hbuf = 0;
641 	}
642 	d->bd_slen = 0;
643 	d->bd_hlen = 0;
644 	d->bd_rcount = 0;
645 	d->bd_dcount = 0;
646 }
647 
648 #ifdef BPF_KERN_FILTER
649 extern struct bpf_insn *bpf_tcp_filter;
650 extern struct bpf_insn *bpf_udp_filter;
651 #endif
652 
653 /*
654  *  FIONREAD		Check for read packet available.
655  *  BIOCGBLEN		Get buffer len [for read()].
656  *  BIOCSETF		Set ethernet read filter.
657  *  BIOCFLUSH		Flush read packet buffer.
658  *  BIOCPROMISC		Put interface into promiscuous mode.
659  *  BIOCGDLT		Get link layer type.
660  *  BIOCGETIF		Get interface name.
661  *  BIOCSETIF		Set interface.
662  *  BIOCSRTIMEOUT	Set read timeout.
663  *  BIOCGRTIMEOUT	Get read timeout.
664  *  BIOCGSTATS		Get packet stats.
665  *  BIOCIMMEDIATE	Set immediate mode.
666  *  BIOCVERSION		Get filter language version.
667  *  BIOCGHDRCMPLT	Get "header already complete" flag.
668  *  BIOCSHDRCMPLT	Set "header already complete" flag.
669  */
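
/*
 * Illustrative userland sketch of the ioctls above (not part of the
 * kernel; error handling is omitted, "le0" is only an example interface
 * name, and the usual headers (<sys/ioctl.h>, <net/if.h>, <net/bpf.h>,
 * <fcntl.h>, <stdlib.h>, <string.h>, <unistd.h>) are assumed):
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET+BPF_K, (u_int)-1)	(accept whole packet)
 *	};
 *	struct bpf_program prog = { 1, insns };
 *	struct ifreq ifr;
 *	u_int buflen, on = 1;
 *	char *buf;
 *	int fd = open("/dev/bpf0", O_RDONLY);
 *
 *	strncpy(ifr.ifr_name, "le0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);		(attach to an interface)
 *	ioctl(fd, BIOCSETF, &prog);		(install the filter)
 *	ioctl(fd, BIOCIMMEDIATE, &on);		(deliver packets as they arrive)
 *	ioctl(fd, BIOCGBLEN, &buflen);		(read() must use this size)
 *	buf = malloc(buflen);
 *	read(fd, buf, buflen);
 */
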
670 /* ARGSUSED */
671 int
672 bpfioctl(dev, cmd, addr, flag, p)
673 	dev_t dev;
674 	u_long cmd;
675 	caddr_t addr;
676 	int flag;
677 	struct proc *p;
678 {
679 	register struct bpf_d *d = &bpf_dtab[minor(dev)];
680 	int s, error = 0;
681 #ifdef BPF_KERN_FILTER
682 	register struct bpf_insn **p;
683 #endif
684 
685 	switch (cmd) {
686 
687 	default:
688 		error = EINVAL;
689 		break;
690 
691 	/*
692 	 * Check for read packet available.
693 	 */
694 	case FIONREAD:
695 		{
696 			int n;
697 
698 			s = splimp();
699 			n = d->bd_slen;
700 			if (d->bd_hbuf)
701 				n += d->bd_hlen;
702 			splx(s);
703 
704 			*(int *)addr = n;
705 			break;
706 		}
707 
708 	/*
709 	 * Get buffer len [for read()].
710 	 */
711 	case BIOCGBLEN:
712 		*(u_int *)addr = d->bd_bufsize;
713 		break;
714 
715 	/*
716 	 * Set buffer length.
717 	 */
718 	case BIOCSBLEN:
719 #if BSD < 199103
720 		error = EINVAL;
721 #else
722 		if (d->bd_bif != 0)
723 			error = EINVAL;
724 		else {
725 			register u_int size = *(u_int *)addr;
726 
727 			if (size > BPF_MAXBUFSIZE)
728 				*(u_int *)addr = size = BPF_MAXBUFSIZE;
729 			else if (size < BPF_MINBUFSIZE)
730 				*(u_int *)addr = size = BPF_MINBUFSIZE;
731 			d->bd_bufsize = size;
732 		}
733 #endif
734 		break;
735 
736 	/*
737 	 * Set link layer read filter.
738 	 */
739 	case BIOCSETF:
740 		error = bpf_setf(d, (struct bpf_program *)addr);
741 		break;
742 
743 #ifdef BPF_KERN_FILTER
744 	/*
745 	 * Set TCP or UDP reject filter.
746 	 */
747 	case BIOCSTCPF:
748 	case BIOCSUDPF:
749 		if (!suser()) {
750 			error = EPERM;
751 			break;
752 		}
753 
754 		/* Validate and store filter */
755 		error = bpf_setf(d, (struct bpf_program *)addr);
756 
757 		/* Free possible old filter */
758 		if (cmd == BIOCSTCPF)
759 			p = &bpf_tcp_filter;
760 		else
761 			p = &bpf_udp_filter;
762 		if (*p != NULL)
763 			free((caddr_t)*p, M_DEVBUF);
764 
765 		/* Steal new filter (noop if error) */
766 		s = splimp();
767 		*p = d->bd_filter;
768 		d->bd_filter = NULL;
769 		splx(s);
770 		break;
771 #endif
772 
773 	/*
774 	 * Flush read packet buffer.
775 	 */
776 	case BIOCFLUSH:
777 		s = splimp();
778 		reset_d(d);
779 		splx(s);
780 		break;
781 
782 	/*
783 	 * Put interface into promiscuous mode.
784 	 */
785 	case BIOCPROMISC:
786 		if (d->bd_bif == 0) {
787 			/*
788 			 * No interface attached yet.
789 			 */
790 			error = EINVAL;
791 			break;
792 		}
793 		s = splimp();
794 		if (d->bd_promisc == 0) {
795 			error = ifpromisc(d->bd_bif->bif_ifp, 1);
796 			if (error == 0)
797 				d->bd_promisc = 1;
798 		}
799 		splx(s);
800 		break;
801 
802 	/*
803 	 * Get device parameters.
804 	 */
805 	case BIOCGDLT:
806 		if (d->bd_bif == 0)
807 			error = EINVAL;
808 		else
809 			*(u_int *)addr = d->bd_bif->bif_dlt;
810 		break;
811 
812 	/*
813 	 * Get interface name.
814 	 */
815 	case BIOCGETIF:
816 		if (d->bd_bif == 0)
817 			error = EINVAL;
818 		else
819 			bpf_ifname(d->bd_bif->bif_ifp, (struct ifreq *)addr);
820 		break;
821 
822 	/*
823 	 * Set interface.
824 	 */
825 	case BIOCSETIF:
826 		error = bpf_setif(d, (struct ifreq *)addr);
827 		break;
828 
829 	/*
830 	 * Set read timeout.
831 	 */
832 	case BIOCSRTIMEOUT:
833 		{
834 			struct timeval *tv = (struct timeval *)addr;
835 
836 			/* Compute number of ticks. */
837 			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
838 			if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
839 				d->bd_rtout = 1;
840 			break;
841 		}
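
	/*
	 * Worked example (assuming the traditional hz = 100, so tick is
	 * 10000 microseconds): a timeout of { 2, 500000 }, i.e. 2.5
	 * seconds, yields bd_rtout = 2 * 100 + 500000 / 10000 = 250 ticks.
	 */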
842 
843 	/*
844 	 * Get read timeout.
845 	 */
846 	case BIOCGRTIMEOUT:
847 		{
848 			struct timeval *tv = (struct timeval *)addr;
849 
850 			tv->tv_sec = d->bd_rtout / hz;
851 			tv->tv_usec = (d->bd_rtout % hz) * tick;
852 			break;
853 		}
854 
855 	/*
856 	 * Get packet stats.
857 	 */
858 	case BIOCGSTATS:
859 		{
860 			struct bpf_stat *bs = (struct bpf_stat *)addr;
861 
862 			bs->bs_recv = d->bd_rcount;
863 			bs->bs_drop = d->bd_dcount;
864 			break;
865 		}
866 
867 	/*
868 	 * Set immediate mode.
869 	 */
870 	case BIOCIMMEDIATE:
871 		d->bd_immediate = *(u_int *)addr;
872 		break;
873 
874 	case BIOCVERSION:
875 		{
876 			struct bpf_version *bv = (struct bpf_version *)addr;
877 
878 			bv->bv_major = BPF_MAJOR_VERSION;
879 			bv->bv_minor = BPF_MINOR_VERSION;
880 			break;
881 		}
882 
883 	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
884 		*(u_int *)addr = d->bd_hdrcmplt;
885 		break;
886 
887 	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
888 		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
889 		break;
890 
891 	case FIONBIO:		/* Non-blocking I/O */
892 		if (*(int *)addr)
893 			d->bd_rtout = -1;
894 		else
895 			d->bd_rtout = 0;
896 		break;
897 
898 	case FIOASYNC:		/* Send signal on receive packets */
899 		d->bd_async = *(int *)addr;
900 		break;
901 
902 	/*
903 	 * N.B.  ioctl (FIOSETOWN) and fcntl (F_SETOWN) both end up doing
904 	 * the equivalent of a TIOCSPGRP and hence end up here.  *However*
905 	 * TIOCSPGRP's arg is a process group if it's positive and a process
906 	 * id if it's negative.  This is exactly the opposite of what the
907 	 * other two functions want!  Therefore there is code in ioctl and
908 	 * fcntl to negate the arg before calling here.
909 	 */
910 	case TIOCSPGRP:		/* Process or group to send signals to */
911 		d->bd_pgid = *(int *)addr;
912 		break;
913 
914 	case TIOCGPGRP:
915 		*(int *)addr = d->bd_pgid;
916 		break;
917 	}
918 	return (error);
919 }
920 
921 /*
922  * Set d's packet filter program to fp.  If this file already has a filter,
923  * free it and replace it.  Returns EINVAL for bogus requests.
924  */
925 int
926 bpf_setf(d, fp)
927 	struct bpf_d *d;
928 	struct bpf_program *fp;
929 {
930 	struct bpf_insn *fcode, *old;
931 	u_int flen, size;
932 	int s;
933 
934 	old = d->bd_filter;
935 	if (fp->bf_insns == 0) {
936 		if (fp->bf_len != 0)
937 			return (EINVAL);
938 		s = splimp();
939 		d->bd_filter = 0;
940 		reset_d(d);
941 		splx(s);
942 		if (old != 0)
943 			free((caddr_t)old, M_DEVBUF);
944 		return (0);
945 	}
946 	flen = fp->bf_len;
947 	if (flen > BPF_MAXINSNS)
948 		return (EINVAL);
949 
950 	size = flen * sizeof(*fp->bf_insns);
951 	fcode = (struct bpf_insn *)malloc(size, M_DEVBUF, M_WAITOK);
952 	if (fcode == 0)
953 		return (ENOMEM);
954 	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
955 	    bpf_validate(fcode, (int)flen)) {
956 		s = splimp();
957 		d->bd_filter = fcode;
958 		reset_d(d);
959 		splx(s);
960 		if (old != 0)
961 			free((caddr_t)old, M_DEVBUF);
962 
963 		return (0);
964 	}
965 	free((caddr_t)fcode, M_DEVBUF);
966 	return (EINVAL);
967 }
968 
969 /*
970  * Detach a file from its current interface (if attached at all) and attach
971  * to the interface indicated by the name stored in ifr.
972  * Return an errno or 0.
973  */
974 static int
975 bpf_setif(d, ifr)
976 	struct bpf_d *d;
977 	struct ifreq *ifr;
978 {
979 	struct bpf_if *bp;
980 	char *cp;
981 	int unit_seen, i, s, error;
982 
983 	/*
984 	 * Make sure the provided name has a unit number, and default
985 	 * it to '0' if not specified.
986 	 * XXX This is ugly ... do this differently?
987 	 */
988 	unit_seen = 0;
989 	cp = ifr->ifr_name;
990 	cp[sizeof(ifr->ifr_name) - 1] = '\0';	/* sanity */
991 	while (*cp++)
992 		if (*cp >= '0' && *cp <= '9')
993 			unit_seen = 1;
994 	if (!unit_seen) {
995 		/* Make sure to leave room for the '\0'. */
996 		for (i = 0; i < (IFNAMSIZ - 1); ++i) {
997 			if ((ifr->ifr_name[i] >= 'a' &&
998 			     ifr->ifr_name[i] <= 'z') ||
999 			    (ifr->ifr_name[i] >= 'A' &&
1000 			     ifr->ifr_name[i] <= 'Z'))
1001 				continue;
1002 			ifr->ifr_name[i] = '0';
			break;	/* append a single '0'; don't fill the rest */
1003 		}
1004 	}
1005 
1006 	/*
1007 	 * Look through attached interfaces for the named one.
1008 	 */
1009 	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
1010 		struct ifnet *ifp = bp->bif_ifp;
1011 
1012 		if (ifp == 0 ||
1013 		    strcmp(ifp->if_xname, ifr->ifr_name) != 0)
1014 			continue;
1015 		/*
1016 		 * We found the requested interface.
1017 		 * If it's not up, return an error.
1018 		 * Allocate the packet buffers if we need to.
1019 		 * If we're already attached to requested interface,
1020 		 * just flush the buffer.
1021 		 */
1022 		if ((ifp->if_flags & IFF_UP) == 0)
1023 			return (ENETDOWN);
1024 
1025 		if (d->bd_sbuf == 0) {
1026 			error = bpf_allocbufs(d);
1027 			if (error != 0)
1028 				return (error);
1029 		}
1030 		s = splimp();
1031 		if (bp != d->bd_bif) {
1032 			if (d->bd_bif)
1033 				/*
1034 				 * Detach if attached to something else.
1035 				 */
1036 				bpf_detachd(d);
1037 
1038 			bpf_attachd(d, bp);
1039 		}
1040 		reset_d(d);
1041 		splx(s);
1042 		return (0);
1043 	}
1044 	/* Not found. */
1045 	return (ENXIO);
1046 }
1047 
1048 /*
1049  * Copy the interface name to the ifreq.
1050  */
1051 static void
1052 bpf_ifname(ifp, ifr)
1053 	struct ifnet *ifp;
1054 	struct ifreq *ifr;
1055 {
1056 
1057 	memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
1058 }
1059 
1060 /*
1061  * Support for poll() system call
1062  *
1063  * Return true iff the specific operation will not block indefinitely.
1064  * Otherwise, return false but make a note that a selwakeup() must be done.
1065  */
1066 int
1067 bpfpoll(dev, events, p)
1068 	register dev_t dev;
1069 	int events;
1070 	struct proc *p;
1071 {
1072 	register struct bpf_d *d = &bpf_dtab[minor(dev)];
1073 	int revents = 0;
1074 	register int s = splimp();
1075 
1076 	/*
1077 	 * An imitation of the FIONREAD ioctl code.
1078 	 */
1079 	if (events & (POLLIN | POLLRDNORM)) {
1080 		if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
1081 			revents |= events & (POLLIN | POLLRDNORM);
1082 		else
1083 			selrecord(p, &d->bd_sel);
1084 	}
1085 
1086 	splx(s);
1087 	return (revents);
1088 }
1089 
1090 /*
1091  * Incoming linkage from device drivers.  Process the packet pkt, of length
1092  * pktlen, which is stored in a contiguous buffer.  The packet is parsed
1093  * by each process' filter, and if accepted, stashed into the corresponding
1094  * buffer.
1095  */
1096 void
1097 bpf_tap(arg, pkt, pktlen)
1098 	caddr_t arg;
1099 	register u_char *pkt;
1100 	register u_int pktlen;
1101 {
1102 	struct bpf_if *bp;
1103 	register struct bpf_d *d;
1104 	register u_int slen;
1105 	/*
1106 	 * Note that the ipl does not have to be raised at this point.
1107 	 * The only problem that could arise here is that if two different
1108 	 * interfaces shared any data.  This is not the case.
1109 	 */
1110 	bp = (struct bpf_if *)arg;
1111 	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
1112 		++d->bd_rcount;
1113 		slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen);
1114 		if (slen != 0)
1115 			catchpacket(d, pkt, pktlen, slen, memcpy);
1116 	}
1117 }
1118 
1119 /*
1120  * Copy data from an mbuf chain into a buffer.  This code is derived
1121  * from m_copydata in sys/uipc_mbuf.c.
1122  */
1123 static void *
1124 bpf_mcpy(dst_arg, src_arg, len)
1125 	void *dst_arg;
1126 	const void *src_arg;
1127 	register size_t len;
1128 {
1129 	register const struct mbuf *m;
1130 	register u_int count;
1131 	u_char *dst;
1132 
1133 	m = src_arg;
1134 	dst = dst_arg;
1135 	while (len > 0) {
1136 		if (m == 0)
1137 			panic("bpf_mcpy");
1138 		count = min(m->m_len, len);
1139 		memcpy((caddr_t)dst, mtod(m, caddr_t), count);
1140 		m = m->m_next;
1141 		dst += count;
1142 		len -= count;
1143 	}
1144 	return(dst_arg);
1145 }
1146 
1147 /*
1148  * Incoming linkage from device drivers, when packet is in an mbuf chain.
1149  */
1150 void
1151 bpf_mtap(arg, m)
1152 	caddr_t arg;
1153 	struct mbuf *m;
1154 {
1155 	struct bpf_if *bp = (struct bpf_if *)arg;
1156 	struct bpf_d *d;
1157 	u_int pktlen, slen;
1158 	struct mbuf *m0;
1159 
1160 	pktlen = 0;
1161 	for (m0 = m; m0 != 0; m0 = m0->m_next)
1162 		pktlen += m0->m_len;
1163 
1164 	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
1165 		++d->bd_rcount;
1166 		slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
1167 		if (slen != 0)
1168 			catchpacket(d, (u_char *)m, pktlen, slen, bpf_mcpy);
1169 	}
1170 }
1171 
1172 /*
1173  * Move the packet data from interface memory (pkt) into the
1174  * store buffer.  Return 1 if it's time to wakeup a listener (buffer full),
1175  * otherwise 0.  "copy" is the routine called to do the actual data
1176  * transfer.  memcpy is passed in to copy contiguous chunks, while
1177  * bpf_mcpy is passed in to copy mbuf chains.  In the latter case,
1178  * pkt is really an mbuf.
1179  */
1180 static void
1181 catchpacket(d, pkt, pktlen, snaplen, cpfn)
1182 	register struct bpf_d *d;
1183 	register u_char *pkt;
1184 	register u_int pktlen, snaplen;
1185 	register void *(*cpfn) __P((void *, const void *, size_t));
1186 {
1187 	register struct bpf_hdr *hp;
1188 	register int totlen, curlen;
1189 	register int hdrlen = d->bd_bif->bif_hdrlen;
1190 	/*
1191 	 * Figure out how many bytes to move.  If the packet is
1192 	 * greater or equal to the snapshot length, transfer that
1193 	 * much.  Otherwise, transfer the whole packet (unless
1194 	 * we hit the buffer size limit).
1195 	 */
1196 	totlen = hdrlen + min(snaplen, pktlen);
1197 	if (totlen > d->bd_bufsize)
1198 		totlen = d->bd_bufsize;
1199 
1200 	/*
1201 	 * Round up the end of the previous packet to the next longword.
1202 	 */
1203 	curlen = BPF_WORDALIGN(d->bd_slen);
1204 	if (curlen + totlen > d->bd_bufsize) {
1205 		/*
1206 		 * This packet will overflow the storage buffer.
1207 		 * Rotate the buffers if we can, then wakeup any
1208 		 * pending reads.
1209 		 */
1210 		if (d->bd_fbuf == 0) {
1211 			/*
1212 			 * We haven't completed the previous read yet,
1213 			 * so drop the packet.
1214 			 */
1215 			++d->bd_dcount;
1216 			return;
1217 		}
1218 		ROTATE_BUFFERS(d);
1219 		bpf_wakeup(d);
1220 		curlen = 0;
1221 	}
1222 	else if (d->bd_immediate)
1223 		/*
1224 		 * Immediate mode is set.  A packet arrived so any
1225 		 * reads should be woken up.
1226 		 */
1227 		bpf_wakeup(d);
1228 
1229 	/*
1230 	 * Append the bpf header.
1231 	 */
1232 	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
1233 #if BSD >= 199103
1234 	microtime(&hp->bh_tstamp);
1235 #elif defined(sun)
1236 	uniqtime(&hp->bh_tstamp);
1237 #else
1238 	hp->bh_tstamp = time;
1239 #endif
1240 	hp->bh_datalen = pktlen;
1241 	hp->bh_hdrlen = hdrlen;
1242 	/*
1243 	 * Copy the packet data into the store buffer and update its length.
1244 	 */
1245 	(*cpfn)((u_char *)hp + hdrlen, pkt, (hp->bh_caplen = totlen - hdrlen));
1246 	d->bd_slen = curlen + totlen;
1247 }
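
/*
 * Sketch of how a consumer walks the buffer returned by read(2); this is
 * illustrative only, with "buf" and "n" standing for the read buffer and
 * the byte count returned by read():
 *
 *	char *p = buf;
 *	while (p < buf + n) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)p;
 *		u_char *data = (u_char *)p + bh->bh_hdrlen;
 *		(bh->bh_caplen bytes of packet data start at "data")
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */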
1248 
1249 /*
1250  * Initialize all nonzero fields of a descriptor.
1251  */
1252 static int
1253 bpf_allocbufs(d)
1254 	register struct bpf_d *d;
1255 {
1256 	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK);
1257 	if (d->bd_fbuf == 0)
1258 		return (ENOBUFS);
1259 
1260 	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK);
1261 	if (d->bd_sbuf == 0) {
1262 		free(d->bd_fbuf, M_DEVBUF);
1263 		return (ENOBUFS);
1264 	}
1265 	d->bd_slen = 0;
1266 	d->bd_hlen = 0;
1267 	return (0);
1268 }
1269 
1270 /*
1271  * Free buffers currently in use by a descriptor.
1272  * Called on close.
1273  */
1274 static void
1275 bpf_freed(d)
1276 	register struct bpf_d *d;
1277 {
1278 	/*
1279 	 * We don't need to lock out interrupts since this descriptor has
1280 	 * been detached from its interface and has not yet been marked
1281 	 * free.
1282 	 */
1283 	if (d->bd_sbuf != 0) {
1284 		free(d->bd_sbuf, M_DEVBUF);
1285 		if (d->bd_hbuf != 0)
1286 			free(d->bd_hbuf, M_DEVBUF);
1287 		if (d->bd_fbuf != 0)
1288 			free(d->bd_fbuf, M_DEVBUF);
1289 	}
1290 	if (d->bd_filter)
1291 		free((caddr_t)d->bd_filter, M_DEVBUF);
1292 
1293 	D_MARKFREE(d);
1294 }
1295 
1296 /*
1297  * Attach an interface to bpf.  driverp is a pointer to a (struct bpf_if *)
1298  * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
1299  * size of the link header (variable length headers not yet supported).
1300  */
1301 void
1302 bpfattach(driverp, ifp, dlt, hdrlen)
1303 	caddr_t *driverp;
1304 	struct ifnet *ifp;
1305 	u_int dlt, hdrlen;
1306 {
1307 	struct bpf_if *bp;
1308 #if BSD < 199103
1309 	static struct bpf_if bpf_ifs[NBPFILTER];
1310 	static int bpfifno;
1311 
1312 	bp = (bpfifno < NBPFILTER) ? &bpf_ifs[bpfifno++] : 0;
1313 #else
1314 	bp = (struct bpf_if *)malloc(sizeof(*bp), M_DEVBUF, M_DONTWAIT);
1315 #endif
1316 	if (bp == 0)
1317 		panic("bpfattach");
1318 
1319 	bp->bif_dlist = 0;
1320 	bp->bif_driverp = (struct bpf_if **)driverp;
1321 	bp->bif_ifp = ifp;
1322 	bp->bif_dlt = dlt;
1323 
1324 	bp->bif_next = bpf_iflist;
1325 	bpf_iflist = bp;
1326 
1327 	*bp->bif_driverp = 0;
1328 
1329 	/*
1330 	 * Compute the length of the bpf header.  This is not necessarily
1331 	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1332 	 * that the network layer header begins on a longword boundary (for
1333 	 * performance reasons and to alleviate alignment restrictions).
1334 	 */
1335 	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
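
	/*
	 * Example (assuming a 32-bit port where SIZEOF_BPF_HDR is 18 and
	 * BPF_WORDALIGN() rounds up to 4 bytes): for Ethernet, hdrlen is
	 * 14, so bif_hdrlen = BPF_WORDALIGN(14 + 18) - 14 = 32 - 14 = 18;
	 * the captured link header then ends at offset 32 in each record
	 * and the network layer header starts on a longword boundary.
	 */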
1336 
1337 #if 0
1338 	printf("bpf: %s attached\n", ifp->if_xname);
1339 #endif
1340 }
1341 
1342 #if BSD >= 199103
1343 /* XXX This routine belongs in net/if.c. */
1344 /*
1345  * Set/clear promiscuous mode on interface ifp based on the truth value
1346  * of pswitch.  The calls are reference counted so that only the first
1347  * "on" request actually has an effect, as does the final "off" request.
1348  * Results are undefined if the "off" and "on" requests are not matched.
1349  */
1350 int
1351 ifpromisc(ifp, pswitch)
1352 	register struct ifnet *ifp;
1353 	register int pswitch;
1354 {
1355 	register int pcount, ret;
1356 	register short flags;
1357 	struct ifreq ifr;
1358 
1359 	pcount = ifp->if_pcount;
1360 	flags = ifp->if_flags;
1361 	if (pswitch) {
1362 		/*
1363 		 * If the device is not configured up, we cannot put it in
1364 		 * promiscuous mode.
1365 		 */
1366 		if ((ifp->if_flags & IFF_UP) == 0)
1367 			return (ENETDOWN);
1368 		if (ifp->if_pcount++ != 0)
1369 			return (0);
1370 		ifp->if_flags |= IFF_PROMISC;
1371 	} else {
1372 		if (--ifp->if_pcount > 0)
1373 			return (0);
1374 		ifp->if_flags &= ~IFF_PROMISC;
1375 		/*
1376 		 * If the device is not configured up, we should not need to
1377 		 * turn off promiscuous mode (device should have turned it
1378 		 * off when interface went down; and will look at IFF_PROMISC
1379 		 * again next time interface comes up).
1380 		 */
1381 		if ((ifp->if_flags & IFF_UP) == 0)
1382 			return (0);
1383 	}
1384 	memset((caddr_t)&ifr, 0, sizeof(ifr));
1385 	ifr.ifr_flags = ifp->if_flags;
1386 	ret = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1387 	/* Restore interface state if not successful */
1388 	if (ret != 0) {
1389 		ifp->if_pcount = pcount;
1390 		ifp->if_flags = flags;
1391 	}
1392 	return (ret);
1393 }
1394 #endif
1395 
1396 #if BSD < 199103
1397 /*
1398  * Allocate some memory for bpf.  This is temporary SunOS support, and
1399  * is admittedly a hack.
1400  * If resources unavailable, return 0.
1401  */
1402 static caddr_t
1403 bpf_alloc(size, canwait)
1404 	register int size;
1405 	register int canwait;
1406 {
1407 	register struct mbuf *m;
1408 
1409 	if ((unsigned)size > (MCLBYTES-8))
1410 		return 0;
1411 
1412 	MGET(m, canwait, MT_DATA);
1413 	if (m == 0)
1414 		return 0;
1415 	if ((unsigned)size > (MLEN-8)) {
1416 		MCLGET(m);
1417 		if (m->m_len != MCLBYTES) {
1418 			m_freem(m);
1419 			return 0;
1420 		}
1421 	}
1422 	*mtod(m, struct mbuf **) = m;
1423 	return mtod(m, caddr_t) + 8;
1424 }
1425 #endif
1426