xref: /netbsd-src/sys/net/bpf.c (revision c41a4eebefede43f6950f838a387dc18c6a431bf)
1 /*	$NetBSD: bpf.c,v 1.38 1997/10/12 16:35:10 mycroft Exp $	*/
2 
3 /*
4  * Copyright (c) 1990, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from the Stanford/CMU enet packet filter,
8  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
9  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
10  * Berkeley Laboratory.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by the University of
23  *	California, Berkeley and its contributors.
24  * 4. Neither the name of the University nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  *
40  *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
41  * static char rcsid[] =
42  * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp ";
43  */
44 
45 #include "bpfilter.h"
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/mbuf.h>
50 #include <sys/buf.h>
51 #include <sys/time.h>
52 #include <sys/proc.h>
53 #include <sys/user.h>
54 #include <sys/ioctl.h>
55 #include <sys/map.h>
56 #include <sys/conf.h>
57 
58 #include <sys/file.h>
59 #if defined(sparc) && BSD < 199103
60 #include <sys/stream.h>
61 #endif
62 #include <sys/tty.h>
63 #include <sys/uio.h>
64 
65 #include <sys/protosw.h>
66 #include <sys/socket.h>
67 #include <sys/errno.h>
68 #include <sys/kernel.h>
69 #include <sys/poll.h>
70 
71 #include <net/if.h>
72 
73 #include <net/bpf.h>
74 #include <net/bpfdesc.h>
75 
76 #include <net/if_arc.h>
77 #include <net/if_ether.h>
78 
79 #include <netinet/in.h>
80 #include <netinet/if_inarp.h>
81 
82 /*
83  * Older BSDs don't have kernel malloc.
84  */
85 #if BSD < 199103
86 extern bcopy();
87 static caddr_t bpf_alloc();
88 #include <net/bpf_compat.h>
89 #define BPF_BUFSIZE (MCLBYTES-8)
90 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, code, uio)
91 #else
92 #define BPF_BUFSIZE 4096
93 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
94 #endif
95 
96 #define PRINET  26			/* interruptible */
97 
98 /*
99  * The default read buffer size is patchable.
100  */
101 int bpf_bufsize = BPF_BUFSIZE;
102 
103 /*
104  *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
105  *  bpf_dtab holds the descriptors, indexed by minor device #
106  */
107 struct bpf_if	*bpf_iflist;
108 struct bpf_d	bpf_dtab[NBPFILTER];
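
/*
 * Rough sketch of how these pieces relate (a reading aid only; the
 * structures themselves are declared in net/bpfdesc.h):
 *
 *	driver softc pointer ----> struct bpf_if     one per attached ifnet,
 *	  (set via *bif_driverp)       |             chained on bpf_iflist
 *	                               | bif_dlist   via bif_next
 *	                               v
 *	                          struct bpf_d ----> struct bpf_d ----> ...
 *	                          one per open minor device, chained via
 *	                          bd_next and stored in bpf_dtab[]
 */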
109 
110 #if BSD >= 199207 || NetBSD0_9 >= 2
111 /*
112  * bpfilterattach() is called at boot time in new systems.  We do
113  * nothing here since old systems will not call this.
114  */
115 /* ARGSUSED */
116 void
117 bpfilterattach(n)
118 	int n;
119 {
120 }
121 #endif
122 
123 static int	bpf_allocbufs __P((struct bpf_d *));
125 static void	bpf_freed __P((struct bpf_d *));
127 static void	bpf_ifname __P((struct ifnet *, struct ifreq *));
129 static void	bpf_mcopy __P((const void *, void *, size_t));
130 static int	bpf_movein __P((struct uio *, int, int,
131 			        struct mbuf **, struct sockaddr *));
132 static void	bpf_attachd __P((struct bpf_d *, struct bpf_if *));
133 static void	bpf_detachd __P((struct bpf_d *));
134 static int	bpf_setif __P((struct bpf_d *, struct ifreq *));
135 int		bpfpoll __P((dev_t, int, struct proc *));
136 static __inline void
137 		bpf_wakeup __P((struct bpf_d *));
138 static void	catchpacket __P((struct bpf_d *, u_char *, u_int, u_int,
139 				 void (*)(const void *, void *, size_t)));
140 static void	reset_d __P((struct bpf_d *));
141 
142 static int
143 bpf_movein(uio, linktype, mtu, mp, sockp)
144 	register struct uio *uio;
145 	int linktype;
146 	int mtu;
147 	register struct mbuf **mp;
148 	register struct sockaddr *sockp;
149 {
150 	struct mbuf *m;
151 	int error;
152 	int len;
153 	int hlen;
154 	int align;
155 
156 	/*
157 	 * Build a sockaddr based on the data link layer type.
158 	 * We do this at this level because the ethernet header
159 	 * is copied directly into the data field of the sockaddr.
160 	 * In the case of SLIP, there is no header and the packet
161 	 * is forwarded as is.
162 	 * Also, we are careful to leave room at the front of the mbuf
163 	 * for the link level header.
164 	 */
165 	switch (linktype) {
166 
167 	case DLT_SLIP:
168 		sockp->sa_family = AF_INET;
169 		hlen = 0;
170 		align = 0;
171 		break;
172 
173 	case DLT_PPP:
174 		sockp->sa_family = AF_UNSPEC;
175 		hlen = 0;
176 		align = 0;
177 		break;
178 
179 	case DLT_EN10MB:
180 		sockp->sa_family = AF_UNSPEC;
181 		/* XXX Would MAXLINKHDR be better? */
182  		/* 6(dst)+6(src)+2(type) */
183 		hlen = sizeof(struct ether_header);
184 		align = 2;
185 		break;
186 
187 	case DLT_ARCNET:
188 		sockp->sa_family = AF_UNSPEC;
189 		hlen = ARC_HDRLEN;
190 		align = 5;
191 		break;
192 
193 	case DLT_FDDI:
194 		sockp->sa_family = AF_UNSPEC;
195 		/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
196 		hlen = 24;
197 		align = 0;
198 		break;
199 
200 	case DLT_NULL:
201 		sockp->sa_family = AF_UNSPEC;
202 		hlen = 0;
203 		align = 0;
204 		break;
205 
206 	default:
207 		return (EIO);
208 	}
209 
210 	len = uio->uio_resid;
211 	/*
212 	 * If there aren't enough bytes for a link level header or the
213 	 * packet length exceeds the interface mtu, return an error.
214 	 */
215 	if (len < hlen || len - hlen > mtu)
216 		return (EMSGSIZE);
217 
218 	/*
219 	 * XXX Avoid complicated buffer chaining ---
220 	 * bail if it won't fit in a single mbuf.
221 	 * (Take into account possible alignment bytes)
222 	 */
223 	if ((unsigned)len > MCLBYTES - align)
224 		return (EIO);
225 
226 	MGETHDR(m, M_WAIT, MT_DATA);
227 	if (m == 0)
228 		return (ENOBUFS);
229 	m->m_pkthdr.rcvif = 0;
230 	m->m_pkthdr.len = len - hlen;
231 	if (len > MHLEN - align) {
232 #if BSD >= 199103
233 		MCLGET(m, M_WAIT);
234 		if ((m->m_flags & M_EXT) == 0) {
235 #else
236 		MCLGET(m);
237 		if (m->m_len != MCLBYTES) {
238 #endif
239 			error = ENOBUFS;
240 			goto bad;
241 		}
242 	}
243 
244 	/* Ensure the data is properly aligned */
245 	if (align > 0) {
246 #if BSD >= 199103
247 		m->m_data += align;
248 #else
249 		m->m_off += align;
250 #endif
251 		m->m_len -= align;
252 	}
253 
254 	error = UIOMOVE(mtod(m, caddr_t), len, UIO_WRITE, uio);
255 	if (error)
256 		goto bad;
257 	if (hlen != 0) {
258 		bcopy(mtod(m, caddr_t), sockp->sa_data, hlen);
259 #if BSD >= 199103
260 		m->m_data += hlen; /* XXX */
261 #else
262 		m->m_off += hlen;
263 #endif
264 		len -= hlen;
265 	}
266 	m->m_len = len;
267 	*mp = m;
268 	return (0);
269 
270 bad:
271 	m_freem(m);
272 	return (error);
273 }
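
/*
 * A worked example of the "align" fixup above, assuming DLT_EN10MB and the
 * usual 14-byte struct ether_header: the mbuf data area starts longword
 * aligned, so after "m_data += 2" the link header occupies offsets 2..15
 * and the network-layer header begins at offset 16, again longword aligned.
 * The other cases pick align the same way, so that align + hlen comes out
 * a multiple of four.
 */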
274 
275 /*
276  * Attach file to the bpf interface, i.e. make d listen on bp.
277  * Must be called at splimp.
278  */
279 static void
280 bpf_attachd(d, bp)
281 	struct bpf_d *d;
282 	struct bpf_if *bp;
283 {
284 	/*
285 	 * Point d at bp, and add d to the interface's list of listeners.
286 	 * Finally, point the driver's bpf cookie at the interface so
287 	 * it will divert packets to bpf.
288 	 */
289 	d->bd_bif = bp;
290 	d->bd_next = bp->bif_dlist;
291 	bp->bif_dlist = d;
292 
293 	*bp->bif_driverp = bp;
294 }
295 
296 /*
297  * Detach a file from its interface.
298  */
299 static void
300 bpf_detachd(d)
301 	struct bpf_d *d;
302 {
303 	struct bpf_d **p;
304 	struct bpf_if *bp;
305 
306 	bp = d->bd_bif;
307 	/*
308 	 * Check if this descriptor had requested promiscuous mode.
309 	 * If so, turn it off.
310 	 */
311 	if (d->bd_promisc) {
312 		int error;
313 
314 		d->bd_promisc = 0;
315 		/*
316 		 * Take device out of promiscuous mode.  Since we were
317 		 * able to enter promiscuous mode, we should be able
318 		 * to turn it off.  But we can get an error if
319 		 * the interface was configured down, so only panic
320 		 * if we get an unexpected error.
321 		 */
322   		error = ifpromisc(bp->bif_ifp, 0);
323 		if (error && error != EINVAL)
324 			panic("bpf: ifpromisc failed");
325 	}
326 	/* Remove d from the interface's descriptor list. */
327 	p = &bp->bif_dlist;
328 	while (*p != d) {
329 		p = &(*p)->bd_next;
330 		if (*p == 0)
331 			panic("bpf_detachd: descriptor not in list");
332 	}
333 	*p = (*p)->bd_next;
334 	if (bp->bif_dlist == 0)
335 		/*
336 		 * Let the driver know that there are no more listeners.
337 		 */
338 		*d->bd_bif->bif_driverp = 0;
339 	d->bd_bif = 0;
340 }
341 
342 
343 /*
344  * Mark a descriptor free by making it point to itself.
345  * This is probably cheaper than marking with a constant since
346  * the address should be in a register anyway.
347  */
348 #define D_ISFREE(d) ((d) == (d)->bd_next)
349 #define D_MARKFREE(d) ((d)->bd_next = (d))
350 #define D_MARKUSED(d) ((d)->bd_next = 0)
351 
352 /*
353  * Open the bpf device.  Returns ENXIO for illegal minor device number,
354  * EBUSY if file is open by another process.
355  */
356 /* ARGSUSED */
357 int
358 bpfopen(dev, flag, mode, p)
359 	dev_t dev;
360 	int flag;
361 	int mode;
362 	struct proc *p;
363 {
364 	register struct bpf_d *d;
365 
366 	if (minor(dev) >= NBPFILTER)
367 		return (ENXIO);
368 	/*
369 	 * Each minor can be opened by only one process.  If the requested
370 	 * minor is in use, return EBUSY.
371 	 */
372 	d = &bpf_dtab[minor(dev)];
373 	if (!D_ISFREE(d))
374 		return (EBUSY);
375 
376 	/* Mark "in use" and do most initialization. */
377 	bzero((char *)d, sizeof(*d));
378 	d->bd_bufsize = bpf_bufsize;
379 
380 	return (0);
381 }
382 
383 /*
384  * Close the descriptor by detaching it from its interface,
385  * deallocating its buffers, and marking it free.
386  */
387 /* ARGSUSED */
388 int
389 bpfclose(dev, flag, mode, p)
390 	dev_t dev;
391 	int flag;
392 	int mode;
393 	struct proc *p;
394 {
395 	register struct bpf_d *d = &bpf_dtab[minor(dev)];
396 	register int s;
397 
398 	s = splimp();
399 	if (d->bd_bif)
400 		bpf_detachd(d);
401 	splx(s);
402 	bpf_freed(d);
403 
404 	return (0);
405 }
406 
407 /*
408  * Support for SunOS, which does not have tsleep.
409  */
410 #if BSD < 199103
411 static
412 bpf_timeout(arg)
413 	caddr_t arg;
414 {
415 	struct bpf_d *d = (struct bpf_d *)arg;
416 	d->bd_timedout = 1;
417 	wakeup(arg);
418 }
419 
420 #define BPF_SLEEP(chan, pri, s, t) bpf_sleep((struct bpf_d *)chan)
421 
422 int
423 bpf_sleep(d)
424 	register struct bpf_d *d;
425 {
426 	register int rto = d->bd_rtout;
427 	register int st;
428 
429 	if (rto != 0) {
430 		d->bd_timedout = 0;
431 		timeout(bpf_timeout, (caddr_t)d, rto);
432 	}
433 	st = sleep((caddr_t)d, PRINET|PCATCH);
434 	if (rto != 0) {
435 		if (d->bd_timedout == 0)
436 			untimeout(bpf_timeout, (caddr_t)d);
437 		else if (st == 0)
438 			return EWOULDBLOCK;
439 	}
440 	return (st != 0) ? EINTR : 0;
441 }
442 #else
443 #define BPF_SLEEP tsleep
444 #endif
445 
446 /*
447  * Rotate the packet buffers in descriptor d.  Move the store buffer
448  * into the hold slot, and the free buffer into the store slot.
449  * Zero the length of the new store buffer.
450  */
451 #define ROTATE_BUFFERS(d) \
452 	(d)->bd_hbuf = (d)->bd_sbuf; \
453 	(d)->bd_hlen = (d)->bd_slen; \
454 	(d)->bd_sbuf = (d)->bd_fbuf; \
455 	(d)->bd_slen = 0; \
456 	(d)->bd_fbuf = 0;
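
/*
 * For illustration, if a descriptor currently has
 *
 *	sbuf = A (slen = n),  hbuf = NULL,  fbuf = B
 *
 * then after ROTATE_BUFFERS(d) it has
 *
 *	sbuf = B (slen = 0),  hbuf = A (hlen = n),  fbuf = NULL
 *
 * and buffer A is what the next read will hand back to the user.
 */
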
457 /*
458  *  bpfread - read next chunk of packets from buffers
459  */
460 int
461 bpfread(dev, uio, ioflag)
462 	dev_t dev;
463 	register struct uio *uio;
464 	int ioflag;
465 {
466 	register struct bpf_d *d = &bpf_dtab[minor(dev)];
467 	int error;
468 	int s;
469 
470 	/*
471 	 * Restrict application to use a buffer the same size as
472 	 * the kernel buffers.
473 	 */
474 	if (uio->uio_resid != d->bd_bufsize)
475 		return (EINVAL);
476 
477 	s = splimp();
478 	/*
479 	 * If the hold buffer is empty, then do a timed sleep, which
480 	 * ends when the timeout expires or when enough packets
481 	 * have arrived to fill the store buffer.
482 	 */
483 	while (d->bd_hbuf == 0) {
484 		if (d->bd_immediate) {
485 			if (d->bd_slen == 0) {
486 				splx(s);
487 				return (EWOULDBLOCK);
488 			}
489 			/*
490 			 * One or more packets arrived since the previous
491 			 * read, or arrived while we were asleep.
492 			 * Rotate the buffers and return what's here.
493 			 */
494 			ROTATE_BUFFERS(d);
495 			break;
496 		}
497 		if (d->bd_rtout != -1)
498 			error = BPF_SLEEP((caddr_t)d, PRINET|PCATCH, "bpf",
499 					  d->bd_rtout);
500 		else
501 			error = EWOULDBLOCK; /* User requested non-blocking I/O */
502 		if (error == EINTR || error == ERESTART) {
503 			splx(s);
504 			return (error);
505 		}
506 		if (error == EWOULDBLOCK) {
507 			/*
508 			 * On a timeout, return what's in the buffer,
509 			 * which may be nothing.  If there is something
510 			 * in the store buffer, we can rotate the buffers.
511 			 */
512 			if (d->bd_hbuf)
513 				/*
514 				 * We filled up the buffer in between
515 				 * getting the timeout and arriving
516 				 * here, so we don't need to rotate.
517 				 */
518 				break;
519 
520 			if (d->bd_slen == 0) {
521 				splx(s);
522 				return (0);
523 			}
524 			ROTATE_BUFFERS(d);
525 			break;
526 		}
527 		if (error != 0)
528 			goto done;
529 	}
530 	/*
531 	 * At this point, we know we have something in the hold slot.
532 	 */
533 	splx(s);
534 
535 	/*
536 	 * Move data from hold buffer into user space.
537 	 * We know the entire buffer is transferred since
538 	 * we checked above that the read buffer is bpf_bufsize bytes.
539 	 */
540 	error = UIOMOVE(d->bd_hbuf, d->bd_hlen, UIO_READ, uio);
541 
542 	s = splimp();
543 	d->bd_fbuf = d->bd_hbuf;
544 	d->bd_hbuf = 0;
545 	d->bd_hlen = 0;
546 done:
547 	splx(s);
548 	return (error);
549 }
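
/*
 * A minimal sketch of the matching read loop in userland (hypothetical
 * code, not part of this file; handle_packet() is a placeholder).  Each
 * read() returns zero or more records, each a struct bpf_hdr followed by
 * the captured bytes, with consecutive records BPF_WORDALIGN'ed:
 *
 *	u_int buflen;
 *	char *buf, *p;
 *	struct bpf_hdr *bh;
 *	ssize_t cc;
 *
 *	ioctl(fd, BIOCGBLEN, &buflen);
 *	buf = malloc(buflen);
 *	cc = read(fd, buf, buflen);
 *	for (p = buf; p < buf + cc; ) {
 *		bh = (struct bpf_hdr *)p;
 *		handle_packet(p + bh->bh_hdrlen, bh->bh_caplen);
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */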
550 
551 
552 /*
553  * If there are processes sleeping on this descriptor, wake them up.
554  */
555 static __inline void
556 bpf_wakeup(d)
557 	register struct bpf_d *d;
558 {
559 	struct proc *p;
560 
561 	wakeup((caddr_t)d);
562 	if (d->bd_async)
563 		if (d->bd_pgid > 0)
564 			gsignal (d->bd_pgid, SIGIO);
565 		else if ((p = pfind (-d->bd_pgid)) != NULL)
566 			psignal (p, SIGIO);
567 
568 #if BSD >= 199103
569 	selwakeup(&d->bd_sel);
570 	/* XXX */
571 	d->bd_sel.si_pid = 0;
572 #else
573 	if (d->bd_selproc) {
574 		selwakeup(d->bd_selproc, (int)d->bd_selcoll);
575 		d->bd_selcoll = 0;
576 		d->bd_selproc = 0;
577 	}
578 #endif
579 }
580 
581 int
582 bpfwrite(dev, uio, ioflag)
583 	dev_t dev;
584 	struct uio *uio;
585 	int ioflag;
586 {
587 	register struct bpf_d *d = &bpf_dtab[minor(dev)];
588 	struct ifnet *ifp;
589 	struct mbuf *m;
590 	int error, s;
591 	static struct sockaddr dst;
592 
593 	if (d->bd_bif == 0)
594 		return (ENXIO);
595 
596 	ifp = d->bd_bif->bif_ifp;
597 
598 	if (uio->uio_resid == 0)
599 		return (0);
600 
601 	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp->if_mtu, &m, &dst);
602 	if (error)
603 		return (error);
604 
605 	if (m->m_pkthdr.len > ifp->if_mtu) {
		m_freem(m);		/* don't leak the mbuf on this error path */
606 		return (EMSGSIZE);
	}
607 
608 	s = splsoftnet();
609 #if BSD >= 199103
610 	error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)0);
611 #else
612 	error = (*ifp->if_output)(ifp, m, &dst);
613 #endif
614 	splx(s);
615 	/*
616 	 * The driver frees the mbuf.
617 	 */
618 	return (error);
619 }
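
/*
 * Sketch of the corresponding userland write for DLT_EN10MB (hypothetical,
 * for illustration): the caller hands in a complete link-level frame, and
 * bpf_movein() above copies the 14-byte Ethernet header into dst.sa_data
 * while the remainder (which must fit in the interface MTU) becomes the
 * mbuf passed to if_output.
 *
 *	struct {
 *		struct ether_header eh;
 *		char payload[64];
 *	} frame;
 *
 *	(fill in frame.eh and frame.payload, then)
 *	write(fd, &frame, sizeof(frame));
 */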
620 
621 /*
622  * Reset a descriptor by flushing its packet buffer and clearing the
623  * receive and drop counts.  Should be called at splimp.
624  */
625 static void
626 reset_d(d)
627 	struct bpf_d *d;
628 {
629 	if (d->bd_hbuf) {
630 		/* Free the hold buffer. */
631 		d->bd_fbuf = d->bd_hbuf;
632 		d->bd_hbuf = 0;
633 	}
634 	d->bd_slen = 0;
635 	d->bd_hlen = 0;
636 	d->bd_rcount = 0;
637 	d->bd_dcount = 0;
638 }
639 
640 #ifdef BPF_KERN_FILTER
641 extern struct bpf_insn *bpf_tcp_filter;
642 extern struct bpf_insn *bpf_udp_filter;
643 #endif
644 
645 /*
646  *  FIONREAD		Check for read packet available.
647  *  BIOCGBLEN		Get buffer len [for read()].
648  *  BIOCSETF		Set link layer read filter.
649  *  BIOCFLUSH		Flush read packet buffer.
650  *  BIOCPROMISC		Put interface into promiscuous mode.
651  *  BIOCGDLT		Get link layer type.
652  *  BIOCGETIF		Get interface name.
653  *  BIOCSETIF		Set interface.
654  *  BIOCSRTIMEOUT	Set read timeout.
655  *  BIOCGRTIMEOUT	Get read timeout.
656  *  BIOCGSTATS		Get packet stats.
657  *  BIOCIMMEDIATE	Set immediate mode.
658  *  BIOCVERSION		Get filter language version.
659  */
660 /* ARGSUSED */
661 int
662 bpfioctl(dev, cmd, addr, flag, p)
663 	dev_t dev;
664 	u_long cmd;
665 	caddr_t addr;
666 	int flag;
667 	struct proc *p;
668 {
669 	register struct bpf_d *d = &bpf_dtab[minor(dev)];
670 	int s, error = 0;
671 #ifdef BPF_KERN_FILTER
672 	register struct bpf_insn **p;
673 #endif
674 
675 	switch (cmd) {
676 
677 	default:
678 		error = EINVAL;
679 		break;
680 
681 	/*
682 	 * Check for read packet available.
683 	 */
684 	case FIONREAD:
685 		{
686 			int n;
687 
688 			s = splimp();
689 			n = d->bd_slen;
690 			if (d->bd_hbuf)
691 				n += d->bd_hlen;
692 			splx(s);
693 
694 			*(int *)addr = n;
695 			break;
696 		}
697 
698 	/*
699 	 * Get buffer len [for read()].
700 	 */
701 	case BIOCGBLEN:
702 		*(u_int *)addr = d->bd_bufsize;
703 		break;
704 
705 	/*
706 	 * Set buffer length.
707 	 */
708 	case BIOCSBLEN:
709 #if BSD < 199103
710 		error = EINVAL;
711 #else
712 		if (d->bd_bif != 0)
713 			error = EINVAL;
714 		else {
715 			register u_int size = *(u_int *)addr;
716 
717 			if (size > BPF_MAXBUFSIZE)
718 				*(u_int *)addr = size = BPF_MAXBUFSIZE;
719 			else if (size < BPF_MINBUFSIZE)
720 				*(u_int *)addr = size = BPF_MINBUFSIZE;
721 			d->bd_bufsize = size;
722 		}
723 #endif
724 		break;
725 
726 	/*
727 	 * Set link layer read filter.
728 	 */
729 	case BIOCSETF:
730 		error = bpf_setf(d, (struct bpf_program *)addr);
731 		break;
732 
733 #ifdef BPF_KERN_FILTER
734 	/*
735 	 * Set TCP or UDP reject filter.
736 	 */
737 	case BIOCSTCPF:
738 	case BIOCSUDPF:
739 		if (!suser()) {
740 			error = EPERM;
741 			break;
742 		}
743 
744 		/* Validate and store filter */
745 		error = bpf_setf(d, (struct bpf_program *)addr);
746 
747 		/* Free possible old filter */
748 		if (cmd == BIOCSTCPF)
749 			p = &bpf_tcp_filter;
750 		else
751 			p = &bpf_udp_filter;
752 		if (*p != NULL)
753 			free((caddr_t)*p, M_DEVBUF);
754 
755 		/* Steal new filter (noop if error) */
756 		s = splimp();
757 		*p = d->bd_filter;
758 		d->bd_filter = NULL;
759 		splx(s);
760 		break;
761 #endif
762 
763 	/*
764 	 * Flush read packet buffer.
765 	 */
766 	case BIOCFLUSH:
767 		s = splimp();
768 		reset_d(d);
769 		splx(s);
770 		break;
771 
772 	/*
773 	 * Put interface into promiscuous mode.
774 	 */
775 	case BIOCPROMISC:
776 		if (d->bd_bif == 0) {
777 			/*
778 			 * No interface attached yet.
779 			 */
780 			error = EINVAL;
781 			break;
782 		}
783 		s = splimp();
784 		if (d->bd_promisc == 0) {
785 			error = ifpromisc(d->bd_bif->bif_ifp, 1);
786 			if (error == 0)
787 				d->bd_promisc = 1;
788 		}
789 		splx(s);
790 		break;
791 
792 	/*
793 	 * Get device parameters.
794 	 */
795 	case BIOCGDLT:
796 		if (d->bd_bif == 0)
797 			error = EINVAL;
798 		else
799 			*(u_int *)addr = d->bd_bif->bif_dlt;
800 		break;
801 
802 	/*
803 	 * Get interface name.
804 	 */
805 	case BIOCGETIF:
806 		if (d->bd_bif == 0)
807 			error = EINVAL;
808 		else
809 			bpf_ifname(d->bd_bif->bif_ifp, (struct ifreq *)addr);
810 		break;
811 
812 	/*
813 	 * Set interface.
814 	 */
815 	case BIOCSETIF:
816 		error = bpf_setif(d, (struct ifreq *)addr);
817 		break;
818 
819 	/*
820 	 * Set read timeout.
821 	 */
822 	case BIOCSRTIMEOUT:
823 		{
824 			struct timeval *tv = (struct timeval *)addr;
825 
826 			/* Compute number of ticks. */
827 			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
828 			if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
829 				d->bd_rtout = 1;
830 			break;
831 		}
832 
833 	/*
834 	 * Get read timeout.
835 	 */
836 	case BIOCGRTIMEOUT:
837 		{
838 			struct timeval *tv = (struct timeval *)addr;
839 
840 			tv->tv_sec = d->bd_rtout / hz;
841 			tv->tv_usec = (d->bd_rtout % hz) * tick;
842 			break;
843 		}
844 
845 	/*
846 	 * Get packet stats.
847 	 */
848 	case BIOCGSTATS:
849 		{
850 			struct bpf_stat *bs = (struct bpf_stat *)addr;
851 
852 			bs->bs_recv = d->bd_rcount;
853 			bs->bs_drop = d->bd_dcount;
854 			break;
855 		}
856 
857 	/*
858 	 * Set immediate mode.
859 	 */
860 	case BIOCIMMEDIATE:
861 		d->bd_immediate = *(u_int *)addr;
862 		break;
863 
864 	case BIOCVERSION:
865 		{
866 			struct bpf_version *bv = (struct bpf_version *)addr;
867 
868 			bv->bv_major = BPF_MAJOR_VERSION;
869 			bv->bv_minor = BPF_MINOR_VERSION;
870 			break;
871 		}
872 
873 
874 	case FIONBIO:		/* Non-blocking I/O */
875 		if (*(int *)addr)
876 			d->bd_rtout = -1;
877 		else
878 			d->bd_rtout = 0;
879 		break;
880 
881 	case FIOASYNC:		/* Send signal on receive packets */
882 		d->bd_async = *(int *)addr;
883 		break;
884 
885 	/*
886 	 * N.B.  ioctl (FIOSETOWN) and fcntl (F_SETOWN) both end up doing
887 	 * the equivalent of a TIOCSPGRP and hence end up here.  *However*
888 	 * TIOCSPGRP's arg is a process group if it's positive and a process
889 	 * id if it's negative.  This is exactly the opposite of what the
890 	 * other two functions want!  Therefore there is code in ioctl and
891 	 * fcntl to negate the arg before calling here.
892 	 */
893 	case TIOCSPGRP:		/* Process or group to send signals to */
894 		d->bd_pgid = *(int *)addr;
895 		break;
896 
897 	case TIOCGPGRP:
898 		*(int *)addr = d->bd_pgid;
899 		break;
900 	}
901 	return (error);
902 }
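
/*
 * A typical open-and-configure sequence from userland, as a sketch
 * (hypothetical; the device node and interface name below are only
 * examples):
 *
 *	struct ifreq ifr;
 *	u_int on = 1;
 *	int fd;
 *
 *	fd = open("/dev/bpf0", O_RDWR);
 *	strncpy(ifr.ifr_name, "le0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);	(attach to an interface)
 *	ioctl(fd, BIOCIMMEDIATE, &on);	(deliver packets as they arrive)
 *	ioctl(fd, BIOCPROMISC, 0);	(optionally go promiscuous)
 *
 * After this, read(), write() and the remaining ioctls behave as described
 * in the per-case comments above.
 */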
903 
904 /*
905  * Set d's packet filter program to fp.  If this file already has a filter,
906  * free it and replace it.  Returns EINVAL for bogus requests.
907  */
908 int
909 bpf_setf(d, fp)
910 	struct bpf_d *d;
911 	struct bpf_program *fp;
912 {
913 	struct bpf_insn *fcode, *old;
914 	u_int flen, size;
915 	int s;
916 
917 	old = d->bd_filter;
918 	if (fp->bf_insns == 0) {
919 		if (fp->bf_len != 0)
920 			return (EINVAL);
921 		s = splimp();
922 		d->bd_filter = 0;
923 		reset_d(d);
924 		splx(s);
925 		if (old != 0)
926 			free((caddr_t)old, M_DEVBUF);
927 		return (0);
928 	}
929 	flen = fp->bf_len;
930 	if (flen > BPF_MAXINSNS)
931 		return (EINVAL);
932 
933 	size = flen * sizeof(*fp->bf_insns);
934 	fcode = (struct bpf_insn *)malloc(size, M_DEVBUF, M_WAITOK);
935 	if (fcode == 0)
936 		return (ENOMEM);
937 	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
938 	    bpf_validate(fcode, (int)flen)) {
939 		s = splimp();
940 		d->bd_filter = fcode;
941 		reset_d(d);
942 		splx(s);
943 		if (old != 0)
944 			free((caddr_t)old, M_DEVBUF);
945 
946 		return (0);
947 	}
948 	free((caddr_t)fcode, M_DEVBUF);
949 	return (EINVAL);
950 }
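
/*
 * The simplest valid program that can be handed to BIOCSETF (and hence to
 * bpf_setf() above) is a single return instruction; as a sketch, using the
 * BPF_STMT() macro from net/bpf.h, this accepts every packet and asks for
 * up to 96 bytes of each to be captured:
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET+BPF_K, 96),
 *	};
 *	struct bpf_program prog = { 1, insns };
 *
 *	ioctl(fd, BIOCSETF, &prog);
 *
 * bpf_validate() accepts it because the program ends in a return and makes
 * no out-of-range jumps or memory references.
 */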
951 
952 /*
953  * Detach a file from its current interface (if attached at all) and attach
954  * to the interface indicated by the name stored in ifr.
955  * Return an errno or 0.
956  */
957 static int
958 bpf_setif(d, ifr)
959 	struct bpf_d *d;
960 	struct ifreq *ifr;
961 {
962 	struct bpf_if *bp;
963 	char *cp;
964 	int unit_seen, i, s, error;
965 
966 	/*
967 	 * Make sure the provided name has a unit number, and default
968 	 * it to '0' if not specified.
969 	 * XXX This is ugly ... do this differently?
970 	 */
971 	unit_seen = 0;
972 	cp = ifr->ifr_name;
973 	cp[sizeof(ifr->ifr_name) - 1] = '\0';	/* sanity */
974 	while (*cp++)
975 		if (*cp >= '0' && *cp <= '9')
976 			unit_seen = 1;
977 	if (!unit_seen) {
978 		/* Make sure to leave room for the '\0'. */
979 		for (i = 0; i < (IFNAMSIZ - 1); ++i) {
980 			if ((ifr->ifr_name[i] >= 'a' &&
981 			     ifr->ifr_name[i] <= 'z') ||
982 			    (ifr->ifr_name[i] >= 'A' &&
983 			     ifr->ifr_name[i] <= 'Z'))
984 				continue;
985 			ifr->ifr_name[i] = '0';
986 		}
987 	}
988 
989 	/*
990 	 * Look through attached interfaces for the named one.
991 	 */
992 	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
993 		struct ifnet *ifp = bp->bif_ifp;
994 
995 		if (ifp == 0 ||
996 		    strcmp(ifp->if_xname, ifr->ifr_name) != 0)
997 			continue;
998 		/*
999 		 * We found the requested interface.
1000 		 * If it's not up, return an error.
1001 		 * Allocate the packet buffers if we need to.
1002 		 * If we're already attached to requested interface,
1003 		 * just flush the buffer.
1004 		 */
1005 		if ((ifp->if_flags & IFF_UP) == 0)
1006 			return (ENETDOWN);
1007 
1008 		if (d->bd_sbuf == 0) {
1009 			error = bpf_allocbufs(d);
1010 			if (error != 0)
1011 				return (error);
1012 		}
1013 		s = splimp();
1014 		if (bp != d->bd_bif) {
1015 			if (d->bd_bif)
1016 				/*
1017 				 * Detach if attached to something else.
1018 				 */
1019 				bpf_detachd(d);
1020 
1021 			bpf_attachd(d, bp);
1022 		}
1023 		reset_d(d);
1024 		splx(s);
1025 		return (0);
1026 	}
1027 	/* Not found. */
1028 	return (ENXIO);
1029 }
1030 
1031 /*
1032  * Copy the interface name to the ifreq.
1033  */
1034 static void
1035 bpf_ifname(ifp, ifr)
1036 	struct ifnet *ifp;
1037 	struct ifreq *ifr;
1038 {
1039 
1040 	bcopy(ifp->if_xname, ifr->ifr_name, IFNAMSIZ);
1041 }
1042 
1043 /*
1044  * Support for poll() system call
1045  *
1046  * Return true iff the specific operation will not block indefinitely.
1047  * Otherwise, return false but make a note that a selwakeup() must be done.
1048  */
1049 int
1050 bpfpoll(dev, events, p)
1051 	register dev_t dev;
1052 	int events;
1053 	struct proc *p;
1054 {
1055 	register struct bpf_d *d = &bpf_dtab[minor(dev)];
1056 	int revents = 0;
1057 	register int s = splimp();
1058 
1059 	/*
1060 	 * An imitation of the FIONREAD ioctl code.
1061 	 */
1062 	if (events & (POLLIN | POLLRDNORM))
1063 		if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
1064 			revents |= events & (POLLIN | POLLRDNORM);
1065 		else
1066 			selrecord(p, &d->bd_sel);
1067 
1068 	splx(s);
1069 	return (revents);
1070 }
1071 
1072 /*
1073  * Incoming linkage from device drivers.  Process the packet pkt, of length
1074  * pktlen, which is stored in a contiguous buffer.  The packet is parsed
1075  * by each process' filter, and if accepted, stashed into the corresponding
1076  * buffer.
1077  */
1078 void
1079 bpf_tap(arg, pkt, pktlen)
1080 	caddr_t arg;
1081 	register u_char *pkt;
1082 	register u_int pktlen;
1083 {
1084 	struct bpf_if *bp;
1085 	register struct bpf_d *d;
1086 	register u_int slen;
1087 	/*
1088 	 * Note that the ipl does not have to be raised at this point.
1089 	 * The only problem that could arise here would be if two different
1090 	 * interfaces shared any data, which is not the case.
1091 	 */
1092 	bp = (struct bpf_if *)arg;
1093 	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
1094 		++d->bd_rcount;
1095 		slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen);
1096 		if (slen != 0)
1097 			catchpacket(d, pkt, pktlen, slen, bcopy);
1098 	}
1099 }
1100 
1101 /*
1102  * Copy data from an mbuf chain into a buffer.  This code is derived
1103  * from m_copydata in sys/uipc_mbuf.c.
1104  */
1105 static void
1106 bpf_mcopy(src_arg, dst_arg, len)
1107 	const void *src_arg;
1108 	void *dst_arg;
1109 	register size_t len;
1110 {
1111 	register const struct mbuf *m;
1112 	register u_int count;
1113 	u_char *dst;
1114 
1115 	m = src_arg;
1116 	dst = dst_arg;
1117 	while (len > 0) {
1118 		if (m == 0)
1119 			panic("bpf_mcopy");
1120 		count = min(m->m_len, len);
1121 		bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
1122 		m = m->m_next;
1123 		dst += count;
1124 		len -= count;
1125 	}
1126 }
1127 
1128 /*
1129  * Incoming linkage from device drivers, when packet is in an mbuf chain.
1130  */
1131 void
1132 bpf_mtap(arg, m)
1133 	caddr_t arg;
1134 	struct mbuf *m;
1135 {
1136 	struct bpf_if *bp = (struct bpf_if *)arg;
1137 	struct bpf_d *d;
1138 	u_int pktlen, slen;
1139 	struct mbuf *m0;
1140 
1141 	pktlen = 0;
1142 	for (m0 = m; m0 != 0; m0 = m0->m_next)
1143 		pktlen += m0->m_len;
1144 
1145 	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
1146 		++d->bd_rcount;
1147 		slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
1148 		if (slen != 0)
1149 			catchpacket(d, (u_char *)m, pktlen, slen, bpf_mcopy);
1150 	}
1151 }
1152 
1153 /*
1154  * Move the packet data from interface memory (pkt) into the
1155  * store buffer.  Return 1 if it's time to wakeup a listener (buffer full),
1156  * otherwise 0.  "copy" is the routine called to do the actual data
1157  * transfer.  bcopy is passed in to copy contiguous chunks, while
1158  * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
1159  * pkt is really an mbuf.
1160  */
1161 static void
1162 catchpacket(d, pkt, pktlen, snaplen, cpfn)
1163 	register struct bpf_d *d;
1164 	register u_char *pkt;
1165 	register u_int pktlen, snaplen;
1166 	register void (*cpfn) __P((const void *, void *, size_t));
1167 {
1168 	register struct bpf_hdr *hp;
1169 	register int totlen, curlen;
1170 	register int hdrlen = d->bd_bif->bif_hdrlen;
1171 	/*
1172 	 * Figure out how many bytes to move.  If the packet is
1173 	 * greater or equal to the snapshot length, transfer that
1174 	 * much.  Otherwise, transfer the whole packet (unless
1175 	 * we hit the buffer size limit).
1176 	 */
1177 	totlen = hdrlen + min(snaplen, pktlen);
1178 	if (totlen > d->bd_bufsize)
1179 		totlen = d->bd_bufsize;
1180 
1181 	/*
1182 	 * Round up the end of the previous packet to the next longword.
1183 	 */
1184 	curlen = BPF_WORDALIGN(d->bd_slen);
1185 	if (curlen + totlen > d->bd_bufsize) {
1186 		/*
1187 		 * This packet will overflow the storage buffer.
1188 		 * Rotate the buffers if we can, then wakeup any
1189 		 * pending reads.
1190 		 */
1191 		if (d->bd_fbuf == 0) {
1192 			/*
1193 			 * We haven't completed the previous read yet,
1194 			 * so drop the packet.
1195 			 */
1196 			++d->bd_dcount;
1197 			return;
1198 		}
1199 		ROTATE_BUFFERS(d);
1200 		bpf_wakeup(d);
1201 		curlen = 0;
1202 	}
1203 	else if (d->bd_immediate)
1204 		/*
1205 		 * Immediate mode is set.  A packet arrived so any
1206 		 * reads should be woken up.
1207 		 */
1208 		bpf_wakeup(d);
1209 
1210 	/*
1211 	 * Append the bpf header.
1212 	 */
1213 	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
1214 #if BSD >= 199103
1215 	microtime(&hp->bh_tstamp);
1216 #elif defined(sun)
1217 	uniqtime(&hp->bh_tstamp);
1218 #else
1219 	hp->bh_tstamp = time;
1220 #endif
1221 	hp->bh_datalen = pktlen;
1222 	hp->bh_hdrlen = hdrlen;
1223 	/*
1224 	 * Copy the packet data into the store buffer and update its length.
1225 	 */
1226 	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
1227 	d->bd_slen = curlen + totlen;
1228 }
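
/*
 * Small numeric example of the sizing above (assuming an Ethernet
 * interface whose bif_hdrlen works out to 18, and a filter that returned
 * a snaplen of 68): for a 1514-byte packet, totlen = 18 + min(68, 1514)
 * = 86, the record starts at the next longword boundary after the previous
 * one, bh_caplen is 68 and bh_datalen still records the full 1514.
 */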
1229 
1230 /*
1231  * Initialize all nonzero fields of a descriptor.
1232  */
1233 static int
1234 bpf_allocbufs(d)
1235 	register struct bpf_d *d;
1236 {
1237 	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK);
1238 	if (d->bd_fbuf == 0)
1239 		return (ENOBUFS);
1240 
1241 	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK);
1242 	if (d->bd_sbuf == 0) {
1243 		free(d->bd_fbuf, M_DEVBUF);
1244 		return (ENOBUFS);
1245 	}
1246 	d->bd_slen = 0;
1247 	d->bd_hlen = 0;
1248 	return (0);
1249 }
1250 
1251 /*
1252  * Free buffers currently in use by a descriptor.
1253  * Called on close.
1254  */
1255 static void
1256 bpf_freed(d)
1257 	register struct bpf_d *d;
1258 {
1259 	/*
1260 	 * We don't need to lock out interrupts since this descriptor has
1261 	 * been detached from its interface and it hasn't yet been marked
1262 	 * free.
1263 	 */
1264 	if (d->bd_sbuf != 0) {
1265 		free(d->bd_sbuf, M_DEVBUF);
1266 		if (d->bd_hbuf != 0)
1267 			free(d->bd_hbuf, M_DEVBUF);
1268 		if (d->bd_fbuf != 0)
1269 			free(d->bd_fbuf, M_DEVBUF);
1270 	}
1271 	if (d->bd_filter)
1272 		free((caddr_t)d->bd_filter, M_DEVBUF);
1273 
1274 	D_MARKFREE(d);
1275 }
1276 
1277 /*
1278  * Attach an interface to bpf.  driverp is a pointer to a (struct bpf_if *)
1279  * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
1280  * size of the link header (variable length headers not yet supported).
1281  */
1282 void
1283 bpfattach(driverp, ifp, dlt, hdrlen)
1284 	caddr_t *driverp;
1285 	struct ifnet *ifp;
1286 	u_int dlt, hdrlen;
1287 {
1288 	struct bpf_if *bp;
1289 	int i;
1290 #if BSD < 199103
1291 	static struct bpf_if bpf_ifs[NBPFILTER];
1292 	static int bpfifno;
1293 
1294 	bp = (bpfifno < NBPFILTER) ? &bpf_ifs[bpfifno++] : 0;
1295 #else
1296 	bp = (struct bpf_if *)malloc(sizeof(*bp), M_DEVBUF, M_DONTWAIT);
1297 #endif
1298 	if (bp == 0)
1299 		panic("bpfattach");
1300 
1301 	bp->bif_dlist = 0;
1302 	bp->bif_driverp = (struct bpf_if **)driverp;
1303 	bp->bif_ifp = ifp;
1304 	bp->bif_dlt = dlt;
1305 
1306 	bp->bif_next = bpf_iflist;
1307 	bpf_iflist = bp;
1308 
1309 	*bp->bif_driverp = 0;
1310 
1311 	/*
1312 	 * Compute the length of the bpf header.  This is not necessarily
1313 	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1314 	 * that the network layer header begins on a longword boundary (for
1315 	 * performance reasons and to alleviate alignment restrictions).
1316 	 */
1317 	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
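	/*
	 * For example, assuming SIZEOF_BPF_HDR is 18 and a 14-byte Ethernet
	 * header: BPF_WORDALIGN(14 + 18) = 32, so bif_hdrlen = 32 - 14 = 18
	 * and the bpf header plus the link header together span 32 bytes,
	 * leaving the network-layer header longword aligned in the buffer.
	 */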
1318 
1319 	/*
1320 	 * Mark all the descriptors free if this hasn't been done.
1321 	 */
1322 	if (!D_ISFREE(&bpf_dtab[0]))
1323 		for (i = 0; i < NBPFILTER; ++i)
1324 			D_MARKFREE(&bpf_dtab[i]);
1325 
1326 #if 0
1327 	printf("bpf: %s attached\n", ifp->if_xname);
1328 #endif
1329 }
1330 
1331 #if BSD >= 199103
1332 /* XXX This routine belongs in net/if.c. */
1333 /*
1334  * Set/clear promiscuous mode on interface ifp based on the truth value
1335  * of pswitch.  The calls are reference counted so that only the first
1336  * "on" request actually has an effect, as does the final "off" request.
1337  * Results are undefined if the "off" and "on" requests are not matched.
1338  */
1339 int
1340 ifpromisc(ifp, pswitch)
1341 	register struct ifnet *ifp;
1342 	register int pswitch;
1343 {
1344 	register int pcount, ret;
1345 	register short flags;
1346 	struct ifreq ifr;
1347 
1348 	pcount = ifp->if_pcount;
1349 	flags = ifp->if_flags;
1350 	if (pswitch) {
1351 		/*
1352 		 * If the device is not configured up, we cannot put it in
1353 		 * promiscuous mode.
1354 		 */
1355 		if ((ifp->if_flags & IFF_UP) == 0)
1356 			return (ENETDOWN);
1357 		if (ifp->if_pcount++ != 0)
1358 			return (0);
1359 		ifp->if_flags |= IFF_PROMISC;
1360 	} else {
1361 		if (--ifp->if_pcount > 0)
1362 			return (0);
1363 		ifp->if_flags &= ~IFF_PROMISC;
1364 		/*
1365 		 * If the device is not configured up, we should not need to
1366 		 * turn off promiscuous mode (device should have turned it
1367 		 * off when interface went down; and will look at IFF_PROMISC
1368 		 * again next time interface comes up).
1369 		 */
1370 		if ((ifp->if_flags & IFF_UP) == 0)
1371 			return (0);
1372 	}
1373 	bzero((caddr_t)&ifr, sizeof(ifr));
1374 	ifr.ifr_flags = ifp->if_flags;
1375 	ret = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1376 	/* Restore interface state if not successful */
1377 	if (ret != 0) {
1378 		ifp->if_pcount = pcount;
1379 		ifp->if_flags = flags;
1380 	}
1381 	return (ret);
1382 }
1383 #endif
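
/*
 * Illustration of the reference counting above: with three bpf descriptors
 * requesting promiscuous mode on the same interface, ifpromisc(ifp, 1) runs
 * three times but only the first call sets IFF_PROMISC and pushes
 * SIOCSIFFLAGS down to the driver; the matching third ifpromisc(ifp, 0) is
 * what finally clears it again.
 */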
1384 
1385 #if BSD < 199103
1386 /*
1387  * Allocate some memory for bpf.  This is temporary SunOS support, and
1388  * is admittedly a hack.
1389  * If resources unavailable, return 0.
1390  */
1391 static caddr_t
1392 bpf_alloc(size, canwait)
1393 	register int size;
1394 	register int canwait;
1395 {
1396 	register struct mbuf *m;
1397 
1398 	if ((unsigned)size > (MCLBYTES-8))
1399 		return 0;
1400 
1401 	MGET(m, canwait, MT_DATA);
1402 	if (m == 0)
1403 		return 0;
1404 	if ((unsigned)size > (MLEN-8)) {
1405 		MCLGET(m);
1406 		if (m->m_len != MCLBYTES) {
1407 			m_freem(m);
1408 			return 0;
1409 		}
1410 	}
1411 	*mtod(m, struct mbuf **) = m;
1412 	return mtod(m, caddr_t) + 8;
1413 }
1414 #endif
1415